Python code annotation library for NVIDIA Tools Extension enabling performance profiling with NVIDIA Nsight Systems
—
Automatically annotate all function calls in your program with configurable detail levels, including line numbers and C function support. The Profile class provides comprehensive automatic annotation without manual code modification.
The Profile class enables automatic annotation of all function calls using Python's profiling hooks with configurable detail levels and C function support.
class Profile:
def __init__(self, linenos: bool = True, annotate_cfuncs: bool = True):
"""
Create a Profile object for automatic function call annotation.
Parameters:
- linenos: Include file and line number information in annotations
- annotate_cfuncs: Also annotate C-extension and builtin functions
"""
def enable(self):
"""
Start annotating function calls automatically.
Sets up profiling hooks for current thread and all new threads.
"""
def disable(self):
"""
Stop annotating function calls automatically.
Removes profiling hooks from current thread and new threads.
"""

Basic Usage Example:
import nvtx
import time
# Create profiler with default settings
profiler = nvtx.Profile()
# Enable automatic annotation
profiler.enable()
# All function calls are now automatically annotated
def process_data():
    """Simulate a short processing step, then call compute_results()."""
    time.sleep(0.1)  # This will be annotated
    compute_results()
def compute_results():
    """Simulate a computation by sleeping for 0.2 seconds."""
    time.sleep(0.2)  # This will also be annotated
process_data()
# Stop automatic annotation
profiler.disable()
# Function calls after disable() are not annotated
process_data() # Not annotated

Control whether file names and line numbers are included in annotation messages:
import nvtx
# Include line numbers (default)
profiler_with_lines = nvtx.Profile(linenos=True)
# Example annotation message: "main.py:42(process_data)"
# Exclude line numbers for cleaner display
profiler_clean = nvtx.Profile(linenos=False)
# Example annotation message: "process_data"

Usage Example:
import nvtx
import time
def detailed_profiling():
    """Run two nested example calls with file and line info in the annotations."""
    # Detailed profiling with file and line info
    profiler = nvtx.Profile(linenos=True)
    profiler.enable()
    try:
        def func_a():
            time.sleep(0.1)
            func_b()

        def func_b():
            time.sleep(0.1)

        func_a()  # Annotations show: "example.py:123(func_a)", "example.py:126(func_b)"
    finally:
        # Always remove the profiling hooks, even if the profiled code raises.
        profiler.disable()
def clean_profiling():
# Clean profiling with function names only
profiler = nvtx.Profile(linenos=False)
profiler.enable()
def func_a():
time.sleep(0.1)
func_b()
def func_b():
time.sleep(0.1)
func_a() # Annotations show: "func_a", "func_b"
profiler.disable()

Control whether C extensions and builtin functions are annotated:
import nvtx
# Annotate C functions (default)
profiler_full = nvtx.Profile(annotate_cfuncs=True)
# Skip C functions for focus on Python code
profiler_python_only = nvtx.Profile(annotate_cfuncs=False)

Usage Example:
import nvtx
import time
import json
def c_function_profiling():
    """Profile a short workload with C-extension and builtin calls annotated too."""
    # Profile including C functions
    profiler = nvtx.Profile(annotate_cfuncs=True)
    profiler.enable()
    try:
        data = {"key": "value"}
        # json.dumps itself is a Python-level function, so it is annotated
        # regardless of annotate_cfuncs; with annotate_cfuncs=True any
        # C-implemented helpers it calls are annotated as well.
        json_str = json.dumps(data)
        time.sleep(0.1)  # time.sleep is a builtin (C) function - will be annotated
    finally:
        # Always remove the profiling hooks, even if the profiled code raises.
        profiler.disable()
def python_only_profiling():
# Profile Python functions only
profiler = nvtx.Profile(annotate_cfuncs=False)
profiler.enable()
data = {"key": "value"}
json_str = json.dumps(data) # json.dumps is a Python-level function - still annotated; only C functions it calls are skipped
time.sleep(0.1) # time.sleep is C function - NOT annotated
def python_func():
return "python"
result = python_func() # Python function - will be annotated
profiler.disable()

The Profile class automatically handles both current thread and new thread annotation:
import nvtx
import threading
import time
def automatic_thread_profiling():
# Create and enable profiler
profiler = nvtx.Profile()
profiler.enable()
def worker_function(worker_id):
time.sleep(0.1)
print(f"Worker {worker_id} completed")
# Main thread function calls are annotated
main_work()
# New threads are also automatically profiled
threads = []
for i in range(3):
thread = threading.Thread(target=worker_function, args=(i,))
threads.append(thread)
thread.start()
# Wait for all threads
for thread in threads:
thread.join()
profiler.disable()

NVTX provides a command-line interface for automatic profiling of entire Python programs:
python -m nvtx [options] scriptfile [args] ...

--linenos # Include file and line number information (default)
--no-linenos # Do not include file and line number information
--annotate-cfuncs # Also annotate C-extension and builtin functions

Usage Examples:
# Profile with default settings (line numbers, no C functions)
python -m nvtx my_script.py
# Profile with line numbers and C functions
python -m nvtx --annotate-cfuncs my_script.py
# Profile with clean function names only
python -m nvtx --no-linenos my_script.py arg1 arg2
# Full profiling with all options
python -m nvtx --linenos --annotate-cfuncs my_script.py --input data.txt

Use profiling for specific code sections:
import nvtx
class ProfiledSection:
    """Context manager that enables an nvtx.Profile on entry and disables it on exit."""

    def __init__(self, **profile_kwargs):
        # Keyword arguments are forwarded unchanged to nvtx.Profile.
        self.profiler = nvtx.Profile(**profile_kwargs)

    def __enter__(self):
        """Enable the profiler and return it as the ``with ... as`` target."""
        self.profiler.enable()
        return self.profiler

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Disable the profiler; exceptions from the block still propagate."""
        self.profiler.disable()
# Usage
with ProfiledSection(linenos=True, annotate_cfuncs=False) as profiler:
# All function calls in this block are automatically annotated
complex_algorithm()
data_processing()

Enable profiling based on environment or configuration:
import nvtx
import os
class ConditionalProfiler:
    """Wrap nvtx.Profile so that profiling is switched on by environment variables.

    Environment variables (each compared case-insensitively against "true"):
    - ENABLE_NVTX_PROFILING: create a profiler at all (default "false")
    - NVTX_LINENOS: passed as ``linenos`` (default "true")
    - NVTX_C_FUNCS: passed as ``annotate_cfuncs`` (default "false")
    """

    @staticmethod
    def _env_flag(name: str, default: str) -> bool:
        """Return True when the environment variable equals "true" (any case)."""
        return os.getenv(name, default).lower() == 'true'

    def __init__(self):
        self.profiler = None
        self.enabled = self._env_flag('ENABLE_NVTX_PROFILING', 'false')
        if self.enabled:
            self.profiler = nvtx.Profile(
                linenos=self._env_flag('NVTX_LINENOS', 'true'),
                annotate_cfuncs=self._env_flag('NVTX_C_FUNCS', 'false'),
            )

    def enable(self):
        """Start annotation; does nothing when no profiler was created."""
        if self.profiler:
            self.profiler.enable()

    def disable(self):
        """Stop annotation; does nothing when no profiler was created."""
        if self.profiler:
            self.profiler.disable()
# Global profiler instance
profiler = ConditionalProfiler()
def main():
profiler.enable()
try:
run_application()
finally:
profiler.disable()

Create decorators for automatic profiling of specific functions:
import nvtx
from functools import wraps
def auto_profile(linenos=True, annotate_cfuncs=False):
    """Build a decorator that profiles every call of the decorated function.

    Parameters:
    - linenos: passed to nvtx.Profile as ``linenos``
    - annotate_cfuncs: passed to nvtx.Profile as ``annotate_cfuncs``

    Returns a decorator. The wrapped function creates a fresh nvtx.Profile on
    each call, enables it, runs the original function, and always disables it.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            profiler = nvtx.Profile(linenos=linenos, annotate_cfuncs=annotate_cfuncs)
            profiler.enable()
            try:
                return func(*args, **kwargs)
            finally:
                # Runs on both normal return and exception.
                profiler.disable()
        return wrapper
    return decorator
# Usage
@auto_profile(linenos=True, annotate_cfuncs=True)
def critical_function():
# All nested function calls will be automatically annotated
helper_function_1()
helper_function_2()
return results

Performance considerations:
- Additional overhead with annotate_cfuncs=True due to additional hook processing
- Additional overhead with linenos=True for file path resolution

import nvtx
# Strategy 1: Profile specific modules only
def selective_profiling():
    """Profile only critical_business_logic(), with C functions skipped."""
    profiler = nvtx.Profile(annotate_cfuncs=False)  # Skip C functions
    profiler.enable()
    try:
        # Only profile specific parts of application
        critical_business_logic()
    finally:
        # Always remove the profiling hooks, even if the profiled code raises.
        profiler.disable()
# Strategy 2: Use sampling for long-running applications
import time
def sampled_profiling():
profiler = nvtx.Profile()
for iteration in range(1000):
if iteration % 100 == 0: # Profile every 100th iteration
profiler.enable()
process_iteration(iteration)
profiler.disable()
else:
process_iteration(iteration)

Install with Tessl CLI
npx tessl i tessl/pypi-nvtx