Deep learning framework providing tensor computation with GPU acceleration and dynamic neural networks with automatic differentiation
JIT compilation, model export, graph transformations, quantization, and deployment utilities for optimizing and deploying PyTorch models in production environments.
TorchScript compilation for model optimization and deployment.
def jit.script(obj, optimize=None, _frames_up=0, _rcb=None):
"""
Compile Python code to TorchScript.
Parameters:
- obj: Function, method, or class to compile
- optimize: Whether to apply optimizations
Returns:
ScriptModule or ScriptFunction
"""
def jit.trace(func, example_inputs, optimize=None, check_trace=True, check_inputs=None, check_tolerance=1e-5, strict=True, _force_outplace=False, _module_class=None, _compilation_unit=None):
"""
Trace function execution to create TorchScript.
Parameters:
- func: Function or module to trace
- example_inputs: Example inputs for tracing
- optimize: Whether to apply optimizations
- check_trace: Whether to verify trace correctness
- strict: Whether to record all operations
Returns:
TracedModule or function
"""
def jit.load(f, map_location=None, _extra_files=None):
"""Load TorchScript model from file."""
def jit.save(m, f, _extra_files=None):
"""Save TorchScript model to file."""
class jit.ScriptModule(nn.Module):
"""TorchScript compiled module."""
def save(self, f, _extra_files=None): ...
def code(self) -> str: ...
def graph(self): ...
def code_with_constants(self) -> Tuple[str, List[Tensor]]: ...
def jit.freeze(mod, preserved_attrs=None, optimize_numerics=True):
"""Freeze TorchScript module for inference."""
def jit.optimize_for_inference(mod, other_methods=None):
"""Optimize TorchScript module for inference."""
def jit.enable_onednn_fusion(enabled: bool):
"""Enable/disable OneDNN fusion optimization."""
def jit.set_fusion_strategy(strategy: List[Tuple[str, bool]]):
"""Set fusion strategy for optimization."""Export PyTorch models for deployment and optimization.
def export.export(mod: nn.Module, args, kwargs=None, *, dynamic_shapes=None, strict=True) -> ExportedProgram:
"""
Export PyTorch module to exportable format.
Parameters:
- mod: Module to export
- args: Example arguments
- kwargs: Example keyword arguments
- dynamic_shapes: Dynamic shape specifications
- strict: Whether to enforce strict export
Returns:
ExportedProgram
"""
class export.ExportedProgram:
"""Exported PyTorch program."""
def module(self) -> nn.Module: ...
def graph_module(self): ...
def graph_signature(self): ...
def call_spec(self): ...
def verifier(self): ...
def state_dict(self) -> Dict[str, Any]: ...
def named_parameters(self): ...
def named_buffers(self): ...
def export.save(ep: ExportedProgram, f) -> None:
"""Save exported program to file."""
def export.load(f) -> ExportedProgram:
"""Load exported program from file."""Compile PyTorch models for performance optimization.
def compile(model=None, *, fullgraph=False, dynamic=None, backend="inductor", mode=None, options=None, disable=False):
"""
Compile PyTorch model for optimization.
Parameters:
- model: Model to compile (or use as decorator)
- fullgraph: Whether to compile the entire graph
- dynamic: Enable dynamic shapes
- backend: Compilation backend ("inductor", "aot_eager", etc.)
- mode: Compilation mode ("default", "reduce-overhead", "max-autotune")
- options: Backend-specific options
- disable: Disable compilation
Returns:
Compiled model
"""
@compile
def compiled_function(x):
"""Example of function compilation."""
return x * 2 + 1
# Alternative usage
compiled_model = torch.compile(model, mode="max-autotune")
Symbolic tracing and graph manipulation for model analysis and optimization.
class fx.GraphModule(nn.Module):
"""Module with FX graph representation."""
def __init__(self, root, graph, class_name='GraphModule'): ...
def recompile(self): ...
def code(self) -> str: ...
def graph(self): ...
def print_readable(self, print_output=True): ...
def fx.symbolic_trace(root, concrete_args=None, meta_args=None, _force_outplace=False) -> GraphModule:
"""
Symbolically trace PyTorch module.
Parameters:
- root: Module or function to trace
- concrete_args: Arguments to keep concrete
- meta_args: Meta tensor arguments
Returns:
GraphModule with traced computation graph
"""
class fx.Tracer:
"""Tracer for symbolic execution."""
def trace(self, root, concrete_args=None): ...
def call_module(self, m, forward, args, kwargs): ...
def call_function(self, target, args, kwargs): ...
def call_method(self, target, args, kwargs): ...
class fx.Graph:
"""Computational graph representation."""
def nodes(self): ...
def create_node(self, op, target, args=None, kwargs=None, name=None, type_expr=None): ...
def erase_node(self, to_erase): ...
def inserting_before(self, n): ...
def inserting_after(self, n): ...
def lint(self): ...
def print_tabular(self): ...
class fx.Node:
"""Node in FX graph."""
def replace_all_uses_with(self, replace_with): ...
def replace_input_with(self, old_input, new_input): ...
def append(self, x): ...
def prepend(self, x): ...
def fx.replace_pattern(gm: GraphModule, pattern, replacement) -> List[Match]:
"""Replace patterns in graph."""
class fx.Interpreter:
"""Base class for FX graph interpreters."""
def run(self, *args, **kwargs): ...
def run_node(self, n): ...
def call_function(self, target, args, kwargs): ...
def call_method(self, target, args, kwargs): ...
def call_module(self, target, args, kwargs): ...
Model quantization for efficient deployment.
def quantization.quantize_dynamic(model, qconfig_spec=None, dtype=torch.qint8, mapping=None, inplace=False, remove_qconfig=True):
"""
Dynamic quantization of model.
Parameters:
- model: Model to quantize
- qconfig_spec: Quantization configuration
- dtype: Target quantized data type
- mapping: Custom op mapping
- inplace: Whether to modify model in-place
Returns:
Quantized model
"""
def quantization.quantize(model, run_fn, run_args, mapping=None, inplace=False):
"""Post-training static quantization."""
def quantization.prepare(model, inplace=False, allow_list=None, observer_non_leaf_module_list=None, prepare_custom_config_dict=None):
"""Prepare model for quantization aware training."""
def quantization.convert(model, mapping=None, inplace=False, remove_qconfig=True, convert_custom_config_dict=None):
"""Convert prepared model to quantized version."""
def quantization.prepare_qat(model, mapping=None, inplace=False):
"""Prepare model for quantization aware training."""
class quantization.QuantStub(nn.Module):
"""Quantization stub for marking quantization points."""
def __init__(self, qconfig=None): ...
def forward(self, x): ...
class quantization.DeQuantStub(nn.Module):
"""Dequantization stub for marking dequantization points."""
def __init__(self): ...
def forward(self, x): ...
class quantization.QConfig:
"""Quantization configuration."""
def __init__(self, activation, weight): ...
def quantization.get_default_qconfig(backend='fbgemm'):
"""Get default quantization configuration."""
def quantization.get_default_qat_qconfig(backend='fbgemm'):
"""Get default QAT quantization configuration."""
class quantization.FakeQuantize(nn.Module):
"""Fake quantization for QAT."""
def __init__(self, observer=MinMaxObserver, quant_min=0, quant_max=255, **observer_kwargs): ...
def forward(self, X): ...
def calculate_qparams(self): ...
Export PyTorch models to ONNX format for interoperability.
def onnx.export(model, args, f, export_params=True, verbose=False, training=TrainingMode.EVAL,
input_names=None, output_names=None, operator_export_type=OperatorExportTypes.ONNX,
opset_version=None, do_constant_folding=True, dynamic_axes=None, keep_initializers_as_inputs=None,
custom_opsets=None, enable_onnx_checker=True, use_external_data_format=False):
"""
Export PyTorch model to ONNX format.
Parameters:
- model: PyTorch model to export
- args: Model input arguments
- f: File path or file-like object to save to
- export_params: Whether to export parameters
- verbose: Enable verbose output
- training: Training mode (EVAL, TRAINING, PRESERVE)
- input_names: Names for input nodes
- output_names: Names for output nodes
- opset_version: ONNX opset version
- dynamic_axes: Dynamic input/output axes
- custom_opsets: Custom operator sets
"""
def onnx.dynamo_export(model, *model_args, export_options=None, **model_kwargs) -> ONNXProgram:
"""Export using torch.export and Dynamo."""
class onnx.ONNXProgram:
"""ONNX program representation."""
def save(self, destination): ...
def model_proto(self): ...
def onnx.load(f) -> ModelProto:
"""Load ONNX model."""
def onnx.save(model, f, export_params=True):
"""Save ONNX model to file."""
class onnx.TrainingMode(Enum):
"""Training mode for ONNX export."""
EVAL = 0
TRAINING = 1
PRESERVE = 2
class onnx.OperatorExportTypes(Enum):
"""Operator export types."""
ONNX = 0
ONNX_ATEN = 1
ONNX_ATEN_FALLBACK = 2
Optimization utilities for mobile deployment.
def utils.mobile_optimizer.optimize_for_mobile(script_module, optimization_blocklist=None, preserved_methods=None, backend='CPU'):
"""
Optimize TorchScript module for mobile deployment.
Parameters:
- script_module: TorchScript module to optimize
- optimization_blocklist: Operations to exclude from optimization
- preserved_methods: Methods to preserve during optimization
- backend: Target backend ('CPU', 'Vulkan', 'Metal')
Returns:
Optimized TorchScript module
"""
class utils.mobile_optimizer.LiteScriptModule:
"""Lightweight script module for mobile."""
def forward(self, *args): ...
def get_debug_info(self): ...
NVIDIA TensorRT integration for GPU inference optimization.
def tensorrt.compile(model, inputs, enabled_precisions={torch.float}, workspace_size=1 << 22,
min_block_size=3, torch_executed_ops=None, torch_executed_modules=None):
"""
Compile model with TensorRT.
Parameters:
- model: PyTorch model to compile
- inputs: Example inputs for compilation
- enabled_precisions: Allowed precision types
- workspace_size: TensorRT workspace size
- min_block_size: Minimum block size for TensorRT subgraphs
Returns:
TensorRT compiled model
"""Automatic mixed precision training for performance and memory optimization.
class amp.GradScaler:
"""Gradient scaler for mixed precision training."""
def __init__(self, init_scale=2**16, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000, enabled=True):
"""
Parameters:
- init_scale: Initial scale factor
- growth_factor: Scale growth factor
- backoff_factor: Scale reduction factor
- growth_interval: Steps between scale increases
- enabled: Whether scaler is enabled
"""
def scale(self, outputs): ...
def step(self, optimizer): ...
def update(self): ...
def unscale_(self, optimizer): ...
def get_scale(self): ...
def get_growth_factor(self): ...
def set_growth_factor(self, new_factor): ...
def get_backoff_factor(self): ...
def set_backoff_factor(self, new_factor): ...
def get_growth_interval(self): ...
def set_growth_interval(self, new_interval): ...
def is_enabled(self): ...
def state_dict(self): ...
def load_state_dict(self, state_dict): ...
def amp.autocast(device_type='cuda', dtype=None, enabled=True, cache_enabled=None):
"""
Context manager for automatic mixed precision.
Parameters:
- device_type: Device type ('cuda', 'cpu', 'xpu')
- dtype: Target dtype (torch.float16, torch.bfloat16)
- enabled: Whether autocast is enabled
- cache_enabled: Whether to cache autocast state
"""Advanced optimization techniques including pruning and sparsity.
def ao.pruning.prune_low_magnitude(model, amount, importance_scores=None, structured=False, dim=None):
"""
Prune model by removing low magnitude weights.
Parameters:
- model: Model to prune
- amount: Fraction of weights to prune
- importance_scores: Custom importance scores
- structured: Whether to use structured pruning
- dim: Dimension for structured pruning
Returns:
Pruned model
"""
class ao.pruning.WeightNormSparsifier:
"""Weight norm based sparsifier."""
def __init__(self, sparsity_level=0.5): ...
def update_mask(self, module, tensor_name, **kwargs): ...
class ao.quantization.QConfigMapping:
"""Quantization configuration mapping."""
def set_global(self, qconfig): ...
def set_object_type(self, object_type, qconfig): ...
def set_module_name(self, module_name, qconfig): ...
def ao.quantization.get_default_qconfig_mapping(backend='x86'):
"""Get default quantization configuration mapping."""
class ao.quantization.FusedMovingAvgObsFakeQuantize(nn.Module):
"""Fused moving average observer fake quantize."""
def __init__(self, observer=MovingAverageMinMaxObserver, **observer_kwargs): ...
import torch
import torch.nn as nn
# Define model
class SimpleModel(nn.Module):
    """Minimal linear + ReLU model used to demonstrate TorchScript.

    Maps a (N, 10) input to a non-negative (N, 5) output.
    """
    def __init__(self):
        # Zero-argument super() — consistent with the other example models
        # in this file; the super(SimpleModel, self) form is redundant on
        # Python 3.
        super().__init__()
        self.linear = nn.Linear(10, 5)

    def forward(self, x):
        # ReLU keeps the scripted/traced graph a simple linear + clamp.
        return torch.relu(self.linear(x))
# Build the example model and put it in inference mode before compiling.
model = SimpleModel()
model.eval()
# Script compilation: compiles from Python source and preserves control flow.
scripted_model = torch.jit.script(model)
print(scripted_model.code)
# Trace compilation: records the ops executed for one example input; any
# data-dependent control flow is frozen to that input's path.
example_input = torch.randn(1, 10)
traced_model = torch.jit.trace(model, example_input)
# Save/load the scripted module as a self-contained archive.
torch.jit.save(scripted_model, 'model_scripted.pt')
loaded_model = torch.jit.load('model_scripted.pt')
# Optimization for inference (applies freezing and fusion passes).
optimized_model = torch.jit.optimize_for_inference(scripted_model)
print("TorchScript compilation completed")import torch
import torch.nn as nn
from torch.export import export
# Define model
class ExportModel(nn.Module):
    """Small conv classifier used to demonstrate torch.export.

    Maps a (N, 3, H, W) image batch to (N, 10) logits.
    """
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 16, 3, padding=1)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(16, 10)

    def forward(self, x):
        # conv -> relu -> global pool -> flatten -> linear head
        features = self.conv(x)
        activated = torch.relu(features)
        pooled = self.pool(activated)
        flat = pooled.flatten(1)
        return self.fc(flat)
# Instantiate the model and a representative input for export.
model = ExportModel()
example_input = torch.randn(1, 3, 32, 32)
# Export to ExportedProgram: a standalone, serializable graph capture of
# the module (args must be passed as a tuple).
exported_program = export(model, (example_input,))
# Save exported program (.pt2 archive format).
torch.export.save(exported_program, 'exported_model.pt2')
# Load exported program back from disk.
loaded_program = torch.export.load('exported_model.pt2')
# Use exported program: .module() returns a callable nn.Module view.
output = loaded_program.module()(example_input)
print(f"Export completed, output shape: {output.shape}")import torch
import torch.nn as nn
# Define model
# A small MLP used to demonstrate torch.compile.
model = nn.Sequential(
    nn.Linear(100, 200),
    nn.ReLU(),
    nn.Linear(200, 100),
    nn.ReLU(),
    nn.Linear(100, 10)
)
# Compile with different modes: "reduce-overhead" targets small-batch
# latency; "max-autotune" searches longer for the fastest kernels at the
# cost of longer compile time.
default_compiled = torch.compile(model)
fast_compiled = torch.compile(model, mode="reduce-overhead")
optimal_compiled = torch.compile(model, mode="max-autotune")
# Use as decorator: plain functions can be compiled too.
@torch.compile
def custom_function(x, y):
    """Compiled example: matrix product of x and y plus sum of x."""
    return x.matmul(y) + x.sum()
# Example usage (the first call triggers compilation; later calls reuse it).
x = torch.randn(32, 100)
y = torch.randn(100, 50)
# Compiled function
result = custom_function(x, y)
# Compiled model
output = optimal_compiled(x)
print(f"Torch compile completed, output shape: {output.shape}")import torch
import torch.nn as nn
import torch.quantization as quant
# Define model
class QuantModel(nn.Module):
    """Conv classifier bracketed by quant/dequant stubs.

    The stubs mark where tensors enter and leave the region that static
    quantization will convert; in eager fp32 mode they are identity ops.
    """
    def __init__(self):
        super().__init__()
        self.quant = quant.QuantStub()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, 10)
        self.dequant = quant.DeQuantStub()

    def forward(self, x):
        out = self.quant(x)               # enter the (potentially) quantized region
        out = self.relu1(self.conv1(out))
        out = self.relu2(self.conv2(out))
        out = self.pool(out)
        out = out.flatten(1)              # (N, 64, 1, 1) -> (N, 64)
        out = self.fc(out)
        return self.dequant(out)          # hand fp32 back to the caller
model = QuantModel()
model.eval()  # post-training quantization expects eval mode
# Dynamic quantization: weights are quantized ahead of time, activations
# on the fly; only nn.Linear layers are targeted here.
quantized_model = quant.quantize_dynamic(
    model, {nn.Linear}, dtype=torch.qint8
)
# Post-training static quantization: attach a backend qconfig, then
# insert observers with prepare().
model.qconfig = quant.get_default_qconfig('fbgemm')
prepared_model = quant.prepare(model)
# Calibration (example data): run representative inputs so the observers
# can record activation ranges.
for _ in range(10):
    calibration_data = torch.randn(1, 3, 32, 32)
    prepared_model(calibration_data)
# Convert to quantized model using the calibrated ranges.
quantized_static_model = quant.convert(prepared_model)
print("Quantization completed")
print(f"Original model size: {sum(p.numel() for p in model.parameters())}")
print(f"Quantized model parameters: {sum(p.numel() for p in quantized_model.parameters())}")import torch
import torch.nn as nn
import torch.onnx
# Define model
class ONNXModel(nn.Module):
    """ResNet-style stem plus classifier head for the ONNX export demo.

    Maps a (N, 3, H, W) image batch to (N, 1000) logits.
    """
    def __init__(self):
        super().__init__()
        # Assemble the pipeline as a list first, then wrap in Sequential.
        layers = [
            nn.Conv2d(3, 64, 7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(64, 1000),
        ]
        self.backbone = nn.Sequential(*layers)

    def forward(self, x):
        # Single sequential pipeline, no branching.
        return self.backbone(x)
model = ONNXModel()
model.eval()  # export in inference mode (fixed BatchNorm statistics)
# Example input: fixes the rank and dtype of the exported graph's input.
dummy_input = torch.randn(1, 3, 224, 224)
# Export to ONNX via the TorchScript-trace exporter.
torch.onnx.export(
    model,
    dummy_input,
    "model.onnx",
    export_params=True,        # embed weights inside the .onnx file
    opset_version=11,
    do_constant_folding=True,  # pre-compute constant subgraphs
    input_names=['input'],
    output_names=['output'],
    # Mark the batch dimension as dynamic so any batch size is accepted.
    dynamic_axes={
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'}
    }
)
print("ONNX export completed")import torch
import torch.nn as nn
import torch.fx as fx
# Define model
class FXModel(nn.Module):
    """Two-conv classifier used as the target of FX symbolic tracing.

    Maps a (N, 3, H, W) image batch to (N, 10) logits; both convs share
    one ReLU module.
    """
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.relu = nn.ReLU()
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, 10)

    def forward(self, x):
        hidden = self.relu(self.conv1(x))
        hidden = self.relu(self.conv2(hidden))
        pooled = self.pool(hidden)
        flat = pooled.flatten(1)
        return self.fc(flat)
# Symbolic tracing
model = FXModel()
traced = fx.symbolic_trace(model)
# Print graph
print("Original graph:")
traced.graph.print_tabular()
# Graph manipulation - replace ReLU with GELU.
# NOTE: FXModel applies ReLU through the nn.ReLU *module* (self.relu), so
# the traced graph contains call_module nodes with a string target, never
# call_function nodes with target torch.relu — the original match
# `node.target == torch.relu` could not fire. Match the module type
# instead. Iterate over a snapshot of the node list because nodes are
# erased while walking.
for node in list(traced.graph.nodes):
    if node.op == 'call_module' and isinstance(traced.get_submodule(node.target), nn.ReLU):
        with traced.graph.inserting_after(node):
            # GELU takes the same single input the ReLU node consumed.
            new_node = traced.graph.call_function(torch.nn.functional.gelu, args=(node.args[0],))
        node.replace_all_uses_with(new_node)
        traced.graph.erase_node(node)
# Recompile regenerates the module's Python code from the mutated graph.
traced.recompile()
print("\nModified graph:")
traced.graph.print_tabular()
# Test modified model
test_input = torch.randn(1, 3, 32, 32)
output = traced(test_input)
print(f"FX transformation completed, output shape: {output.shape}")import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
# Define model and training setup
# NOTE(review): torch.cuda.amp.autocast/GradScaler is the legacy API
# surface; newer torch releases prefer torch.amp with device_type="cuda"
# — confirm the minimum supported torch version before changing.
# Requires a CUDA device (.cuda() placements below).
model = nn.Sequential(
    nn.Linear(1000, 500),
    nn.ReLU(),
    nn.Linear(500, 100),
    nn.ReLU(),
    nn.Linear(100, 10)
).cuda()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
scaler = GradScaler()  # dynamic loss scaling to avoid fp16 gradient underflow
# Training loop with mixed precision
model.train()
for epoch in range(5):
    for batch_idx in range(100):  # Simulate 100 batches
        # Generate dummy data
        data = torch.randn(32, 1000).cuda()
        targets = torch.randint(0, 10, (32,)).cuda()
        optimizer.zero_grad()
        # Forward pass with autocast: eligible ops run in reduced precision.
        with autocast():
            outputs = model(data)
            loss = criterion(outputs, targets)
        # Backward pass with gradient scaling: scale -> backward -> step
        # (unscales, skips the step on inf/nan) -> update the scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        if batch_idx % 25 == 0:
            print(f"Epoch {epoch}, Batch {batch_idx}, Loss: {loss.item():.4f}, Scale: {scaler.get_scale()}")
print("Mixed precision training completed")Install with Tessl CLI
npx tessl i tessl/pypi-torch