CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-warp-lang

A Python framework for high-performance simulation and graphics programming that JIT compiles Python functions to efficient GPU/CPU kernel code.

Overview
Eval results
Files

docs/core-execution.md

Core Execution and Device Management

Essential functions for initializing Warp, managing devices, launching kernels, and controlling execution. These functions form the foundation for all Warp programs and must be understood to effectively use any other Warp capabilities.

Capabilities

Initialization

Initialize the Warp runtime and make all devices available for computation.

def init() -> None:
    """
    Initialize Warp and detect available devices.
    Must be called before using any other Warp functionality.
    """

Example:

import warp as wp
wp.init()  # Always call this first

Device Management

Query and control available computation devices (CPU and CUDA GPUs).

def is_cpu_available() -> bool:
    """Check if CPU device is available."""

def is_cuda_available() -> bool:
    """Check if CUDA GPU devices are available."""

def is_device_available(device: Device) -> bool:
    """Check if specific device is available."""

def get_devices() -> list:
    """Get list of all available devices."""

def get_preferred_device() -> Device:
    """Get the preferred device (CUDA if available, else CPU)."""

def get_device(ident: str = None) -> Device:
    """
    Get device by identifier.
    
    Args:
        ident: Device identifier like "cpu", "cuda:0", "cuda:1"
        
    Returns:
        Device object for the specified device
    """

def set_device(device: Device) -> None:
    """Set the current active device for subsequent operations."""

def synchronize_device(device: Device = None) -> None:
    """Wait for all operations on device to complete."""

CUDA Device Management

Specialized functions for managing CUDA GPU devices.

def get_cuda_devices() -> list:
    """Get list of available CUDA devices."""

def get_cuda_device_count() -> int:
    """Get number of available CUDA devices."""

def get_cuda_device(device_id: int = 0) -> Device:
    """Get CUDA device by index."""

def map_cuda_device(device_id: int) -> Device:
    """Map CUDA device for interop with other libraries."""

def unmap_cuda_device(device: Device) -> None:
    """Unmap previously mapped CUDA device."""

Kernel Execution

Launch compiled kernels on devices with specified thread dimensions.

def launch(kernel: Kernel,
          dim: int | Sequence[int],
          inputs: Sequence = [],
          outputs: Sequence = [],
          adj_inputs: Sequence = [],
          adj_outputs: Sequence = [],
          device: Device = None,
          stream: Stream = None,
          adjoint: bool = False,
          record_tape: bool = True,
          record_cmd: bool = False,
          max_blocks: int = 0,
          block_dim: int = 256) -> None:
    """
    Launch a kernel with specified thread count.
    
    Args:
        kernel: Compiled kernel function
        dim: Number of threads or tuple of dimensions
        inputs: Input arguments to kernel
        outputs: Output arguments
        adj_inputs: Adjoint input arguments for reverse mode
        adj_outputs: Adjoint output arguments for reverse mode
        device: Device to run on (uses current if None)
        stream: CUDA stream for async execution
        adjoint: Whether to run adjoint/backward pass
        record_tape: Whether to record operations for autodiff
        record_cmd: Whether to record for replay
        max_blocks: Maximum number of thread blocks
        block_dim: Number of threads per block
    """

def launch_tiled(kernel: Kernel,
                dim: tuple,
                inputs: list,
                outputs: list = None,
                device: Device = None,
                stream: Stream = None) -> None:
    """
    Launch a tiled kernel with 2D/3D thread organization.
    
    Args:
        dim: Tuple of thread dimensions (x, y, z)
        Other args same as launch()
    """

Synchronization

Control execution timing and wait for operations to complete.

def synchronize() -> None:
    """Wait for all pending operations to complete on all devices."""

def synchronize_device(device: Device = None) -> None:
    """Wait for operations on specific device to complete."""

def force_load(module=None) -> None:
    """Force compilation and loading of kernels."""

Module Management

Control kernel compilation and module loading behavior.

def load_module(module_name: str = None) -> Module:
    """Load or get existing module containing kernels."""

def get_module(module_name: str = None) -> Module:
    """Get module by name."""

def set_module_options(options: dict) -> None:
    """Set compilation options for modules."""

def get_module_options() -> dict:
    """Get current module compilation options."""

Array Creation

Create and initialize arrays on specified devices.

def zeros(shape: int | tuple[int, ...] | list[int] | None = None,
         dtype: type = float,
         device: Device = None,
         requires_grad: bool = False,
         pinned: bool = False) -> array:
    """Create array filled with zeros."""

def zeros_like(arr: array, 
              dtype: type = None, 
              device: Device = None) -> array:
    """Create zero array with same shape as existing array."""

def ones(shape: int | tuple[int, ...] | list[int] | None = None,
        dtype: type = float,
        device: Device = None,
        requires_grad: bool = False,
        pinned: bool = False) -> array:
    """Create array filled with ones."""

def ones_like(arr: array, 
             dtype: type = None, 
             device: Device = None) -> array:
    """Create ones array with same shape as existing array."""

def full(shape: int | tuple[int, ...] | list[int] | None = None,
        value=0,
        dtype: type = None,
        device: Device = None,
        requires_grad: bool = False,
        pinned: bool = False) -> array:
    """Create array filled with specified value."""

def full_like(arr: array, 
             value, 
             dtype: type = None, 
             device: Device = None) -> array:
    """Create filled array with same shape as existing array."""

def empty(shape: int | tuple[int, ...] | list[int] | None = None,
         dtype: type = float,
         device: Device = None,
         requires_grad: bool = False,
         pinned: bool = False) -> array:
    """Create uninitialized array (faster than zeros)."""

def empty_like(arr: array, 
              dtype: type = None, 
              device: Device = None) -> array:
    """Create empty array with same shape as existing array."""

def clone(arr: array, 
         device: Device = None) -> array:
    """Create deep copy of array."""

def copy(src: array, 
        dest: array, 
        src_offset: int = 0, 
        dest_offset: int = 0, 
        count: int = None) -> None:
    """Copy data between arrays."""

def from_numpy(arr: np.ndarray, 
              dtype: type = None, 
              device: Device = None) -> array:
    """Create Warp array from NumPy array."""

Usage Examples

Basic Device Setup

import warp as wp

# Initialize Warp
wp.init()

# Check available devices
if wp.is_cuda_available():
    device = wp.get_device("cuda:0")
    print(f"Using GPU: {device}")
else:
    device = wp.get_device("cpu") 
    print("Using CPU")

wp.set_device(device)

Kernel Launch Pattern

# Create arrays
n = 1000000
a = wp.ones(n, dtype=float, device=device)
b = wp.zeros(n, dtype=float, device=device)

# Launch kernel
wp.launch(my_kernel, dim=n, inputs=[a, b], device=device)

# Wait for completion
wp.synchronize_device(device)

Types

class Device:
    """Represents a computation device (CPU or GPU)."""
    
    def __str__(self) -> str:
        """String representation of device."""
    
    @property
    def context(self):
        """Device context for low-level operations."""

class Module:
    """Container for compiled kernels and functions."""
    
    def load(self) -> None:
        """Load/compile the module."""

class Kernel:
    """Compiled kernel function that can be launched."""
    
    def __call__(self, *args, **kwargs):
        """Direct kernel invocation (same as wp.launch)."""

class Function:
    """Compiled function that can be called from kernels."""
    
    def __call__(self, *args, **kwargs):
        """Function invocation."""

Install with Tessl CLI

npx tessl i tessl/pypi-warp-lang

docs

core-execution.md

fem.md

framework-integration.md

index.md

kernel-programming.md

optimization.md

rendering.md

types-arrays.md

utilities.md

tile.json