CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-pycuda

Python wrapper for Nvidia CUDA parallel computation API with object cleanup, automatic error checking, and convenient abstractions.

62

0.93x
Overview
Eval results
Files

docs/gpu-arrays.md

GPU Arrays

High-level NumPy-like interface for GPU arrays supporting arithmetic operations, slicing, broadcasting, and seamless interoperability with NumPy arrays. GPUArray provides automatic memory management and Pythonic operations on GPU data.

Capabilities

Array Creation

Create GPU arrays from various sources with automatic memory management.

# API outline (signatures and docstrings only): GPUArray construction.
class GPUArray:
    def __init__(self, shape: tuple, dtype: np.dtype, allocator=None, order: str = "C"):
        """
        Create new GPU array.
        
        Parameters:
        - shape: tuple, array dimensions
        - dtype: numpy.dtype, element data type
        - allocator: memory allocator function (optional, defaults to None)
        - order: str, memory layout ("C" or "F"; defaults to "C")
        """
    
    @classmethod
    def from_array(cls, ary: np.ndarray, allocator=None) -> GPUArray:
        """
        Create GPU array from NumPy array.

        Parameters:
        - ary: numpy.ndarray, source array
        - allocator: memory allocator function (optional, defaults to None)

        Returns:
        GPUArray: new GPU array created from ary
        """

def empty(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
    """
    Create uninitialized GPU array.

    The array is described as uninitialized, so callers should not rely on
    its contents before writing to it.
    
    Parameters:
    - shape: tuple, array dimensions
    - dtype: numpy.dtype, element data type
    - allocator: memory allocator function (optional, defaults to None)
    - order: str, memory layout ("C" or "F"; defaults to "C")
    
    Returns:
    GPUArray: new uninitialized array
    """

def zeros(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
    """
    Create GPU array filled with zeros.

    Parameters:
    - shape: tuple, array dimensions
    - dtype: numpy.dtype, element data type
    - allocator: memory allocator function (optional, defaults to None)
    - order: str, memory layout ("C" or "F"; defaults to "C")

    Returns:
    GPUArray: new array filled with zeros
    """

def ones(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
    """
    Create GPU array filled with ones.

    Parameters:
    - shape: tuple, array dimensions
    - dtype: numpy.dtype, element data type
    - allocator: memory allocator function (optional, defaults to None)
    - order: str, memory layout ("C" or "F"; defaults to "C")

    Returns:
    GPUArray: new array filled with ones
    """

def full(shape: tuple, fill_value, dtype: np.dtype = None, allocator=None, order: str = "C") -> GPUArray:
    """
    Create GPU array filled with specified value.
    
    Parameters:
    - shape: tuple, array dimensions
    - fill_value: scalar, fill value
    - dtype: numpy.dtype, element data type (inferred if None, which is the
      default; presumably inferred from fill_value -- TODO confirm)
    - allocator: memory allocator function (optional, defaults to None)
    - order: str, memory layout ("C" or "F"; defaults to "C")
    
    Returns:
    GPUArray: new filled array
    """

def to_gpu(ary: np.ndarray, allocator=None) -> GPUArray:
    """
    Copy NumPy array to GPU.
    
    Parameters:
    - ary: numpy.ndarray, source array
    - allocator: memory allocator function (optional, defaults to None)
    
    Returns:
    GPUArray: GPU copy of array
    """

def to_gpu_async(ary: np.ndarray, allocator=None, stream=None) -> GPUArray:
    """
    Copy NumPy array to GPU asynchronously.

    Parameters:
    - ary: numpy.ndarray, source array
    - allocator: memory allocator function (optional, defaults to None)
    - stream: CUDA stream for the asynchronous transfer (optional, defaults
      to None; the behavior when no stream is given is not described here)

    Returns:
    GPUArray: GPU copy of array
    """

def arange(start, stop=None, step=1, dtype: np.dtype = None, allocator=None) -> GPUArray:
    """
    Create GPU array with evenly spaced values.

    NOTE(review): whether stop is excluded from the result (as in
    numpy.arange) is not stated here -- TODO confirm.
    
    Parameters:
    - start: scalar, start value (or stop if stop=None)
    - stop: scalar, stop value (optional, defaults to None)
    - step: scalar, step size (defaults to 1)
    - dtype: numpy.dtype, element data type (defaults to None)
    - allocator: memory allocator function (optional, defaults to None)
    
    Returns:
    GPUArray: array with evenly spaced values
    """

Data Transfer

Transfer data between CPU and GPU with synchronous and asynchronous operations.

# API outline (signatures and docstrings only): GPUArray host/device transfer.
class GPUArray:
    def get(self, ary: np.ndarray = None, async_: bool = False, stream=None) -> np.ndarray:
        """
        Copy GPU array to CPU.
        
        Parameters:
        - ary: numpy.ndarray, destination array (optional, defaults to None)
        - async_: bool, perform asynchronous transfer (defaults to False)
        - stream: Stream, CUDA stream for async transfer (defaults to None)
        
        Returns:
        numpy.ndarray: CPU copy of array
        """
    
    def set(self, ary: np.ndarray, async_: bool = False, stream=None) -> None:
        """
        Copy CPU array to GPU.
        
        Parameters:
        - ary: numpy.ndarray, source array
        - async_: bool, perform asynchronous transfer (defaults to False)
        - stream: Stream, CUDA stream for async transfer (defaults to None)
        """
    
    def set_async(self, ary: np.ndarray, stream=None) -> None:
        """
        Copy CPU array to GPU asynchronously.

        Parameters:
        - ary: numpy.ndarray, source array
        - stream: Stream, CUDA stream for async transfer (defaults to None)
        """
    
    # NOTE(review): unlike get(), this signature has no `ary` destination
    # parameter -- confirm against the upstream API.
    def get_async(self, stream=None) -> np.ndarray:
        """
        Copy GPU array to CPU asynchronously.

        Parameters:
        - stream: Stream, CUDA stream for async transfer (defaults to None)

        Returns:
        numpy.ndarray: CPU copy of array
        """

Array Properties

Access array metadata and properties.

# API outline (signatures and docstrings only): read-only GPUArray metadata.
# Every member below is declared as a property with no setter shown.
class GPUArray:
    @property
    def shape(self) -> tuple:
        """Array dimensions, as a tuple."""
    
    @property
    def dtype(self) -> np.dtype:
        """Element data type, as a numpy.dtype."""
    
    @property
    def size(self) -> int:
        """Total number of elements."""
    
    @property
    def nbytes(self) -> int:
        """Total bytes consumed by array."""
    
    @property
    def ndim(self) -> int:
        """Number of array dimensions."""
    
    @property
    def strides(self) -> tuple:
        """Bytes to step in each dimension, as a tuple."""
    
    @property
    def flags(self) -> dict:
        """Array flags (C_CONTIGUOUS, F_CONTIGUOUS, etc.)."""
    
    @property
    def itemsize(self) -> int:
        """Size of one array element in bytes."""
    
    @property
    def ptr(self) -> int:
        """GPU memory pointer as integer."""
    
    # NOTE(review): DeviceAllocation is not defined in this outline;
    # presumably it is the driver-level allocation type -- confirm.
    @property
    def gpudata(self) -> DeviceAllocation:
        """GPU memory allocation object."""

Arithmetic Operations

NumPy-compatible arithmetic operations with broadcasting support.

# API outline (signatures and docstrings only): GPUArray arithmetic operators.
# Binary operators take the right-hand operand as `other`; every operator
# listed here is annotated as returning a GPUArray.
class GPUArray:
    def __add__(self, other) -> GPUArray:
        """Element-wise addition (self + other)."""
    
    def __sub__(self, other) -> GPUArray:
        """Element-wise subtraction (self - other)."""
    
    def __mul__(self, other) -> GPUArray:
        """Element-wise multiplication (self * other)."""
    
    def __truediv__(self, other) -> GPUArray:
        """Element-wise division (self / other)."""
    
    def __floordiv__(self, other) -> GPUArray:
        """Element-wise floor division (self // other)."""
    
    def __mod__(self, other) -> GPUArray:
        """Element-wise remainder (self % other)."""
    
    def __pow__(self, other) -> GPUArray:
        """Element-wise power (self ** other)."""
    
    def __neg__(self) -> GPUArray:
        """Element-wise negation (-self)."""
    
    def __abs__(self) -> GPUArray:
        """Element-wise absolute value (abs(self))."""
    
    # In-place operations
    # Only +=, -=, *= and /= are listed here; no in-place variants are shown
    # for //, % or **.
    def __iadd__(self, other) -> GPUArray:
        """In-place addition (self += other)."""
    
    def __isub__(self, other) -> GPUArray:
        """In-place subtraction (self -= other)."""
    
    def __imul__(self, other) -> GPUArray:
        """In-place multiplication (self *= other)."""
    
    def __itruediv__(self, other) -> GPUArray:
        """In-place division (self /= other)."""

Comparison Operations

Element-wise comparison operations returning boolean arrays.

# API outline (signatures and docstrings only): GPUArray comparison operators.
# Each comparison is element-wise and is annotated as returning a GPUArray
# rather than a single Python bool.
class GPUArray:
    def __eq__(self, other) -> GPUArray:
        """Element-wise equality (self == other)."""
    
    def __ne__(self, other) -> GPUArray:
        """Element-wise inequality (self != other)."""
    
    def __lt__(self, other) -> GPUArray:
        """Element-wise less than (self < other)."""
    
    def __le__(self, other) -> GPUArray:
        """Element-wise less than or equal (self <= other)."""
    
    def __gt__(self, other) -> GPUArray:
        """Element-wise greater than (self > other)."""
    
    def __ge__(self, other) -> GPUArray:
        """Element-wise greater than or equal (self >= other)."""

Array Indexing and Slicing

Advanced indexing and slicing operations similar to NumPy.

# API outline (signatures and docstrings only): GPUArray indexing.
class GPUArray:
    def __getitem__(self, index) -> GPUArray:
        """
        Get array slice or elements.

        NOTE(review): the result is described as "view or copy"; which cases
        yield a view is not specified here -- confirm before relying on
        aliasing.
        
        Parameters:
        - index: slice, int, or tuple of indices
        
        Returns:
        GPUArray: sliced array view or copy
        """
    
    def __setitem__(self, index, value) -> None:
        """
        Set array slice or elements.
        
        Parameters:
        - index: slice, int, or tuple of indices  
        - value: scalar or array-like, values to set
        """
    
    def take(self, indices: GPUArray, axis: int = None, out: GPUArray = None) -> GPUArray:
        """
        Take elements along axis.
        
        Parameters:
        - indices: GPUArray, indices to take
        - axis: int, axis along which to take (None, the default, for flattened)
        - out: GPUArray, output array (optional, defaults to None)
        
        Returns:
        GPUArray: array with taken elements
        """
    
    def put(self, indices: GPUArray, values, mode: str = "raise") -> None:
        """
        Put values at specified indices.
        
        Parameters:
        - indices: GPUArray, target indices
        - values: scalar or array-like, values to put
        - mode: str, how to handle out-of-bound indices (defaults to "raise";
          other accepted values are not listed here)
        """

Array Manipulation

Reshape, transpose, and manipulate array structure.

# API outline (signatures and docstrings only): GPUArray shape manipulation.
class GPUArray:
    def reshape(self, shape: tuple, order: str = "C") -> GPUArray:
        """
        Return array with new shape.
        
        Parameters:
        - shape: tuple, new shape
        - order: str, read/write order ("C" or "F"; defaults to "C")
        
        Returns:
        GPUArray: reshaped array view
        """
    
    def transpose(self, axes: tuple = None) -> GPUArray:
        """
        Return transposed array.
        
        Parameters:
        - axes: tuple, permutation of axes (optional, defaults to None)
        
        Returns:
        GPUArray: transposed array
        """
    
    @property
    def T(self) -> GPUArray:
        """Transposed array, exposed as a property."""
    
    def flatten(self, order: str = "C") -> GPUArray:
        """
        Return flattened array.
        
        Parameters:
        - order: str, flatten order ("C" or "F"; defaults to "C")
        
        Returns:
        GPUArray: flattened array copy
        """
    
    def ravel(self, order: str = "C") -> GPUArray:
        """
        Return flattened array (view if possible).

        Parameters:
        - order: str, defaults to "C" (presumably the flatten order, "C" or
          "F", as for flatten -- TODO confirm)

        Returns:
        GPUArray: flattened array, a view if possible
        """
    
    def squeeze(self, axis: int = None) -> GPUArray:
        """
        Remove single-dimensional entries.
        
        Parameters:
        - axis: int, axis to squeeze (optional, defaults to None)
        
        Returns:
        GPUArray: squeezed array
        """
    
    def repeat(self, repeats, axis: int = None) -> GPUArray:
        """
        Repeat elements of array.
        
        Parameters:
        - repeats: int or array-like, number of repetitions
        - axis: int, axis along which to repeat (defaults to None; the
          behavior for None is not described here)
        
        Returns:
        GPUArray: array with repeated elements
        """

Reductions

Reduction operations along axes with optional output arrays.

# API outline (signatures and docstrings only): GPUArray reductions.
class GPUArray:
    # NOTE(review): the annotation says "-> GPUArray" while the docstring says
    # "GPUArray or scalar"; confirm which one is intended.
    def sum(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
        """
        Sum along axis.
        
        Parameters:
        - axis: int, axis to sum along (None, the default, for all)
        - dtype: numpy.dtype, output data type (defaults to None)
        - out: GPUArray, output array (optional, defaults to None)
        - keepdims: bool, keep reduced dimensions (defaults to False)
        
        Returns:
        GPUArray or scalar: sum result
        """
    
    def mean(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
        """
        Mean along axis.

        Parameters:
        - axis: int, axis to reduce along (defaults to None)
        - dtype: numpy.dtype, defaults to None (presumably the output data
          type, as for sum -- TODO confirm)
        - out: GPUArray, output array (optional, defaults to None)
        - keepdims: bool, defaults to False (presumably keeps reduced
          dimensions, as for sum -- TODO confirm)
        """
    
    def var(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False, ddof: int = 0) -> GPUArray:
        """
        Variance along axis.

        Parameters:
        - axis: int, axis to reduce along (defaults to None)
        - dtype: numpy.dtype, defaults to None
        - out: GPUArray, output array (optional, defaults to None)
        - keepdims: bool, defaults to False
        - ddof: int, defaults to 0 (presumably the delta degrees of freedom,
          as in numpy.var -- TODO confirm)
        """
    
    def std(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False, ddof: int = 0) -> GPUArray:
        """
        Standard deviation along axis.

        Parameters:
        - axis: int, axis to reduce along (defaults to None)
        - dtype: numpy.dtype, defaults to None
        - out: GPUArray, output array (optional, defaults to None)
        - keepdims: bool, defaults to False
        - ddof: int, defaults to 0 (presumably the delta degrees of freedom,
          as in numpy.std -- TODO confirm)
        """
    
    def min(self, axis: int = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
        """
        Minimum along axis.

        Parameters:
        - axis: int, axis to reduce along (defaults to None)
        - out: GPUArray, output array (optional, defaults to None)
        - keepdims: bool, defaults to False
        """
    
    def max(self, axis: int = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
        """
        Maximum along axis.

        Parameters:
        - axis: int, axis to reduce along (defaults to None)
        - out: GPUArray, output array (optional, defaults to None)
        - keepdims: bool, defaults to False
        """
    
    def dot(self, other: GPUArray) -> GPUArray:
        """
        Dot product with another array.
        
        Parameters:
        - other: GPUArray, other array
        
        Returns:
        GPUArray: dot product result
        """

Array Copying

Create copies and views of arrays.

# API outline (signatures and docstrings only): GPUArray copies, views, casts.
class GPUArray:
    def copy(self, order: str = "C") -> GPUArray:
        """
        Create copy of array.
        
        Parameters:
        - order: str, memory layout of copy (defaults to "C")
        
        Returns:
        GPUArray: array copy
        """
    
    def view(self, dtype: np.dtype = None) -> GPUArray:
        """
        Create view of array.
        
        Parameters:
        - dtype: numpy.dtype, view data type (optional, defaults to None)
        
        Returns:
        GPUArray: array view
        """
    
    def astype(self, dtype: np.dtype, order: str = "K", copy: bool = True) -> GPUArray:
        """
        Cast array to different data type.
        
        Parameters:
        - dtype: numpy.dtype, target data type
        - order: str, memory layout (defaults to "K"; the accepted values are
          not listed here)
        - copy: bool, force copy even if not needed (defaults to True)
        
        Returns:
        GPUArray: array with new data type
        """

Vector Types

PyCUDA provides CUDA vector types for efficient GPU computation.

# Vector type creation functions
# Signature-only outlines (bodies are `...`). Each function takes one scalar
# per component, named x, y, z, w in that order, and is annotated as returning
# a numpy.ndarray. The make_int* variants take ints and the make_float*
# variants take floats. Only int and float variants with 2-4 components are
# listed here.
def make_int2(x: int, y: int) -> np.ndarray: ...
def make_int3(x: int, y: int, z: int) -> np.ndarray: ...
def make_int4(x: int, y: int, z: int, w: int) -> np.ndarray: ...
def make_float2(x: float, y: float) -> np.ndarray: ...
def make_float3(x: float, y: float, z: float) -> np.ndarray: ...
def make_float4(x: float, y: float, z: float, w: float) -> np.ndarray: ...

# Vector types as numpy dtypes
# Each attribute is a NumPy structured dtype whose fields are named x, y, z, w
# (in that order); the numeric suffix of the attribute name is the number of
# fields. Base-name to field-type mapping used below:
#   char -> int8,   uchar -> uint8,   short -> int16,  ushort -> uint16,
#   int  -> int32,  uint  -> uint32,  float -> float32, double -> float64
# Only the two-component variant is listed for double.
# NOTE(review): SimpleNamespace is presumably types.SimpleNamespace; its
# import is not shown here -- confirm.
vec = SimpleNamespace(
    char2=np.dtype([('x', np.int8), ('y', np.int8)]),
    char3=np.dtype([('x', np.int8), ('y', np.int8), ('z', np.int8)]),
    char4=np.dtype([('x', np.int8), ('y', np.int8), ('z', np.int8), ('w', np.int8)]),
    uchar2=np.dtype([('x', np.uint8), ('y', np.uint8)]),
    uchar3=np.dtype([('x', np.uint8), ('y', np.uint8), ('z', np.uint8)]),
    uchar4=np.dtype([('x', np.uint8), ('y', np.uint8), ('z', np.uint8), ('w', np.uint8)]),
    short2=np.dtype([('x', np.int16), ('y', np.int16)]),
    short3=np.dtype([('x', np.int16), ('y', np.int16), ('z', np.int16)]),
    short4=np.dtype([('x', np.int16), ('y', np.int16), ('z', np.int16), ('w', np.int16)]),
    ushort2=np.dtype([('x', np.uint16), ('y', np.uint16)]),
    ushort3=np.dtype([('x', np.uint16), ('y', np.uint16), ('z', np.uint16)]),
    ushort4=np.dtype([('x', np.uint16), ('y', np.uint16), ('z', np.uint16), ('w', np.uint16)]),
    int2=np.dtype([('x', np.int32), ('y', np.int32)]),
    int3=np.dtype([('x', np.int32), ('y', np.int32), ('z', np.int32)]),
    int4=np.dtype([('x', np.int32), ('y', np.int32), ('z', np.int32), ('w', np.int32)]),
    uint2=np.dtype([('x', np.uint32), ('y', np.uint32)]),
    uint3=np.dtype([('x', np.uint32), ('y', np.uint32), ('z', np.uint32)]),
    uint4=np.dtype([('x', np.uint32), ('y', np.uint32), ('z', np.uint32), ('w', np.uint32)]),
    float2=np.dtype([('x', np.float32), ('y', np.float32)]),
    float3=np.dtype([('x', np.float32), ('y', np.float32), ('z', np.float32)]),
    float4=np.dtype([('x', np.float32), ('y', np.float32), ('z', np.float32), ('w', np.float32)]),
    double2=np.dtype([('x', np.float64), ('y', np.float64)])
)

Install with Tessl CLI

npx tessl i tessl/pypi-pycuda

docs

algorithm-kernels.md

driver-api.md

gpu-arrays.md

index.md

kernel-compilation.md

math-functions.md

opengl-integration.md

random-numbers.md

tile.json