Python wrapper for NVIDIA's CUDA parallel computation API, with object cleanup, automatic error checking, and convenient abstractions.
62
High-level NumPy-like interface for GPU arrays supporting arithmetic operations, slicing, broadcasting, and seamless interoperability with NumPy arrays. GPUArray provides automatic memory management and Pythonic operations on GPU data.
Create GPU arrays from various sources with automatic memory management.
class GPUArray:
def __init__(self, shape: tuple, dtype: np.dtype, allocator=None, order: str = "C"):
"""
Create new GPU array.
Parameters:
- shape: tuple, array dimensions
- dtype: numpy.dtype, element data type
- allocator: memory allocator function (optional)
- order: str, memory layout ("C" or "F")
"""
@classmethod
def from_array(cls, ary: np.ndarray, allocator=None) -> GPUArray:
"""Create GPU array from NumPy array."""
def empty(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
"""
Create uninitialized GPU array.
Parameters:
- shape: tuple, array dimensions
- dtype: numpy.dtype, element data type
- allocator: memory allocator function (optional)
- order: str, memory layout ("C" or "F")
Returns:
GPUArray: new uninitialized array
"""
def zeros(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
"""Create GPU array filled with zeros."""
def ones(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
"""Create GPU array filled with ones."""
def full(shape: tuple, fill_value, dtype: np.dtype = None, allocator=None, order: str = "C") -> GPUArray:
"""
Create GPU array filled with specified value.
Parameters:
- shape: tuple, array dimensions
- fill_value: scalar, fill value
- dtype: numpy.dtype, element data type (inferred if None)
- allocator: memory allocator function (optional)
- order: str, memory layout ("C" or "F")
Returns:
GPUArray: new filled array
"""
def to_gpu(ary: np.ndarray, allocator=None) -> GPUArray:
"""
Copy NumPy array to GPU.
Parameters:
- ary: numpy.ndarray, source array
- allocator: memory allocator function (optional)
Returns:
GPUArray: GPU copy of array
"""
def to_gpu_async(ary: np.ndarray, allocator=None, stream=None) -> GPUArray:
"""Copy NumPy array to GPU asynchronously."""
def arange(start, stop=None, step=1, dtype: np.dtype = None, allocator=None) -> GPUArray:
"""
Create GPU array with evenly spaced values.
Parameters:
- start: scalar, start value (or stop if stop=None)
- stop: scalar, stop value (optional)
- step: scalar, step size
- dtype: numpy.dtype, element data type
- allocator: memory allocator function (optional)
Returns:
GPUArray: array with evenly spaced values
"""

Transfer data between CPU and GPU with synchronous and asynchronous operations.
class GPUArray:
def get(self, ary: np.ndarray = None, async_: bool = False, stream=None) -> np.ndarray:
"""
Copy GPU array to CPU.
Parameters:
- ary: numpy.ndarray, destination array (optional)
- async_: bool, perform asynchronous transfer
- stream: Stream, CUDA stream for async transfer
Returns:
numpy.ndarray: CPU copy of array
"""
def set(self, ary: np.ndarray, async_: bool = False, stream=None) -> None:
"""
Copy CPU array to GPU.
Parameters:
- ary: numpy.ndarray, source array
- async_: bool, perform asynchronous transfer
- stream: Stream, CUDA stream for async transfer
"""
def set_async(self, ary: np.ndarray, stream=None) -> None:
"""Copy CPU array to GPU asynchronously."""
def get_async(self, stream=None) -> np.ndarray:
"""Copy GPU array to CPU asynchronously."""

Access array metadata and properties.
class GPUArray:
@property
def shape(self) -> tuple:
"""Array dimensions."""
@property
def dtype(self) -> np.dtype:
"""Element data type."""
@property
def size(self) -> int:
"""Total number of elements."""
@property
def nbytes(self) -> int:
"""Total bytes consumed by array."""
@property
def ndim(self) -> int:
"""Number of array dimensions."""
@property
def strides(self) -> tuple:
"""Bytes to step in each dimension."""
@property
def flags(self) -> dict:
"""Array flags (C_CONTIGUOUS, F_CONTIGUOUS, etc.)."""
@property
def itemsize(self) -> int:
"""Size of one array element in bytes."""
@property
def ptr(self) -> int:
"""GPU memory pointer as integer."""
@property
def gpudata(self) -> DeviceAllocation:
"""GPU memory allocation object."""

NumPy-compatible arithmetic operations with broadcasting support.
class GPUArray:
def __add__(self, other) -> GPUArray:
"""Element-wise addition."""
def __sub__(self, other) -> GPUArray:
"""Element-wise subtraction."""
def __mul__(self, other) -> GPUArray:
"""Element-wise multiplication."""
def __truediv__(self, other) -> GPUArray:
"""Element-wise division."""
def __floordiv__(self, other) -> GPUArray:
"""Element-wise floor division."""
def __mod__(self, other) -> GPUArray:
"""Element-wise remainder."""
def __pow__(self, other) -> GPUArray:
"""Element-wise power."""
def __neg__(self) -> GPUArray:
"""Element-wise negation."""
def __abs__(self) -> GPUArray:
"""Element-wise absolute value."""
# In-place operations
def __iadd__(self, other) -> GPUArray:
"""In-place addition."""
def __isub__(self, other) -> GPUArray:
"""In-place subtraction."""
def __imul__(self, other) -> GPUArray:
"""In-place multiplication."""
def __itruediv__(self, other) -> GPUArray:
"""In-place division."""

Element-wise comparison operations returning boolean arrays.
class GPUArray:
def __eq__(self, other) -> GPUArray:
"""Element-wise equality."""
def __ne__(self, other) -> GPUArray:
"""Element-wise inequality."""
def __lt__(self, other) -> GPUArray:
"""Element-wise less than."""
def __le__(self, other) -> GPUArray:
"""Element-wise less than or equal."""
def __gt__(self, other) -> GPUArray:
"""Element-wise greater than."""
def __ge__(self, other) -> GPUArray:
"""Element-wise greater than or equal."""

Advanced indexing and slicing operations similar to NumPy.
class GPUArray:
def __getitem__(self, index) -> GPUArray:
"""
Get array slice or elements.
Parameters:
- index: slice, int, or tuple of indices
Returns:
GPUArray: sliced array view or copy
"""
def __setitem__(self, index, value) -> None:
"""
Set array slice or elements.
Parameters:
- index: slice, int, or tuple of indices
- value: scalar or array-like, values to set
"""
def take(self, indices: GPUArray, axis: int = None, out: GPUArray = None) -> GPUArray:
"""
Take elements along axis.
Parameters:
- indices: GPUArray, indices to take
- axis: int, axis along which to take (None for flattened)
- out: GPUArray, output array (optional)
Returns:
GPUArray: array with taken elements
"""
def put(self, indices: GPUArray, values, mode: str = "raise") -> None:
"""
Put values at specified indices.
Parameters:
- indices: GPUArray, target indices
- values: scalar or array-like, values to put
- mode: str, how to handle out-of-bound indices
"""

Reshape, transpose, and manipulate array structure.
class GPUArray:
def reshape(self, shape: tuple, order: str = "C") -> GPUArray:
"""
Return array with new shape.
Parameters:
- shape: tuple, new shape
- order: str, read/write order ("C" or "F")
Returns:
GPUArray: reshaped array view
"""
def transpose(self, axes: tuple = None) -> GPUArray:
"""
Return transposed array.
Parameters:
- axes: tuple, permutation of axes (optional)
Returns:
GPUArray: transposed array
"""
@property
def T(self) -> GPUArray:
"""Transposed array."""
def flatten(self, order: str = "C") -> GPUArray:
"""
Return flattened array.
Parameters:
- order: str, flatten order ("C" or "F")
Returns:
GPUArray: flattened array copy
"""
def ravel(self, order: str = "C") -> GPUArray:
"""Return flattened array (view if possible)."""
def squeeze(self, axis: int = None) -> GPUArray:
"""
Remove single-dimensional entries.
Parameters:
- axis: int, axis to squeeze (optional)
Returns:
GPUArray: squeezed array
"""
def repeat(self, repeats, axis: int = None) -> GPUArray:
"""
Repeat elements of array.
Parameters:
- repeats: int or array-like, number of repetitions
- axis: int, axis along which to repeat
Returns:
GPUArray: array with repeated elements
"""

Reduction operations along axes with optional output arrays.
class GPUArray:
def sum(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
"""
Sum along axis.
Parameters:
- axis: int, axis to sum along (None for all)
- dtype: numpy.dtype, output data type
- out: GPUArray, output array (optional)
- keepdims: bool, keep reduced dimensions
Returns:
GPUArray or scalar: sum result
"""
def mean(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
"""Mean along axis."""
def var(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False, ddof: int = 0) -> GPUArray:
"""Variance along axis."""
def std(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False, ddof: int = 0) -> GPUArray:
"""Standard deviation along axis."""
def min(self, axis: int = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
"""Minimum along axis."""
def max(self, axis: int = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
"""Maximum along axis."""
def dot(self, other: GPUArray) -> GPUArray:
"""
Dot product with another array.
Parameters:
- other: GPUArray, other array
Returns:
GPUArray: dot product result
"""

Create copies and views of arrays.
class GPUArray:
def copy(self, order: str = "C") -> GPUArray:
"""
Create copy of array.
Parameters:
- order: str, memory layout of copy
Returns:
GPUArray: array copy
"""
def view(self, dtype: np.dtype = None) -> GPUArray:
"""
Create view of array.
Parameters:
- dtype: numpy.dtype, view data type (optional)
Returns:
GPUArray: array view
"""
def astype(self, dtype: np.dtype, order: str = "K", copy: bool = True) -> GPUArray:
"""
Cast array to different data type.
Parameters:
- dtype: numpy.dtype, target data type
- order: str, memory layout
- copy: bool, force copy even if not needed
Returns:
GPUArray: array with new data type
"""

PyCUDA provides CUDA vector types for efficient GPU computation.
# Vector type creation functions
def make_int2(x: int, y: int) -> np.ndarray: ...
def make_int3(x: int, y: int, z: int) -> np.ndarray: ...
def make_int4(x: int, y: int, z: int, w: int) -> np.ndarray: ...
def make_float2(x: float, y: float) -> np.ndarray: ...
def make_float3(x: float, y: float, z: float) -> np.ndarray: ...
def make_float4(x: float, y: float, z: float, w: float) -> np.ndarray: ...
# Vector types as numpy dtypes
vec = SimpleNamespace(
char2=np.dtype([('x', np.int8), ('y', np.int8)]),
char3=np.dtype([('x', np.int8), ('y', np.int8), ('z', np.int8)]),
char4=np.dtype([('x', np.int8), ('y', np.int8), ('z', np.int8), ('w', np.int8)]),
uchar2=np.dtype([('x', np.uint8), ('y', np.uint8)]),
uchar3=np.dtype([('x', np.uint8), ('y', np.uint8), ('z', np.uint8)]),
uchar4=np.dtype([('x', np.uint8), ('y', np.uint8), ('z', np.uint8), ('w', np.uint8)]),
short2=np.dtype([('x', np.int16), ('y', np.int16)]),
short3=np.dtype([('x', np.int16), ('y', np.int16), ('z', np.int16)]),
short4=np.dtype([('x', np.int16), ('y', np.int16), ('z', np.int16), ('w', np.int16)]),
ushort2=np.dtype([('x', np.uint16), ('y', np.uint16)]),
ushort3=np.dtype([('x', np.uint16), ('y', np.uint16), ('z', np.uint16)]),
ushort4=np.dtype([('x', np.uint16), ('y', np.uint16), ('z', np.uint16), ('w', np.uint16)]),
int2=np.dtype([('x', np.int32), ('y', np.int32)]),
int3=np.dtype([('x', np.int32), ('y', np.int32), ('z', np.int32)]),
int4=np.dtype([('x', np.int32), ('y', np.int32), ('z', np.int32), ('w', np.int32)]),
uint2=np.dtype([('x', np.uint32), ('y', np.uint32)]),
uint3=np.dtype([('x', np.uint32), ('y', np.uint32), ('z', np.uint32)]),
uint4=np.dtype([('x', np.uint32), ('y', np.uint32), ('z', np.uint32), ('w', np.uint32)]),
float2=np.dtype([('x', np.float32), ('y', np.float32)]),
float3=np.dtype([('x', np.float32), ('y', np.float32), ('z', np.float32)]),
float4=np.dtype([('x', np.float32), ('y', np.float32), ('z', np.float32), ('w', np.float32)]),
double2=np.dtype([('x', np.float64), ('y', np.float64)])
)

Install with Tessl CLI
npx tessl i tessl/pypi-pycuda
docs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10