Tessl Tile for pypi/pycuda@2025.1.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

algorithm-kernels.md driver-api.md gpu-arrays.md index.md kernel-compilation.md math-functions.md opengl-integration.md random-numbers.md

docs/gpu-arrays.md

0
# GPU Arrays
1

2
High-level NumPy-like interface for GPU arrays supporting arithmetic operations, slicing, broadcasting, and seamless interoperability with NumPy arrays. GPUArray provides automatic memory management and Pythonic operations on GPU data.
3

4
## Capabilities
5

6
### Array Creation
7

8
Create GPU arrays from various sources with automatic memory management.
9

10
```python { .api }
11
class GPUArray:
12
    def __init__(self, shape: tuple, dtype: np.dtype, allocator=None, order: str = "C"):
13
        """
14
        Create new GPU array.
15
        
16
        Parameters:
17
        - shape: tuple, array dimensions
18
        - dtype: numpy.dtype, element data type
19
        - allocator: memory allocator function (optional)
20
        - order: str, memory layout ("C" or "F")
21
        """
22
    
23
    @classmethod
24
    def from_array(cls, ary: np.ndarray, allocator=None) -> GPUArray:
25
        """Create GPU array from NumPy array."""
26

27
def empty(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
28
    """
29
    Create uninitialized GPU array.
30
    
31
    Parameters:
32
    - shape: tuple, array dimensions
33
    - dtype: numpy.dtype, element data type
34
    - allocator: memory allocator function (optional)
35
    - order: str, memory layout ("C" or "F")
36
    
37
    Returns:
38
    GPUArray: new uninitialized array
39
    """
40

41
def zeros(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
42
    """Create GPU array filled with zeros."""
43

44
def ones(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
45
    """Create GPU array filled with ones."""
46

47
def full(shape: tuple, fill_value, dtype: np.dtype = None, allocator=None, order: str = "C") -> GPUArray:
48
    """
49
    Create GPU array filled with specified value.
50
    
51
    Parameters:
52
    - shape: tuple, array dimensions
53
    - fill_value: scalar, fill value
54
    - dtype: numpy.dtype, element data type (inferred if None)
55
    - allocator: memory allocator function (optional)
56
    - order: str, memory layout ("C" or "F")
57
    
58
    Returns:
59
    GPUArray: new filled array
60
    """
61

62
def to_gpu(ary: np.ndarray, allocator=None) -> GPUArray:
63
    """
64
    Copy NumPy array to GPU.
65
    
66
    Parameters:
67
    - ary: numpy.ndarray, source array
68
    - allocator: memory allocator function (optional)
69
    
70
    Returns:
71
    GPUArray: GPU copy of array
72
    """
73

74
def to_gpu_async(ary: np.ndarray, allocator=None, stream=None) -> GPUArray:
75
    """Copy NumPy array to GPU asynchronously."""
76

77
def arange(start, stop=None, step=1, dtype: np.dtype = None, allocator=None) -> GPUArray:
78
    """
79
    Create GPU array with evenly spaced values.
80
    
81
    Parameters:
82
    - start: scalar, start value (or stop if stop=None)
83
    - stop: scalar, stop value (optional)
84
    - step: scalar, step size
85
    - dtype: numpy.dtype, element data type
86
    - allocator: memory allocator function (optional)
87
    
88
    Returns:
89
    GPUArray: array with evenly spaced values
90
    """
91
```
92

93
### Data Transfer
94

95
Transfer data between CPU and GPU with synchronous and asynchronous operations.
96

97
```python { .api }
98
class GPUArray:
99
    def get(self, ary: np.ndarray = None, async_: bool = False, stream=None) -> np.ndarray:
100
        """
101
        Copy GPU array to CPU.
102
        
103
        Parameters:
104
        - ary: numpy.ndarray, destination array (optional)
105
        - async_: bool, perform asynchronous transfer
106
        - stream: Stream, CUDA stream for async transfer
107
        
108
        Returns:
109
        numpy.ndarray: CPU copy of array
110
        """
111
    
112
    def set(self, ary: np.ndarray, async_: bool = False, stream=None) -> None:
113
        """
114
        Copy CPU array to GPU.
115
        
116
        Parameters:
117
        - ary: numpy.ndarray, source array
118
        - async_: bool, perform asynchronous transfer
119
        - stream: Stream, CUDA stream for async transfer
120
        """
121
    
122
    def set_async(self, ary: np.ndarray, stream=None) -> None:
123
        """Copy CPU array to GPU asynchronously."""
124
    
125
    def get_async(self, stream=None) -> np.ndarray:
126
        """Copy GPU array to CPU asynchronously."""
127
```
128

129
### Array Properties
130

131
Access array metadata and properties.
132

133
```python { .api }
134
class GPUArray:
135
    @property
136
    def shape(self) -> tuple:
137
        """Array dimensions."""
138
    
139
    @property
140
    def dtype(self) -> np.dtype:
141
        """Element data type."""
142
    
143
    @property
144
    def size(self) -> int:
145
        """Total number of elements."""
146
    
147
    @property
148
    def nbytes(self) -> int:
149
        """Total bytes consumed by array."""
150
    
151
    @property
152
    def ndim(self) -> int:
153
        """Number of array dimensions."""
154
    
155
    @property
156
    def strides(self) -> tuple:
157
        """Bytes to step in each dimension."""
158
    
159
    @property
160
    def flags(self) -> dict:
161
        """Array flags (C_CONTIGUOUS, F_CONTIGUOUS, etc.)."""
162
    
163
    @property
164
    def itemsize(self) -> int:
165
        """Size of one array element in bytes."""
166
    
167
    @property
168
    def ptr(self) -> int:
169
        """GPU memory pointer as integer."""
170
    
171
    @property
172
    def gpudata(self) -> DeviceAllocation:
173
        """GPU memory allocation object."""
174
```
175

176
### Arithmetic Operations
177

178
NumPy-compatible arithmetic operations with broadcasting support.
179

180
```python { .api }
181
class GPUArray:
182
    def __add__(self, other) -> GPUArray:
183
        """Element-wise addition."""
184
    
185
    def __sub__(self, other) -> GPUArray:
186
        """Element-wise subtraction."""
187
    
188
    def __mul__(self, other) -> GPUArray:
189
        """Element-wise multiplication."""
190
    
191
    def __truediv__(self, other) -> GPUArray:
192
        """Element-wise division."""
193
    
194
    def __floordiv__(self, other) -> GPUArray:
195
        """Element-wise floor division."""
196
    
197
    def __mod__(self, other) -> GPUArray:
198
        """Element-wise remainder."""
199
    
200
    def __pow__(self, other) -> GPUArray:
201
        """Element-wise power."""
202
    
203
    def __neg__(self) -> GPUArray:
204
        """Element-wise negation."""
205
    
206
    def __abs__(self) -> GPUArray:
207
        """Element-wise absolute value."""
208
    
209
    # In-place operations
210
    def __iadd__(self, other) -> GPUArray:
211
        """In-place addition."""
212
    
213
    def __isub__(self, other) -> GPUArray:
214
        """In-place subtraction."""
215
    
216
    def __imul__(self, other) -> GPUArray:
217
        """In-place multiplication."""
218
    
219
    def __itruediv__(self, other) -> GPUArray:
220
        """In-place division."""
221
```
222

223
### Comparison Operations
224

225
Element-wise comparison operations returning boolean arrays.
226

227
```python { .api }
228
class GPUArray:
229
    def __eq__(self, other) -> GPUArray:
230
        """Element-wise equality."""
231
    
232
    def __ne__(self, other) -> GPUArray:
233
        """Element-wise inequality."""
234
    
235
    def __lt__(self, other) -> GPUArray:
236
        """Element-wise less than."""
237
    
238
    def __le__(self, other) -> GPUArray:
239
        """Element-wise less than or equal."""
240
    
241
    def __gt__(self, other) -> GPUArray:
242
        """Element-wise greater than."""
243
    
244
    def __ge__(self, other) -> GPUArray:
245
        """Element-wise greater than or equal."""
246
```
247

248
### Array Indexing and Slicing
249

250
Advanced indexing and slicing operations similar to NumPy.
251

252
```python { .api }
253
class GPUArray:
254
    def __getitem__(self, index) -> GPUArray:
255
        """
256
        Get array slice or elements.
257
        
258
        Parameters:
259
        - index: slice, int, or tuple of indices
260
        
261
        Returns:
262
        GPUArray: sliced array view or copy
263
        """
264
    
265
    def __setitem__(self, index, value) -> None:
266
        """
267
        Set array slice or elements.
268
        
269
        Parameters:
270
        - index: slice, int, or tuple of indices  
271
        - value: scalar or array-like, values to set
272
        """
273
    
274
    def take(self, indices: GPUArray, axis: int = None, out: GPUArray = None) -> GPUArray:
275
        """
276
        Take elements along axis.
277
        
278
        Parameters:
279
        - indices: GPUArray, indices to take
280
        - axis: int, axis along which to take (None for flattened)
281
        - out: GPUArray, output array (optional)
282
        
283
        Returns:
284
        GPUArray: array with taken elements
285
        """
286
    
287
    def put(self, indices: GPUArray, values, mode: str = "raise") -> None:
288
        """
289
        Put values at specified indices.
290
        
291
        Parameters:
292
        - indices: GPUArray, target indices
293
        - values: scalar or array-like, values to put
294
        - mode: str, how to handle out-of-bound indices
295
        """
296
```
297

298
### Array Manipulation
299

300
Reshape, transpose, and manipulate array structure.
301

302
```python { .api }
303
class GPUArray:
304
    def reshape(self, shape: tuple, order: str = "C") -> GPUArray:
305
        """
306
        Return array with new shape.
307
        
308
        Parameters:
309
        - shape: tuple, new shape
310
        - order: str, read/write order ("C" or "F")
311
        
312
        Returns:
313
        GPUArray: reshaped array view
314
        """
315
    
316
    def transpose(self, axes: tuple = None) -> GPUArray:
317
        """
318
        Return transposed array.
319
        
320
        Parameters:
321
        - axes: tuple, permutation of axes (optional)
322
        
323
        Returns:
324
        GPUArray: transposed array
325
        """
326
    
327
    @property
328
    def T(self) -> GPUArray:
329
        """Transposed array."""
330
    
331
    def flatten(self, order: str = "C") -> GPUArray:
332
        """
333
        Return flattened array.
334
        
335
        Parameters:
336
        - order: str, flatten order ("C" or "F")
337
        
338
        Returns:
339
        GPUArray: flattened array copy
340
        """
341
    
342
    def ravel(self, order: str = "C") -> GPUArray:
343
        """Return flattened array (view if possible)."""
344
    
345
    def squeeze(self, axis: int = None) -> GPUArray:
346
        """
347
        Remove single-dimensional entries.
348
        
349
        Parameters:
350
        - axis: int, axis to squeeze (optional)
351
        
352
        Returns:
353
        GPUArray: squeezed array
354
        """
355
    
356
    def repeat(self, repeats, axis: int = None) -> GPUArray:
357
        """
358
        Repeat elements of array.
359
        
360
        Parameters:
361
        - repeats: int or array-like, number of repetitions
362
        - axis: int, axis along which to repeat
363
        
364
        Returns:
365
        GPUArray: array with repeated elements
366
        """
367
```
368

369
### Reductions
370

371
Reduction operations along axes with optional output arrays.
372

373
```python { .api }
374
class GPUArray:
375
    def sum(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
376
        """
377
        Sum along axis.
378
        
379
        Parameters:
380
        - axis: int, axis to sum along (None for all)
381
        - dtype: numpy.dtype, output data type
382
        - out: GPUArray, output array (optional)
383
        - keepdims: bool, keep reduced dimensions
384
        
385
        Returns:
386
        GPUArray or scalar: sum result
387
        """
388
    
389
    def mean(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
390
        """Mean along axis."""
391
    
392
    def var(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False, ddof: int = 0) -> GPUArray:
393
        """Variance along axis."""
394
    
395
    def std(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False, ddof: int = 0) -> GPUArray:
396
        """Standard deviation along axis."""
397
    
398
    def min(self, axis: int = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
399
        """Minimum along axis."""
400
    
401
    def max(self, axis: int = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
402
        """Maximum along axis."""
403
    
404
    def dot(self, other: GPUArray) -> GPUArray:
405
        """
406
        Dot product with another array.
407
        
408
        Parameters:
409
        - other: GPUArray, other array
410
        
411
        Returns:
412
        GPUArray: dot product result
413
        """
414
```
415

416
### Array Copying
417

418
Create copies and views of arrays.
419

420
```python { .api }
421
class GPUArray:
422
    def copy(self, order: str = "C") -> GPUArray:
423
        """
424
        Create copy of array.
425
        
426
        Parameters:
427
        - order: str, memory layout of copy
428
        
429
        Returns:
430
        GPUArray: array copy
431
        """
432
    
433
    def view(self, dtype: np.dtype = None) -> GPUArray:
434
        """
435
        Create view of array.
436
        
437
        Parameters:
438
        - dtype: numpy.dtype, view data type (optional)
439
        
440
        Returns:
441
        GPUArray: array view
442
        """
443
    
444
    def astype(self, dtype: np.dtype, order: str = "K", copy: bool = True) -> GPUArray:
445
        """
446
        Cast array to different data type.
447
        
448
        Parameters:
449
        - dtype: numpy.dtype, target data type
450
        - order: str, memory layout
451
        - copy: bool, force copy even if not needed
452
        
453
        Returns:
454
        GPUArray: array with new data type
455
        """
456
```
457

458
## Vector Types
459

460
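PyCUDA provides CUDA vector types for efficient GPU computation. Each vector type is exposed as a NumPy structured dtype on the `vec` namespace (fields `x`, `y`, `z`, `w`), alongside `make_*` helper functions that build individual vector values.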
461

462
```python { .api }
463
# Vector type creation functions
464
def make_int2(x: int, y: int) -> np.ndarray: ...
465
def make_int3(x: int, y: int, z: int) -> np.ndarray: ...
466
def make_int4(x: int, y: int, z: int, w: int) -> np.ndarray: ...
467
def make_float2(x: float, y: float) -> np.ndarray: ...
468
def make_float3(x: float, y: float, z: float) -> np.ndarray: ...
469
def make_float4(x: float, y: float, z: float, w: float) -> np.ndarray: ...
470

471
# Vector types as numpy dtypes
472
vec = SimpleNamespace(
473
    char2=np.dtype([('x', np.int8), ('y', np.int8)]),
474
    char3=np.dtype([('x', np.int8), ('y', np.int8), ('z', np.int8)]),
475
    char4=np.dtype([('x', np.int8), ('y', np.int8), ('z', np.int8), ('w', np.int8)]),
476
    uchar2=np.dtype([('x', np.uint8), ('y', np.uint8)]),
477
    uchar3=np.dtype([('x', np.uint8), ('y', np.uint8), ('z', np.uint8)]),
478
    uchar4=np.dtype([('x', np.uint8), ('y', np.uint8), ('z', np.uint8), ('w', np.uint8)]),
479
    short2=np.dtype([('x', np.int16), ('y', np.int16)]),
480
    short3=np.dtype([('x', np.int16), ('y', np.int16), ('z', np.int16)]),
481
    short4=np.dtype([('x', np.int16), ('y', np.int16), ('z', np.int16), ('w', np.int16)]),
482
    ushort2=np.dtype([('x', np.uint16), ('y', np.uint16)]),
483
    ushort3=np.dtype([('x', np.uint16), ('y', np.uint16), ('z', np.uint16)]),
484
    ushort4=np.dtype([('x', np.uint16), ('y', np.uint16), ('z', np.uint16), ('w', np.uint16)]),
485
    int2=np.dtype([('x', np.int32), ('y', np.int32)]),
486
    int3=np.dtype([('x', np.int32), ('y', np.int32), ('z', np.int32)]),
487
    int4=np.dtype([('x', np.int32), ('y', np.int32), ('z', np.int32), ('w', np.int32)]),
488
    uint2=np.dtype([('x', np.uint32), ('y', np.uint32)]),
489
    uint3=np.dtype([('x', np.uint32), ('y', np.uint32), ('z', np.uint32)]),
490
    uint4=np.dtype([('x', np.uint32), ('y', np.uint32), ('z', np.uint32), ('w', np.uint32)]),
491
    float2=np.dtype([('x', np.float32), ('y', np.float32)]),
492
    float3=np.dtype([('x', np.float32), ('y', np.float32), ('z', np.float32)]),
493
    float4=np.dtype([('x', np.float32), ('y', np.float32), ('z', np.float32), ('w', np.float32)]),
494
    double2=np.dtype([('x', np.float64), ('y', np.float64)])
495
)
496
```

Version

Tile

Files

docs/gpu-arrays.md

docs/gpu-arrays.md