# GPU Arrays

`GPUArray` is a high-level, NumPy-like interface to arrays stored in GPU memory. It supports arithmetic operations, slicing, broadcasting, and interoperability with NumPy arrays, and it provides automatic memory management and Pythonic operations on GPU data.
3
4
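## Capabilities

### Array Creation

Create GPU arrays with a given shape and dtype (uninitialized, or filled with zeros, ones, or a constant), from an evenly spaced range, or by copying an existing NumPy array. Memory for the new array is managed automatically.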
9
10
```python { .api }
11
class GPUArray:
12
def __init__(self, shape: tuple, dtype: np.dtype, allocator=None, order: str = "C"):
13
"""
14
Create new GPU array.
15
16
Parameters:
17
- shape: tuple, array dimensions
18
- dtype: numpy.dtype, element data type
19
- allocator: memory allocator function (optional)
20
- order: str, memory layout ("C" or "F")
21
"""
22
23
@classmethod
24
def from_array(cls, ary: np.ndarray, allocator=None) -> GPUArray:
25
"""Create GPU array from NumPy array."""
26
27
def empty(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
28
"""
29
Create uninitialized GPU array.
30
31
Parameters:
32
- shape: tuple, array dimensions
33
- dtype: numpy.dtype, element data type
34
- allocator: memory allocator function (optional)
35
- order: str, memory layout ("C" or "F")
36
37
Returns:
38
GPUArray: new uninitialized array
39
"""
40
41
def zeros(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
42
"""Create GPU array filled with zeros."""
43
44
def ones(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:
45
"""Create GPU array filled with ones."""
46
47
def full(shape: tuple, fill_value, dtype: np.dtype = None, allocator=None, order: str = "C") -> GPUArray:
48
"""
49
Create GPU array filled with specified value.
50
51
Parameters:
52
- shape: tuple, array dimensions
53
- fill_value: scalar, fill value
54
- dtype: numpy.dtype, element data type (inferred if None)
55
- allocator: memory allocator function (optional)
56
- order: str, memory layout ("C" or "F")
57
58
Returns:
59
GPUArray: new filled array
60
"""
61
62
def to_gpu(ary: np.ndarray, allocator=None) -> GPUArray:
63
"""
64
Copy NumPy array to GPU.
65
66
Parameters:
67
- ary: numpy.ndarray, source array
68
- allocator: memory allocator function (optional)
69
70
Returns:
71
GPUArray: GPU copy of array
72
"""
73
74
def to_gpu_async(ary: np.ndarray, allocator=None, stream=None) -> GPUArray:
75
"""Copy NumPy array to GPU asynchronously."""
76
77
def arange(start, stop=None, step=1, dtype: np.dtype = None, allocator=None) -> GPUArray:
78
"""
79
Create GPU array with evenly spaced values.
80
81
Parameters:
82
- start: scalar, start value (or stop if stop=None)
83
- stop: scalar, stop value (optional)
84
- step: scalar, step size
85
- dtype: numpy.dtype, element data type
86
- allocator: memory allocator function (optional)
87
88
Returns:
89
GPUArray: array with evenly spaced values
90
"""
91
```
92
93
### Data Transfer

Transfer data between host (CPU) memory and the GPU, either synchronously or asynchronously on a CUDA stream.
96
97
```python { .api }
98
class GPUArray:
99
def get(self, ary: np.ndarray = None, async_: bool = False, stream=None) -> np.ndarray:
100
"""
101
Copy GPU array to CPU.
102
103
Parameters:
104
- ary: numpy.ndarray, destination array (optional)
105
- async_: bool, perform asynchronous transfer
106
- stream: Stream, CUDA stream for async transfer
107
108
Returns:
109
numpy.ndarray: CPU copy of array
110
"""
111
112
def set(self, ary: np.ndarray, async_: bool = False, stream=None) -> None:
113
"""
114
Copy CPU array to GPU.
115
116
Parameters:
117
- ary: numpy.ndarray, source array
118
- async_: bool, perform asynchronous transfer
119
- stream: Stream, CUDA stream for async transfer
120
"""
121
122
def set_async(self, ary: np.ndarray, stream=None) -> None:
123
"""Copy CPU array to GPU asynchronously."""
124
125
def get_async(self, stream=None) -> np.ndarray:
126
"""Copy GPU array to CPU asynchronously."""
127
```
128
129
### Array Properties

Properties exposing an array's shape, element type, size, memory layout, and underlying GPU memory allocation.
132
133
```python { .api }
134
class GPUArray:
135
@property
136
def shape(self) -> tuple:
137
"""Array dimensions."""
138
139
@property
140
def dtype(self) -> np.dtype:
141
"""Element data type."""
142
143
@property
144
def size(self) -> int:
145
"""Total number of elements."""
146
147
@property
148
def nbytes(self) -> int:
149
"""Total bytes consumed by array."""
150
151
@property
152
def ndim(self) -> int:
153
"""Number of array dimensions."""
154
155
@property
156
def strides(self) -> tuple:
157
"""Bytes to step in each dimension."""
158
159
@property
160
def flags(self) -> dict:
161
"""Array flags (C_CONTIGUOUS, F_CONTIGUOUS, etc.)."""
162
163
@property
164
def itemsize(self) -> int:
165
"""Size of one array element in bytes."""
166
167
@property
168
def ptr(self) -> int:
169
"""GPU memory pointer as integer."""
170
171
@property
172
def gpudata(self) -> DeviceAllocation:
173
"""GPU memory allocation object."""
174
```
175
176
### Arithmetic Operations

NumPy-compatible element-wise arithmetic operators with broadcasting support, including in-place variants (`+=`, `-=`, `*=`, `/=`).
179
180
```python { .api }
181
class GPUArray:
182
def __add__(self, other) -> GPUArray:
183
"""Element-wise addition."""
184
185
def __sub__(self, other) -> GPUArray:
186
"""Element-wise subtraction."""
187
188
def __mul__(self, other) -> GPUArray:
189
"""Element-wise multiplication."""
190
191
def __truediv__(self, other) -> GPUArray:
192
"""Element-wise division."""
193
194
def __floordiv__(self, other) -> GPUArray:
195
"""Element-wise floor division."""
196
197
def __mod__(self, other) -> GPUArray:
198
"""Element-wise remainder."""
199
200
def __pow__(self, other) -> GPUArray:
201
"""Element-wise power."""
202
203
def __neg__(self) -> GPUArray:
204
"""Element-wise negation."""
205
206
def __abs__(self) -> GPUArray:
207
"""Element-wise absolute value."""
208
209
# In-place operations
210
def __iadd__(self, other) -> GPUArray:
211
"""In-place addition."""
212
213
def __isub__(self, other) -> GPUArray:
214
"""In-place subtraction."""
215
216
def __imul__(self, other) -> GPUArray:
217
"""In-place multiplication."""
218
219
def __itruediv__(self, other) -> GPUArray:
220
"""In-place division."""
221
```
222
223
### Comparison Operations

Element-wise comparison operators (`==`, `!=`, `<`, `<=`, `>`, `>=`). Each returns a `GPUArray` of boolean results.
226
227
```python { .api }
228
class GPUArray:
229
def __eq__(self, other) -> GPUArray:
230
"""Element-wise equality."""
231
232
def __ne__(self, other) -> GPUArray:
233
"""Element-wise inequality."""
234
235
def __lt__(self, other) -> GPUArray:
236
"""Element-wise less than."""
237
238
def __le__(self, other) -> GPUArray:
239
"""Element-wise less than or equal."""
240
241
def __gt__(self, other) -> GPUArray:
242
"""Element-wise greater than."""
243
244
def __ge__(self, other) -> GPUArray:
245
"""Element-wise greater than or equal."""
246
```
247
248
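### Array Indexing and Slicing

NumPy-style indexing and slicing with integers, slices, and tuples of indices, plus `take` and `put` for reading and writing elements at positions given by an index array.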
251
252
```python { .api }
253
class GPUArray:
254
def __getitem__(self, index) -> GPUArray:
255
"""
256
Get array slice or elements.
257
258
Parameters:
259
- index: slice, int, or tuple of indices
260
261
Returns:
262
GPUArray: sliced array view or copy
263
"""
264
265
def __setitem__(self, index, value) -> None:
266
"""
267
Set array slice or elements.
268
269
Parameters:
270
- index: slice, int, or tuple of indices
271
- value: scalar or array-like, values to set
272
"""
273
274
def take(self, indices: GPUArray, axis: int = None, out: GPUArray = None) -> GPUArray:
275
"""
276
Take elements along axis.
277
278
Parameters:
279
- indices: GPUArray, indices to take
280
- axis: int, axis along which to take (None for flattened)
281
- out: GPUArray, output array (optional)
282
283
Returns:
284
GPUArray: array with taken elements
285
"""
286
287
def put(self, indices: GPUArray, values, mode: str = "raise") -> None:
288
"""
289
Put values at specified indices.
290
291
Parameters:
292
- indices: GPUArray, target indices
293
- values: scalar or array-like, values to put
294
- mode: str, how to handle out-of-bound indices
295
"""
296
```
297
298
### Array Manipulation

Change an array's structure: reshape, transpose, flatten, squeeze out length-one dimensions, or repeat elements.
301
302
```python { .api }
303
class GPUArray:
304
def reshape(self, shape: tuple, order: str = "C") -> GPUArray:
305
"""
306
Return array with new shape.
307
308
Parameters:
309
- shape: tuple, new shape
310
- order: str, read/write order ("C" or "F")
311
312
Returns:
313
GPUArray: reshaped array view
314
"""
315
316
def transpose(self, axes: tuple = None) -> GPUArray:
317
"""
318
Return transposed array.
319
320
Parameters:
321
- axes: tuple, permutation of axes (optional)
322
323
Returns:
324
GPUArray: transposed array
325
"""
326
327
@property
328
def T(self) -> GPUArray:
329
"""Transposed array."""
330
331
def flatten(self, order: str = "C") -> GPUArray:
332
"""
333
Return flattened array.
334
335
Parameters:
336
- order: str, flatten order ("C" or "F")
337
338
Returns:
339
GPUArray: flattened array copy
340
"""
341
342
def ravel(self, order: str = "C") -> GPUArray:
343
"""Return flattened array (view if possible)."""
344
345
def squeeze(self, axis: int = None) -> GPUArray:
346
"""
347
Remove single-dimensional entries.
348
349
Parameters:
350
- axis: int, axis to squeeze (optional)
351
352
Returns:
353
GPUArray: squeezed array
354
"""
355
356
def repeat(self, repeats, axis: int = None) -> GPUArray:
357
"""
358
Repeat elements of array.
359
360
Parameters:
361
- repeats: int or array-like, number of repetitions
362
- axis: int, axis along which to repeat
363
364
Returns:
365
GPUArray: array with repeated elements
366
"""
367
```
368
369
### Reductions

Reduction operations along a single axis or over the whole array (`axis=None`), with optional output arrays, plus the dot product of two arrays.
372
373
```python { .api }
374
class GPUArray:
375
def sum(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
376
"""
377
Sum along axis.
378
379
Parameters:
380
- axis: int, axis to sum along (None for all)
381
- dtype: numpy.dtype, output data type
382
- out: GPUArray, output array (optional)
383
- keepdims: bool, keep reduced dimensions
384
385
Returns:
386
GPUArray or scalar: sum result
387
"""
388
389
def mean(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
390
"""Mean along axis."""
391
392
def var(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False, ddof: int = 0) -> GPUArray:
393
"""Variance along axis."""
394
395
def std(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False, ddof: int = 0) -> GPUArray:
396
"""Standard deviation along axis."""
397
398
def min(self, axis: int = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
399
"""Minimum along axis."""
400
401
def max(self, axis: int = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:
402
"""Maximum along axis."""
403
404
def dot(self, other: GPUArray) -> GPUArray:
405
"""
406
Dot product with another array.
407
408
Parameters:
409
- other: GPUArray, other array
410
411
Returns:
412
GPUArray: dot product result
413
"""
414
```
415
416
### Array Copying

Create copies, views, and type-converted versions of arrays.
419
420
```python { .api }
421
class GPUArray:
422
def copy(self, order: str = "C") -> GPUArray:
423
"""
424
Create copy of array.
425
426
Parameters:
427
- order: str, memory layout of copy
428
429
Returns:
430
GPUArray: array copy
431
"""
432
433
def view(self, dtype: np.dtype = None) -> GPUArray:
434
"""
435
Create view of array.
436
437
Parameters:
438
- dtype: numpy.dtype, view data type (optional)
439
440
Returns:
441
GPUArray: array view
442
"""
443
444
def astype(self, dtype: np.dtype, order: str = "K", copy: bool = True) -> GPUArray:
445
"""
446
Cast array to different data type.
447
448
Parameters:
449
- dtype: numpy.dtype, target data type
450
- order: str, memory layout
451
- copy: bool, force copy even if not needed
452
453
Returns:
454
GPUArray: array with new data type
455
"""
456
```
457
458
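## Vector Types

PyCUDA provides NumPy structured dtypes corresponding to CUDA's built-in vector types (such as `int2` and `float4`), together with `make_*` helper functions that construct values of those types for efficient GPU computation.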
461
462
```python { .api }
463
# Vector type creation functions
464
def make_int2(x: int, y: int) -> np.ndarray: ...
465
def make_int3(x: int, y: int, z: int) -> np.ndarray: ...
466
def make_int4(x: int, y: int, z: int, w: int) -> np.ndarray: ...
467
def make_float2(x: float, y: float) -> np.ndarray: ...
468
def make_float3(x: float, y: float, z: float) -> np.ndarray: ...
469
def make_float4(x: float, y: float, z: float, w: float) -> np.ndarray: ...
470
471
# Vector types as numpy dtypes
472
vec = SimpleNamespace(
473
char2=np.dtype([('x', np.int8), ('y', np.int8)]),
474
char3=np.dtype([('x', np.int8), ('y', np.int8), ('z', np.int8)]),
475
char4=np.dtype([('x', np.int8), ('y', np.int8), ('z', np.int8), ('w', np.int8)]),
476
uchar2=np.dtype([('x', np.uint8), ('y', np.uint8)]),
477
uchar3=np.dtype([('x', np.uint8), ('y', np.uint8), ('z', np.uint8)]),
478
uchar4=np.dtype([('x', np.uint8), ('y', np.uint8), ('z', np.uint8), ('w', np.uint8)]),
479
short2=np.dtype([('x', np.int16), ('y', np.int16)]),
480
short3=np.dtype([('x', np.int16), ('y', np.int16), ('z', np.int16)]),
481
short4=np.dtype([('x', np.int16), ('y', np.int16), ('z', np.int16), ('w', np.int16)]),
482
ushort2=np.dtype([('x', np.uint16), ('y', np.uint16)]),
483
ushort3=np.dtype([('x', np.uint16), ('y', np.uint16), ('z', np.uint16)]),
484
ushort4=np.dtype([('x', np.uint16), ('y', np.uint16), ('z', np.uint16), ('w', np.uint16)]),
485
int2=np.dtype([('x', np.int32), ('y', np.int32)]),
486
int3=np.dtype([('x', np.int32), ('y', np.int32), ('z', np.int32)]),
487
int4=np.dtype([('x', np.int32), ('y', np.int32), ('z', np.int32), ('w', np.int32)]),
488
uint2=np.dtype([('x', np.uint32), ('y', np.uint32)]),
489
uint3=np.dtype([('x', np.uint32), ('y', np.uint32), ('z', np.uint32)]),
490
uint4=np.dtype([('x', np.uint32), ('y', np.uint32), ('z', np.uint32), ('w', np.uint32)]),
491
float2=np.dtype([('x', np.float32), ('y', np.float32)]),
492
float3=np.dtype([('x', np.float32), ('y', np.float32), ('z', np.float32)]),
493
float4=np.dtype([('x', np.float32), ('y', np.float32), ('z', np.float32), ('w', np.float32)]),
494
double2=np.dtype([('x', np.float64), ('y', np.float64)])
495
)
496
```