or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

algorithm-kernels.md, driver-api.md, gpu-arrays.md, index.md, kernel-compilation.md, math-functions.md, opengl-integration.md, random-numbers.md

docs/gpu-arrays.md

0

# GPU Arrays

1

2

GPUArray is a high-level, NumPy-like interface for GPU arrays that supports arithmetic operations, slicing, broadcasting, and seamless interoperability with NumPy arrays. It provides automatic memory management and Pythonic operations on GPU data.

3

4

## Capabilities

5

6

### Array Creation

7

8

Create GPU arrays from various sources with automatic memory management.

9

10

```python { .api }

11

class GPUArray:

12

def __init__(self, shape: tuple, dtype: np.dtype, allocator=None, order: str = "C"):

13

"""

14

Create new GPU array.

15

16

Parameters:

17

- shape: tuple, array dimensions

18

- dtype: numpy.dtype, element data type

19

- allocator: memory allocator function (optional)

20

- order: str, memory layout ("C" or "F")

21

"""

22

23

@classmethod

24

def from_array(cls, ary: np.ndarray, allocator=None) -> GPUArray:

25

"""Create GPU array from NumPy array."""

26

27

def empty(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:

28

"""

29

Create uninitialized GPU array.

30

31

Parameters:

32

- shape: tuple, array dimensions

33

- dtype: numpy.dtype, element data type

34

- allocator: memory allocator function (optional)

35

- order: str, memory layout ("C" or "F")

36

37

Returns:

38

GPUArray: new uninitialized array

39

"""

40

41

def zeros(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:

42

"""Create GPU array filled with zeros."""

43

44

def ones(shape: tuple, dtype: np.dtype, allocator=None, order: str = "C") -> GPUArray:

45

"""Create GPU array filled with ones."""

46

47

def full(shape: tuple, fill_value, dtype: np.dtype = None, allocator=None, order: str = "C") -> GPUArray:

48

"""

49

Create GPU array filled with specified value.

50

51

Parameters:

52

- shape: tuple, array dimensions

53

- fill_value: scalar, fill value

54

- dtype: numpy.dtype, element data type (inferred if None)

55

- allocator: memory allocator function (optional)

56

- order: str, memory layout ("C" or "F")

57

58

Returns:

59

GPUArray: new filled array

60

"""

61

62

def to_gpu(ary: np.ndarray, allocator=None) -> GPUArray:

63

"""

64

Copy NumPy array to GPU.

65

66

Parameters:

67

- ary: numpy.ndarray, source array

68

- allocator: memory allocator function (optional)

69

70

Returns:

71

GPUArray: GPU copy of array

72

"""

73

74

def to_gpu_async(ary: np.ndarray, allocator=None, stream=None) -> GPUArray:

75

"""Copy NumPy array to GPU asynchronously."""

76

77

def arange(start, stop=None, step=1, dtype: np.dtype = None, allocator=None) -> GPUArray:

78

"""

79

Create GPU array with evenly spaced values.

80

81

Parameters:

82

- start: scalar, start value (if stop is None, this value is used as the stop value and the sequence starts at 0)

83

- stop: scalar, stop value (optional)

84

- step: scalar, step size

85

- dtype: numpy.dtype, element data type

86

- allocator: memory allocator function (optional)

87

88

Returns:

89

GPUArray: array with evenly spaced values

90

"""

91

```

92

93

### Data Transfer

94

95

Transfer data between CPU and GPU with synchronous and asynchronous operations.

96

97

```python { .api }

98

class GPUArray:

99

def get(self, ary: np.ndarray = None, async_: bool = False, stream=None) -> np.ndarray:

100

"""

101

Copy GPU array to CPU.

102

103

Parameters:

104

- ary: numpy.ndarray, destination array (optional)

105

- async_: bool, perform asynchronous transfer

106

- stream: Stream, CUDA stream for async transfer

107

108

Returns:

109

numpy.ndarray: CPU copy of array

110

"""

111

112

def set(self, ary: np.ndarray, async_: bool = False, stream=None) -> None:

113

"""

114

Copy CPU array to GPU.

115

116

Parameters:

117

- ary: numpy.ndarray, source array

118

- async_: bool, perform asynchronous transfer

119

- stream: Stream, CUDA stream for async transfer

120

"""

121

122

def set_async(self, ary: np.ndarray, stream=None) -> None:

123

"""Copy CPU array to GPU asynchronously."""

124

125

def get_async(self, stream=None) -> np.ndarray:

126

"""Copy GPU array to CPU asynchronously."""

127

```

128

129

### Array Properties

130

131

Access array metadata and properties.

132

133

```python { .api }

134

class GPUArray:

135

@property

136

def shape(self) -> tuple:

137

"""Array dimensions."""

138

139

@property

140

def dtype(self) -> np.dtype:

141

"""Element data type."""

142

143

@property

144

def size(self) -> int:

145

"""Total number of elements."""

146

147

@property

148

def nbytes(self) -> int:

149

"""Total bytes consumed by array."""

150

151

@property

152

def ndim(self) -> int:

153

"""Number of array dimensions."""

154

155

@property

156

def strides(self) -> tuple:

157

"""Bytes to step in each dimension."""

158

159

@property

160

def flags(self) -> dict:

161

"""Array flags (C_CONTIGUOUS, F_CONTIGUOUS, etc.)."""

162

163

@property

164

def itemsize(self) -> int:

165

"""Size of one array element in bytes."""

166

167

@property

168

def ptr(self) -> int:

169

"""GPU memory pointer as integer."""

170

171

@property

172

def gpudata(self) -> DeviceAllocation:

173

"""GPU memory allocation object."""

174

```

175

176

### Arithmetic Operations

177

178

NumPy-compatible arithmetic operations with broadcasting support.

179

180

```python { .api }

181

class GPUArray:

182

def __add__(self, other) -> GPUArray:

183

"""Element-wise addition."""

184

185

def __sub__(self, other) -> GPUArray:

186

"""Element-wise subtraction."""

187

188

def __mul__(self, other) -> GPUArray:

189

"""Element-wise multiplication."""

190

191

def __truediv__(self, other) -> GPUArray:

192

"""Element-wise division."""

193

194

def __floordiv__(self, other) -> GPUArray:

195

"""Element-wise floor division."""

196

197

def __mod__(self, other) -> GPUArray:

198

"""Element-wise remainder."""

199

200

def __pow__(self, other) -> GPUArray:

201

"""Element-wise power."""

202

203

def __neg__(self) -> GPUArray:

204

"""Element-wise negation."""

205

206

def __abs__(self) -> GPUArray:

207

"""Element-wise absolute value."""

208

209

# In-place operations

210

def __iadd__(self, other) -> GPUArray:

211

"""In-place addition."""

212

213

def __isub__(self, other) -> GPUArray:

214

"""In-place subtraction."""

215

216

def __imul__(self, other) -> GPUArray:

217

"""In-place multiplication."""

218

219

def __itruediv__(self, other) -> GPUArray:

220

"""In-place division."""

221

```

222

223

### Comparison Operations

224

225

Element-wise comparison operations returning boolean arrays.

226

227

```python { .api }

228

class GPUArray:

229

def __eq__(self, other) -> GPUArray:

230

"""Element-wise equality."""

231

232

def __ne__(self, other) -> GPUArray:

233

"""Element-wise inequality."""

234

235

def __lt__(self, other) -> GPUArray:

236

"""Element-wise less than."""

237

238

def __le__(self, other) -> GPUArray:

239

"""Element-wise less than or equal."""

240

241

def __gt__(self, other) -> GPUArray:

242

"""Element-wise greater than."""

243

244

def __ge__(self, other) -> GPUArray:

245

"""Element-wise greater than or equal."""

246

```

247

248

### Array Indexing and Slicing

249

250

Advanced indexing and slicing operations similar to NumPy.

251

252

```python { .api }

253

class GPUArray:

254

def __getitem__(self, index) -> GPUArray:

255

"""

256

Get array slice or elements.

257

258

Parameters:

259

- index: slice, int, or tuple of indices

260

261

Returns:

262

GPUArray: sliced array view or copy

263

"""

264

265

def __setitem__(self, index, value) -> None:

266

"""

267

Set array slice or elements.

268

269

Parameters:

270

- index: slice, int, or tuple of indices

271

- value: scalar or array-like, values to set

272

"""

273

274

def take(self, indices: GPUArray, axis: int = None, out: GPUArray = None) -> GPUArray:

275

"""

276

Take elements along axis.

277

278

Parameters:

279

- indices: GPUArray, indices to take

280

- axis: int, axis along which to take (None for flattened)

281

- out: GPUArray, output array (optional)

282

283

Returns:

284

GPUArray: array with taken elements

285

"""

286

287

def put(self, indices: GPUArray, values, mode: str = "raise") -> None:

288

"""

289

Put values at specified indices.

290

291

Parameters:

292

- indices: GPUArray, target indices

293

- values: scalar or array-like, values to put

294

- mode: str, how to handle out-of-bounds indices

295

"""

296

```

297

298

### Array Manipulation

299

300

Reshape, transpose, and manipulate array structure.

301

302

```python { .api }

303

class GPUArray:

304

def reshape(self, shape: tuple, order: str = "C") -> GPUArray:

305

"""

306

Return array with new shape.

307

308

Parameters:

309

- shape: tuple, new shape

310

- order: str, read/write order ("C" or "F")

311

312

Returns:

313

GPUArray: reshaped array view

314

"""

315

316

def transpose(self, axes: tuple = None) -> GPUArray:

317

"""

318

Return transposed array.

319

320

Parameters:

321

- axes: tuple, permutation of axes (optional)

322

323

Returns:

324

GPUArray: transposed array

325

"""

326

327

@property

328

def T(self) -> GPUArray:

329

"""Transposed array."""

330

331

def flatten(self, order: str = "C") -> GPUArray:

332

"""

333

Return flattened array.

334

335

Parameters:

336

- order: str, flatten order ("C" or "F")

337

338

Returns:

339

GPUArray: flattened array copy

340

"""

341

342

def ravel(self, order: str = "C") -> GPUArray:

343

"""Return flattened array (view if possible)."""

344

345

def squeeze(self, axis: int = None) -> GPUArray:

346

"""

347

Remove single-dimensional entries.

348

349

Parameters:

350

- axis: int, axis to squeeze (optional)

351

352

Returns:

353

GPUArray: squeezed array

354

"""

355

356

def repeat(self, repeats, axis: int = None) -> GPUArray:

357

"""

358

Repeat elements of array.

359

360

Parameters:

361

- repeats: int or array-like, number of repetitions

362

- axis: int, axis along which to repeat

363

364

Returns:

365

GPUArray: array with repeated elements

366

"""

367

```

368

369

### Reductions

370

371

Reduction operations along axes with optional output arrays.

372

373

```python { .api }

374

class GPUArray:

375

def sum(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:

376

"""

377

Sum along axis.

378

379

Parameters:

380

- axis: int, axis to sum along (None for all)

381

- dtype: numpy.dtype, output data type

382

- out: GPUArray, output array (optional)

383

- keepdims: bool, keep reduced dimensions

384

385

Returns:

386

GPUArray or scalar: sum result

387

"""

388

389

def mean(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:

390

"""Mean along axis."""

391

392

def var(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False, ddof: int = 0) -> GPUArray:

393

"""Variance along axis."""

394

395

def std(self, axis: int = None, dtype: np.dtype = None, out: GPUArray = None, keepdims: bool = False, ddof: int = 0) -> GPUArray:

396

"""Standard deviation along axis."""

397

398

def min(self, axis: int = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:

399

"""Minimum along axis."""

400

401

def max(self, axis: int = None, out: GPUArray = None, keepdims: bool = False) -> GPUArray:

402

"""Maximum along axis."""

403

404

def dot(self, other: GPUArray) -> GPUArray:

405

"""

406

Dot product with another array.

407

408

Parameters:

409

- other: GPUArray, other array

410

411

Returns:

412

GPUArray: dot product result

413

"""

414

```

415

416

### Array Copying

417

418

Create copies and views of arrays.

419

420

```python { .api }

421

class GPUArray:

422

def copy(self, order: str = "C") -> GPUArray:

423

"""

424

Create copy of array.

425

426

Parameters:

427

- order: str, memory layout of copy

428

429

Returns:

430

GPUArray: array copy

431

"""

432

433

def view(self, dtype: np.dtype = None) -> GPUArray:

434

"""

435

Create view of array.

436

437

Parameters:

438

- dtype: numpy.dtype, view data type (optional)

439

440

Returns:

441

GPUArray: array view

442

"""

443

444

def astype(self, dtype: np.dtype, order: str = "K", copy: bool = True) -> GPUArray:

445

"""

446

Cast array to different data type.

447

448

Parameters:

449

- dtype: numpy.dtype, target data type

450

- order: str, memory layout

451

- copy: bool, force copy even if not needed

452

453

Returns:

454

GPUArray: array with new data type

455

"""

456

```

457

458

## Vector Types

459

460

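PyCUDA provides CUDA vector types (for example, int2 and float4) for efficient GPU computation. They are exposed as NumPy structured dtypes with fields named x, y, z, and w.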

461

462

```python { .api }

463

# Vector type creation functions

464

def make_int2(x: int, y: int) -> np.ndarray: ...

465

def make_int3(x: int, y: int, z: int) -> np.ndarray: ...

466

def make_int4(x: int, y: int, z: int, w: int) -> np.ndarray: ...

467

def make_float2(x: float, y: float) -> np.ndarray: ...

468

def make_float3(x: float, y: float, z: float) -> np.ndarray: ...

469

def make_float4(x: float, y: float, z: float, w: float) -> np.ndarray: ...

470

471

# Vector types as numpy dtypes

472

vec = SimpleNamespace(

473

char2=np.dtype([('x', np.int8), ('y', np.int8)]),

474

char3=np.dtype([('x', np.int8), ('y', np.int8), ('z', np.int8)]),

475

char4=np.dtype([('x', np.int8), ('y', np.int8), ('z', np.int8), ('w', np.int8)]),

476

uchar2=np.dtype([('x', np.uint8), ('y', np.uint8)]),

477

uchar3=np.dtype([('x', np.uint8), ('y', np.uint8), ('z', np.uint8)]),

478

uchar4=np.dtype([('x', np.uint8), ('y', np.uint8), ('z', np.uint8), ('w', np.uint8)]),

479

short2=np.dtype([('x', np.int16), ('y', np.int16)]),

480

short3=np.dtype([('x', np.int16), ('y', np.int16), ('z', np.int16)]),

481

short4=np.dtype([('x', np.int16), ('y', np.int16), ('z', np.int16), ('w', np.int16)]),

482

ushort2=np.dtype([('x', np.uint16), ('y', np.uint16)]),

483

ushort3=np.dtype([('x', np.uint16), ('y', np.uint16), ('z', np.uint16)]),

484

ushort4=np.dtype([('x', np.uint16), ('y', np.uint16), ('z', np.uint16), ('w', np.uint16)]),

485

int2=np.dtype([('x', np.int32), ('y', np.int32)]),

486

int3=np.dtype([('x', np.int32), ('y', np.int32), ('z', np.int32)]),

487

int4=np.dtype([('x', np.int32), ('y', np.int32), ('z', np.int32), ('w', np.int32)]),

488

uint2=np.dtype([('x', np.uint32), ('y', np.uint32)]),

489

uint3=np.dtype([('x', np.uint32), ('y', np.uint32), ('z', np.uint32)]),

490

uint4=np.dtype([('x', np.uint32), ('y', np.uint32), ('z', np.uint32), ('w', np.uint32)]),

491

float2=np.dtype([('x', np.float32), ('y', np.float32)]),

492

float3=np.dtype([('x', np.float32), ('y', np.float32), ('z', np.float32)]),

493

float4=np.dtype([('x', np.float32), ('y', np.float32), ('z', np.float32), ('w', np.float32)]),

494

double2=np.dtype([('x', np.float64), ('y', np.float64)])

495

)

496

```