or run

    npx @tessl/cli init

Log in

Version

Tile

Overview

Evals

Files

docs

core-execution.md · fem.md · framework-integration.md · index.md · kernel-programming.md · optimization.md · rendering.md · types-arrays.md · utilities.md

docs/core-execution.md

# Core Execution and Device Management

Essential functions for initializing Warp, managing devices, launching kernels, and controlling execution. These functions form the foundation for all Warp programs and must be understood to effectively use any other Warp capabilities.

## Capabilities

### Initialization

Initialize the Warp runtime and make all devices available for computation.

```python { .api }
def init() -> None:
    """
    Initialize Warp and detect available devices.

    Must be called before using any other Warp functionality.
    """
```

Example:

```python
import warp as wp

wp.init()  # Always call this first
```

### Device Management

Query and control available computation devices (CPU and CUDA GPUs).

```python { .api }
def is_cpu_available() -> bool:
    """Check if CPU device is available."""

def is_cuda_available() -> bool:
    """Check if CUDA GPU devices are available."""

def is_device_available(device: Device) -> bool:
    """Check if specific device is available."""

def get_devices() -> list:
    """Get list of all available devices."""

def get_preferred_device() -> Device:
    """Get the preferred device (CUDA if available, else CPU)."""

def get_device(ident: str = None) -> Device:
    """
    Get device by identifier.

    Args:
        ident: Device identifier like "cpu", "cuda:0", "cuda:1"

    Returns:
        Device object for the specified device
    """

def set_device(device: Device) -> None:
    """Set the current active device for subsequent operations."""

def synchronize_device(device: Device = None) -> None:
    """Wait for all operations on device to complete."""
```

### CUDA Device Management

Specialized functions for managing CUDA GPU devices.

```python { .api }
def get_cuda_devices() -> list:
    """Get list of available CUDA devices."""

def get_cuda_device_count() -> int:
    """Get number of available CUDA devices."""

def get_cuda_device(device_id: int = 0) -> Device:
    """Get CUDA device by index."""

def map_cuda_device(device_id: int) -> Device:
    """Map CUDA device for interop with other libraries."""

def unmap_cuda_device(device: Device) -> None:
    """Unmap previously mapped CUDA device."""
```

### Kernel Execution

Launch compiled kernels on devices with specified thread dimensions.

```python { .api }
def launch(kernel: Kernel,
           dim: int | Sequence[int],
           inputs: Sequence = [],
           outputs: Sequence = [],
           adj_inputs: Sequence = [],
           adj_outputs: Sequence = [],
           device: Device = None,
           stream: Stream = None,
           adjoint: bool = False,
           record_tape: bool = True,
           record_cmd: bool = False,
           max_blocks: int = 0,
           block_dim: int = 256) -> None:
    """
    Launch a kernel with specified thread count.

    Args:
        kernel: Compiled kernel function
        dim: Number of threads or tuple of dimensions
        inputs: Input arguments to kernel
        outputs: Output arguments
        adj_inputs: Adjoint input arguments for reverse mode
        adj_outputs: Adjoint output arguments for reverse mode
        device: Device to run on (uses current if None)
        stream: CUDA stream for async execution
        adjoint: Whether to run adjoint/backward pass
        record_tape: Whether to record operations for autodiff
        record_cmd: Whether to record for replay
        max_blocks: Maximum number of thread blocks
        block_dim: Number of threads per block
    """

def launch_tiled(kernel: Kernel,
                 dim: tuple,
                 inputs: list,
                 outputs: list = None,
                 device: Device = None,
                 stream: Stream = None) -> None:
    """
    Launch a tiled kernel with 2D/3D thread organization.

    Args:
        dim: Tuple of thread dimensions (x, y, z)
        Other args same as launch()
    """
```

### Synchronization

Control execution timing and wait for operations to complete.

```python { .api }
def synchronize() -> None:
    """Wait for all pending operations to complete on all devices."""

def synchronize_device(device: Device = None) -> None:
    """Wait for operations on specific device to complete."""

def force_load(module=None) -> None:
    """Force compilation and loading of kernels."""
```

### Module Management

Control kernel compilation and module loading behavior.

```python { .api }
def load_module(module_name: str = None) -> Module:
    """Load or get existing module containing kernels."""

def get_module(module_name: str = None) -> Module:
    """Get module by name."""

def set_module_options(options: dict) -> None:
    """Set compilation options for modules."""

def get_module_options() -> dict:
    """Get current module compilation options."""
```

### Array Creation

Create and initialize arrays on specified devices.

```python { .api }
def zeros(shape: int | tuple[int, ...] | list[int] | None = None,
          dtype: type = float,
          device: Device = None,
          requires_grad: bool = False,
          pinned: bool = False) -> array:
    """Create array filled with zeros."""

def zeros_like(arr: array,
               dtype: type = None,
               device: Device = None) -> array:
    """Create zero array with same shape as existing array."""

def ones(shape: int | tuple[int, ...] | list[int] | None = None,
         dtype: type = float,
         device: Device = None,
         requires_grad: bool = False,
         pinned: bool = False) -> array:
    """Create array filled with ones."""

def ones_like(arr: array,
              dtype: type = None,
              device: Device = None) -> array:
    """Create ones array with same shape as existing array."""

def full(shape: int | tuple[int, ...] | list[int] | None = None,
         value=0,
         dtype: type = None,
         device: Device = None,
         requires_grad: bool = False,
         pinned: bool = False) -> array:
    """Create array filled with specified value."""

def full_like(arr: array,
              value,
              dtype: type = None,
              device: Device = None) -> array:
    """Create filled array with same shape as existing array."""

def empty(shape: int | tuple[int, ...] | list[int] | None = None,
          dtype: type = float,
          device: Device = None,
          requires_grad: bool = False,
          pinned: bool = False) -> array:
    """Create uninitialized array (faster than zeros)."""

def empty_like(arr: array,
               dtype: type = None,
               device: Device = None) -> array:
    """Create empty array with same shape as existing array."""

def clone(arr: array,
          device: Device = None) -> array:
    """Create deep copy of array."""

def copy(src: array,
         dest: array,
         src_offset: int = 0,
         dest_offset: int = 0,
         count: int = None) -> None:
    """Copy data between arrays."""

def from_numpy(arr: np.ndarray,
               dtype: type = None,
               device: Device = None) -> array:
    """Create Warp array from NumPy array."""
```

## Usage Examples

### Basic Device Setup

```python
import warp as wp

# Initialize Warp
wp.init()

# Check available devices
if wp.is_cuda_available():
    device = wp.get_device("cuda:0")
    print(f"Using GPU: {device}")
else:
    device = wp.get_device("cpu")
    print("Using CPU")

wp.set_device(device)
```

### Kernel Launch Pattern

```python
# Create arrays
n = 1000000
a = wp.ones(n, dtype=float, device=device)
b = wp.zeros(n, dtype=float, device=device)

# Launch kernel
wp.launch(my_kernel, dim=n, inputs=[a, b], device=device)

# Wait for completion
wp.synchronize_device(device)
```

## Types

```python { .api }
class Device:
    """Represents a computation device (CPU or GPU)."""

    def __str__(self) -> str:
        """String representation of device."""

    @property
    def context(self):
        """Device context for low-level operations."""

class Module:
    """Container for compiled kernels and functions."""

    def load(self) -> None:
        """Load/compile the module."""

class Kernel:
    """Compiled kernel function that can be launched."""

    def __call__(self, *args, **kwargs):
        """Direct kernel invocation (same as wp.launch)."""

class Function:
    """Compiled function that can be called from kernels."""

    def __call__(self, *args, **kwargs):
        """Function invocation."""
```