or run

    npx @tessl/cli init

Log in

Version

Tile

Overview

Evals

Files

docs

core-execution.md · fem.md · framework-integration.md · index.md · kernel-programming.md · optimization.md · rendering.md · types-arrays.md · utilities.md

docs/core-execution.md

# Core Execution and Device Management

Essential functions for initializing Warp, managing devices, launching kernels, and controlling execution. These functions form the foundation for all Warp programs and must be understood to effectively use any other Warp capabilities.

## Capabilities

### Initialization

Initialize the Warp runtime and make all devices available for computation.

```python { .api }
def init() -> None:
    """
    Initialize Warp and detect available devices.

    Must be called before using any other Warp functionality.
    """
```

Example:

```python
import warp as wp

wp.init()  # Always call this first
```

### Device Management

Query and control available computation devices (CPU and CUDA GPUs).

```python { .api }
def is_cpu_available() -> bool:
    """Check if CPU device is available."""

def is_cuda_available() -> bool:
    """Check if CUDA GPU devices are available."""

def is_device_available(device: Device) -> bool:
    """Check if specific device is available."""

def get_devices() -> list:
    """Get list of all available devices."""

def get_preferred_device() -> Device:
    """Get the preferred device (CUDA if available, else CPU)."""

def get_device(ident: str = None) -> Device:
    """
    Get device by identifier.

    Args:
        ident: Device identifier like "cpu", "cuda:0", "cuda:1"

    Returns:
        Device object for the specified device
    """

def set_device(device: Device) -> None:
    """Set the current active device for subsequent operations."""

def synchronize_device(device: Device = None) -> None:
    """Wait for all operations on device to complete."""
```

### CUDA Device Management

Specialized functions for managing CUDA GPU devices.

```python { .api }
def get_cuda_devices() -> list:
    """Get list of available CUDA devices."""

def get_cuda_device_count() -> int:
    """Get number of available CUDA devices."""

def get_cuda_device(device_id: int = 0) -> Device:
    """Get CUDA device by index."""

def map_cuda_device(device_id: int) -> Device:
    """Map CUDA device for interop with other libraries."""

def unmap_cuda_device(device: Device) -> None:
    """Unmap previously mapped CUDA device."""
```

### Kernel Execution

Launch compiled kernels on devices with specified thread dimensions.

```python { .api }
def launch(kernel: Kernel,
           dim: int | Sequence[int],
           inputs: Sequence = [],
           outputs: Sequence = [],
           adj_inputs: Sequence = [],
           adj_outputs: Sequence = [],
           device: Device = None,
           stream: Stream = None,
           adjoint: bool = False,
           record_tape: bool = True,
           record_cmd: bool = False,
           max_blocks: int = 0,
           block_dim: int = 256) -> None:
    """
    Launch a kernel with specified thread count.

    Args:
        kernel: Compiled kernel function
        dim: Number of threads or tuple of dimensions
        inputs: Input arguments to kernel
        outputs: Output arguments
        adj_inputs: Adjoint input arguments for reverse mode
        adj_outputs: Adjoint output arguments for reverse mode
        device: Device to run on (uses current if None)
        stream: CUDA stream for async execution
        adjoint: Whether to run adjoint/backward pass
        record_tape: Whether to record operations for autodiff
        record_cmd: Whether to record for replay
        max_blocks: Maximum number of thread blocks
        block_dim: Number of threads per block
    """

def launch_tiled(kernel: Kernel,
                 dim: tuple,
                 inputs: list,
                 outputs: list = None,
                 device: Device = None,
                 stream: Stream = None) -> None:
    """
    Launch a tiled kernel with 2D/3D thread organization.

    Args:
        dim: Tuple of thread dimensions (x, y, z)
        Other args same as launch()
    """
```

### Synchronization

Control execution timing and wait for operations to complete.

```python { .api }
def synchronize() -> None:
    """Wait for all pending operations to complete on all devices."""

def synchronize_device(device: Device = None) -> None:
    """Wait for operations on specific device to complete."""

def force_load(module=None) -> None:
    """Force compilation and loading of kernels."""
```

### Module Management

Control kernel compilation and module loading behavior.

```python { .api }
def load_module(module_name: str = None) -> Module:
    """Load or get existing module containing kernels."""

def get_module(module_name: str = None) -> Module:
    """Get module by name."""

def set_module_options(options: dict) -> None:
    """Set compilation options for modules."""

def get_module_options() -> dict:
    """Get current module compilation options."""
```

### Array Creation

Create and initialize arrays on specified devices.

```python { .api }
def zeros(shape: int | tuple[int, ...] | list[int] | None = None,
          dtype: type = float,
          device: Device = None,
          requires_grad: bool = False,
          pinned: bool = False) -> array:
    """Create array filled with zeros."""

def zeros_like(arr: array,
               dtype: type = None,
               device: Device = None) -> array:
    """Create zero array with same shape as existing array."""

def ones(shape: int | tuple[int, ...] | list[int] | None = None,
         dtype: type = float,
         device: Device = None,
         requires_grad: bool = False,
         pinned: bool = False) -> array:
    """Create array filled with ones."""

def ones_like(arr: array,
              dtype: type = None,
              device: Device = None) -> array:
    """Create ones array with same shape as existing array."""

def full(shape: int | tuple[int, ...] | list[int] | None = None,
         value=0,
         dtype: type = None,
         device: Device = None,
         requires_grad: bool = False,
         pinned: bool = False) -> array:
    """Create array filled with specified value."""

def full_like(arr: array,
              value,
              dtype: type = None,
              device: Device = None) -> array:
    """Create filled array with same shape as existing array."""

def empty(shape: int | tuple[int, ...] | list[int] | None = None,
          dtype: type = float,
          device: Device = None,
          requires_grad: bool = False,
          pinned: bool = False) -> array:
    """Create uninitialized array (faster than zeros)."""

def empty_like(arr: array,
               dtype: type = None,
               device: Device = None) -> array:
    """Create empty array with same shape as existing array."""

def clone(arr: array,
          device: Device = None) -> array:
    """Create deep copy of array."""

def copy(src: array,
         dest: array,
         src_offset: int = 0,
         dest_offset: int = 0,
         count: int = None) -> None:
    """Copy data between arrays."""

def from_numpy(arr: np.ndarray,
               dtype: type = None,
               device: Device = None) -> array:
    """Create Warp array from NumPy array."""
```

## Usage Examples

### Basic Device Setup

```python
import warp as wp

# Initialize Warp
wp.init()

# Check available devices
if wp.is_cuda_available():
    device = wp.get_device("cuda:0")
    print(f"Using GPU: {device}")
else:
    device = wp.get_device("cpu")
    print("Using CPU")

wp.set_device(device)
```

### Kernel Launch Pattern

```python
# Create arrays
n = 1000000
a = wp.ones(n, dtype=float, device=device)
b = wp.zeros(n, dtype=float, device=device)

# Launch kernel
wp.launch(my_kernel, dim=n, inputs=[a, b], device=device)

# Wait for completion
wp.synchronize_device(device)
```

## Types

```python { .api }
class Device:
    """Represents a computation device (CPU or GPU)."""

    def __str__(self) -> str:
        """String representation of device."""

    @property
    def context(self):
        """Device context for low-level operations."""

class Module:
    """Container for compiled kernels and functions."""

    def load(self) -> None:
        """Load/compile the module."""

class Kernel:
    """Compiled kernel function that can be launched."""

    def __call__(self, *args, **kwargs):
        """Direct kernel invocation (same as wp.launch)."""

class Function:
    """Compiled function that can be called from kernels."""

    def __call__(self, *args, **kwargs):
        """Function invocation."""
```