Tessl Tile for pypi/itk@5.4.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

data-structures.md filtering.md finite-difference.md gpu-computing.md image-adaptors.md image-functions.md index.md io.md mesh.md numpy-integration.md registration.md segmentation.md spatial-objects.md transforms.md

gpu-computing.mddocs/

0
# GPU Computing
1

2
GPU-accelerated computing support using OpenCL for high-performance image processing operations. ITK's GPU framework provides seamless acceleration of common image processing tasks while maintaining the same API as CPU-based operations.
3

4
## Capabilities
5

6
### GPU Memory Management
7

8
Classes for managing GPU memory and synchronization between CPU and GPU data.
9

10
```python { .api }
11
class GPUImage[TPixel, VImageDimension]:
12
    """
13
    GPU-resident image container with automatic CPU/GPU synchronization.
14
    
15
    Template Parameters:
16
    - TPixel: Pixel data type
17
    - VImageDimension: Image dimensionality
18
    """
19
    def New() -> GPUImage[TPixel, VImageDimension]: ...
20
    def GetGPUDataManager(self) -> GPUDataManager: ...
21
    def UpdateGPUBuffer(self) -> None: ...
22
    def UpdateCPUBuffer(self) -> None: ...
23
    def GetGPUBufferPointer(self) -> GPUBufferPointer: ...
24
    def GetCPUBufferPointer(self) -> CPUBufferPointer: ...
25
    def Graft(self, data: DataObject) -> None: ...
26
    def Initialize(self) -> None: ...
27
    def FillBuffer(self, value: TPixel) -> None: ...
28
    def SetPixel(self, index: Index[VImageDimension], value: TPixel) -> None: ...
29
    def GetPixel(self, index: Index[VImageDimension]) -> TPixel: ...
30
    def SetGPUDataDirty(self) -> None: ...
31
    def SetCPUDataDirty(self) -> None: ...
32

33
class GPUImageDataManager[TImageType]:
34
    """Manages CPU/GPU memory synchronization for images."""
35
    def New() -> GPUImageDataManager[TImageType]: ...
36
    def SetBufferSize(self, size: int) -> None: ...
37
    def SetCPUBufferPointer(self, ptr: PixelType) -> None: ...
38
    def SetCPUDirtyFlag(self, isDirty: bool) -> None: ...
39
    def SetGPUDirtyFlag(self, isDirty: bool) -> None: ...
40
    def IsCPUBufferDirty(self) -> bool: ...
41
    def IsGPUBufferDirty(self) -> bool: ...
42
    def UpdateCPUBuffer(self) -> None: ...
43
    def UpdateGPUBuffer(self) -> None: ...
44
    def Allocate(self) -> bool: ...
45
    def SetBufferFlag(self, flags: int) -> None: ...
46
    def GetGPUBufferPointer(self) -> GPUBufferPointer: ...
47
    def GetCPUBufferPointer(self) -> CPUBufferPointer: ...
48

49
class GPUDataManager:
50
    """Base class for GPU memory management."""
51
    def SetBufferSize(self, size: int) -> None: ...
52
    def SetCPUBufferPointer(self, ptr: void) -> None: ...
53
    def SetCPUDirtyFlag(self, isDirty: bool) -> None: ...
54
    def SetGPUDirtyFlag(self, isDirty: bool) -> None: ...
55
    def IsCPUBufferDirty(self) -> bool: ...
56
    def IsGPUBufferDirty(self) -> bool: ...
57
    def UpdateCPUBuffer(self) -> None: ...
58
    def UpdateGPUBuffer(self) -> None: ...
59
    def Allocate(self) -> bool: ...
60
    def Initialize(self) -> None: ...
61
    def SetBufferFlag(self, flags: int) -> None: ...
62
    def GetBufferSize(self) -> int: ...
63
```
64

65
### OpenCL Context Management
66

67
Classes for managing OpenCL context, devices, and execution environment.
68

69
```python { .api }
70
class GPUContextManager:
71
    """Singleton class managing OpenCL context and devices."""
72
    @staticmethod
73
    def GetInstance() -> GPUContextManager: ...
74
    def GetNumberOfDevices(self) -> int: ...
75
    def GetCurrentDevice(self) -> int: ...
76
    def SetCurrentDevice(self, device: int) -> bool: ...
77
    def GetDeviceName(self, device: int) -> str: ...
78
    def GetDeviceVersion(self, device: int) -> str: ...
79
    def GetMaxWorkGroupSize(self, device: int) -> int: ...
80
    def GetMaxWorkItemSizes(self, device: int) -> list[int]: ...
81
    def GetLocalMemorySize(self, device: int) -> int: ...
82
    def GetGlobalMemorySize(self, device: int) -> int: ...
83
    def GetMaxComputeUnits(self, device: int) -> int: ...
84
    def GetMaxClockFrequency(self, device: int) -> int: ...
85
    def CreateCommandQueue(self) -> CommandQueueType: ...
86
    def GetCommandQueue(self, device: int) -> CommandQueueType: ...
87
    def GetContext(self) -> ContextType: ...
88
    def GetProgram(self, device: int) -> ProgramType: ...
89
    def GetDevice(self, device: int) -> DeviceType: ...
90

91
class GPUKernelManager:
92
    """Manages OpenCL kernel compilation and execution."""
93
    def New() -> GPUKernelManager: ...
94
    def SetKernelSource(self, source: str) -> None: ...
95
    def SetKernelSourceFile(self, filename: str) -> None: ...
96
    def CreateKernel(self, kernel_name: str) -> int: ...
97
    def GetKernel(self, kernel_id: int) -> KernelType: ...
98
    def SetKernelArg(self, kernel_id: int, arg_index: int, arg: ArgumentType) -> bool: ...
99
    def SetKernelArgWithImage(self, kernel_id: int, arg_index: int, image: GPUDataManager) -> bool: ...
100
    def LaunchKernel(self, kernel_id: int, global_work_size: list[int], local_work_size: list[int]) -> bool: ...
101
    def SetGlobalWorkSize(self, dimension: int, global_work_size: list[int]) -> None: ...
102
    def SetLocalWorkSize(self, dimension: int, local_work_size: list[int]) -> None: ...
103
    def GetLocalWorkSize(self, dimension: int) -> list[int]: ...
104
    def GetGlobalWorkSize(self, dimension: int) -> list[int]: ...
105
```
106

107
### GPU Image Processing Operations
108

109
GPU-accelerated versions of common image processing operations.
110

111
```python { .api }
112
class GPUImageOpsKernel:
113
    """Collection of basic GPU image operation kernels."""
114
    def New() -> GPUImageOpsKernel: ...
115
    def GetOpenCLSource(self) -> str: ...
116
    def LoadProgramFromFile(self, filename: str) -> bool: ...
117
    def LoadProgramFromString(self, source: str) -> bool: ...
118

119
class GPUMeanImageFilter[TInputImage, TOutputImage]:
120
    """GPU-accelerated mean filtering."""
121
    def New() -> GPUMeanImageFilter[TInputImage, TOutputImage]: ...
122
    def SetRadius(self, radius: InputSizeType) -> None: ...
123
    def GetRadius(self) -> InputSizeType: ...
124
    def GetKernelRadius(self) -> int: ...
125

126
class GPUBinaryThresholdImageFilter[TInputImage, TOutputImage]:
127
    """GPU-accelerated binary thresholding."""
128
    def New() -> GPUBinaryThresholdImageFilter[TInputImage, TOutputImage]: ...
129
    def SetLowerThreshold(self, threshold: InputPixelType) -> None: ...
130
    def GetLowerThreshold(self) -> InputPixelType: ...
131
    def SetUpperThreshold(self, threshold: InputPixelType) -> None: ...
132
    def GetUpperThreshold(self) -> InputPixelType: ...
133
    def SetInsideValue(self, value: OutputPixelType) -> None: ...
134
    def GetInsideValue(self) -> OutputPixelType: ...
135
    def SetOutsideValue(self, value: OutputPixelType) -> None: ...
136
    def GetOutsideValue(self) -> OutputPixelType: ...
137

138
class GPUGradientImageFilter[TInputImage, TOutputImage]:
139
    """GPU-accelerated gradient computation."""
140
    def New() -> GPUGradientImageFilter[TInputImage, TOutputImage]: ...
141
    def SetUseImageSpacing(self, flag: bool) -> None: ...
142
    def GetUseImageSpacing(self) -> bool: ...
143
    def SetUseImageDirection(self, flag: bool) -> None: ...
144
    def GetUseImageDirection(self) -> bool: ...
145

146
class GPUNeighborhoodOperatorImageFilter[TInputImage, TOutputImage, TOperatorValueType, TParentImageFilter]:
147
    """GPU-accelerated neighborhood operations."""
148
    def New() -> GPUNeighborhoodOperatorImageFilter[TInputImage, TOutputImage, TOperatorValueType, TParentImageFilter]: ...
149
    def SetOperator(self, op: OperatorType) -> None: ...
150
    def GetOperator(self) -> OperatorType: ...
151
    def OverrideBoundaryCondition(self, condition: BoundaryConditionType) -> None: ...
152
```
153

154
### GPU Finite Difference Framework
155

156
GPU-accelerated finite difference solvers for PDE-based image processing.
157

158
```python { .api }
159
class GPUFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]:
160
    """
161
    GPU-accelerated finite difference solver base class.
162
    
163
    Template Parameters:
164
    - TInputImage: Input image type
165
    - TOutputImage: Output image type  
166
    - TParentImageFilter: CPU parent filter type
167
    """
168
    def New() -> GPUFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]: ...
169
    def SetNumberOfIterations(self, iterations: IdentifierType) -> None: ...
170
    def GetNumberOfIterations(self) -> IdentifierType: ...
171
    def SetTimeStep(self, time_step: TimeStepType) -> None: ...
172
    def GetTimeStep(self) -> TimeStepType: ...
173
    def SetMaximumRMSError(self, error: double) -> None: ...
174
    def GetMaximumRMSError(self) -> double: ...
175
    def GetRMSChange(self) -> double: ...
176
    def GPUGenerateData(self) -> None: ...
177
    def GPUCalculateChange(self) -> TimeStepType: ...
178
    def GPUApplyUpdate(self, time_step: TimeStepType) -> None: ...
179

180
class GPUDenseFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]:
181
    """GPU-accelerated dense finite difference solver."""
182
    def New() -> GPUDenseFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]: ...
183
    def GetFiniteDifferenceFunction(self) -> FiniteDifferenceFunctionType: ...
184
    def SetFiniteDifferenceFunction(self, function: FiniteDifferenceFunctionType) -> None: ...
185

186
class GPUFiniteDifferenceFunction[TImageType]:
187
    """Base class for GPU finite difference functions."""
188
    def ComputeUpdate(self, neighborhood: NeighborhoodType, global_data: GlobalDataStruct,
189
                     gd: GlobalDataStruct) -> PixelType: ...
190
    def GetRadius(self) -> RadiusType: ...
191
    def SetTimeStep(self, time_step: TimeStepType) -> None: ...
192
    def GetTimeStep(self) -> TimeStepType: ...
193
    def GPUComputeUpdate(self, input: ImagePointer, output: ImagePointer, 
194
                        global_data: GlobalDataStruct) -> TimeStepType: ...
195
    def GPUCalculateChange(self, input: ImagePointer) -> TimeStepType: ...
196
    def GPUApplyUpdate(self, input: ImagePointer, update: ImagePointer, 
197
                      time_step: TimeStepType) -> None: ...
198
```
199

200
### GPU Anisotropic Diffusion
201

202
GPU implementations of anisotropic diffusion algorithms.
203

204
```python { .api }
205
class GPUGradientAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]:
206
    """GPU-accelerated gradient-based anisotropic diffusion."""
207
    def New() -> GPUGradientAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]: ...
208
    def SetNumberOfIterations(self, iterations: int) -> None: ...
209
    def GetNumberOfIterations(self) -> int: ...
210
    def SetTimeStep(self, time_step: TimeStepType) -> None: ...
211
    def GetTimeStep(self) -> TimeStepType: ...
212
    def SetConductanceParameter(self, conductance: double) -> None: ...
213
    def GetConductanceParameter(self) -> double: ...
214

215
class GPUCurvatureAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]:
216
    """GPU-accelerated curvature-based anisotropic diffusion."""
217
    def New() -> GPUCurvatureAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]: ...
218
    def SetNumberOfIterations(self, iterations: int) -> None: ...
219
    def GetNumberOfIterations(self) -> int: ...
220
    def SetTimeStep(self, time_step: TimeStepType) -> None: ...
221
    def GetTimeStep(self) -> TimeStepType: ...
222
    def SetConductanceParameter(self, conductance: double) -> None: ...
223
    def GetConductanceParameter(self) -> double: ...
224

225
class GPUAnisotropicDiffusionFunction[TImage]:
226
    """Base GPU anisotropic diffusion function."""
227
    def ComputeUpdate(self, neighborhood: NeighborhoodType, global_data: GlobalDataStruct,
228
                     gd: GlobalDataStruct) -> PixelType: ...
229
    def SetConductanceParameter(self, conductance: double) -> None: ...
230
    def GetConductanceParameter(self) -> double: ...
231
    def SetTimeStep(self, time_step: TimeStepType) -> None: ...
232
    def GetTimeStep(self) -> TimeStepType: ...
233
```
234

235
### GPU Utilities and Helpers
236

237
Utility classes for GPU operations and performance monitoring.
238

239
```python { .api }
240
class GPUInPlaceImageFilter[TInputImage, TParentImageFilter]:
241
    """Base class for GPU filters that modify images in-place."""
242
    def New() -> GPUInPlaceImageFilter[TInputImage, TParentImageFilter]: ...
243
    def SetInPlace(self, flag: bool) -> None: ...
244
    def GetInPlace(self) -> bool: ...
245
    def CanRunInPlace(self) -> bool: ...
246

247
class GPUImageToImageFilter[TInputImage, TOutputImage, TParentImageFilter]:
248
    """Base class for GPU image-to-image filters."""
249
    def New() -> GPUImageToImageFilter[TInputImage, TOutputImage, TParentImageFilter]: ...
250
    def GetGPUEnabled(self) -> bool: ...
251
    def SetGPUEnabled(self, flag: bool) -> None: ...
252
    def GPUGenerateData(self) -> None: ...
253

254
class GPUTimer:
255
    """High-resolution timer for GPU operations."""
256
    def New() -> GPUTimer: ...
257
    def Start(self) -> None: ...
258
    def Stop(self) -> None: ...
259
    def GetElapsedTime(self) -> double: ...
260
    def Reset(self) -> None: ...
261
```
262

263
## Usage Examples
264

265
### Basic GPU Image Processing
266

267
```python
268
import itk
269
import numpy as np
270

271
# Check if GPU support is available
272
gpu_context = itk.GPUContextManager.GetInstance()
273
if gpu_context.GetNumberOfDevices() == 0:
274
    print("No GPU devices available")
275
    exit()
276

277
print(f"Found {gpu_context.GetNumberOfDevices()} GPU device(s)")
278
print(f"Using device: {gpu_context.GetDeviceName(0)}")
279

280
# Create test image
281
ImageType = itk.Image[itk.F, 2]
282
image = ImageType.New()
283

284
size = itk.Size[2]([256, 256])
285
region = itk.ImageRegion[2]()
286
region.SetSize(size)
287
image.SetRegions(region)
288
image.Allocate()
289

290
# Fill with noisy pattern
291
np.random.seed(42)
292
for x in range(256):
293
    for y in range(256):
294
        idx = itk.Index[2]([x, y])
295
        # Create pattern with noise
296
        value = 100.0 + 50.0 * np.sin(x/10.0) * np.cos(y/10.0)
297
        noise = np.random.normal(0, 10)
298
        image.SetPixel(idx, value + noise)
299

300
# Create GPU image
301
GPUImageType = itk.GPUImage[itk.F, 2]
302
gpu_image = GPUImageType.New()
303
gpu_image.Graft(image)  # Copy CPU image to GPU
304

305
print("Image copied to GPU")
306
print(f"GPU buffer size: {gpu_image.GetGPUDataManager().GetBufferSize()} bytes")
307

308
# Apply GPU mean filter
309
GPUMeanFilterType = itk.GPUMeanImageFilter[GPUImageType, GPUImageType]
310
gpu_mean_filter = GPUMeanFilterType.New()
311
gpu_mean_filter.SetInput(gpu_image)
312
gpu_mean_filter.SetRadius(itk.Size[2]([2, 2]))  # 5x5 kernel
313

314
# Time the GPU operation
315
gpu_timer = itk.GPUTimer.New()
316
gpu_timer.Start()
317
gpu_mean_filter.Update()
318
gpu_timer.Stop()
319

320
gpu_result = gpu_mean_filter.GetOutput()
321
gpu_time = gpu_timer.GetElapsedTime()
322

323
print(f"GPU mean filtering completed in {gpu_time:.3f} ms")
324

325
# Compare with CPU version for verification
326
CPUMeanFilterType = itk.MeanImageFilter[ImageType, ImageType]
327
cpu_mean_filter = CPUMeanFilterType.New()
328
cpu_mean_filter.SetInput(image)
329
cpu_mean_filter.SetRadius(itk.Size[2]([2, 2]))
330

331
cpu_timer = itk.GPUTimer.New()
332
cpu_timer.Start()
333
cpu_mean_filter.Update()
334
cpu_timer.Stop()
335

336
cpu_result = cpu_mean_filter.GetOutput()
337
cpu_time = cpu_timer.GetElapsedTime()
338

339
print(f"CPU mean filtering completed in {cpu_time:.3f} ms")
340
print(f"GPU speedup: {cpu_time/gpu_time:.2f}x")
341

342
# Copy GPU result back to CPU for comparison
343
gpu_result.UpdateCPUBuffer()
344

345
# Compare results at a few points
346
test_indices = [
347
    itk.Index[2]([50, 50]),
348
    itk.Index[2]([100, 100]),
349
    itk.Index[2]([200, 200])
350
]
351

352
max_diff = 0.0
353
for idx in test_indices:
354
    gpu_val = gpu_result.GetPixel(idx)
355
    cpu_val = cpu_result.GetPixel(idx)
356
    diff = abs(gpu_val - cpu_val)
357
    max_diff = max(max_diff, diff)
358
    print(f"Index {idx}: GPU={gpu_val:.3f}, CPU={cpu_val:.3f}, Diff={diff:.6f}")
359

360
print(f"Maximum difference: {max_diff:.6f}")
361
```
362

363
### GPU Binary Thresholding
364

365
```python
366
import itk
367
import numpy as np
368

369
# Create test image with varying intensities
370
ImageType = itk.Image[itk.UC, 2]  # Use unsigned char
371
image = ImageType.New()
372

373
size = itk.Size[2]([512, 512])
374
region = itk.ImageRegion[2]()
375
region.SetSize(size)
376
image.SetRegions(region)
377
image.Allocate()
378

379
# Create gradient pattern
380
for x in range(512):
381
    for y in range(512):
382
        idx = itk.Index[2]([x, y])
383
        value = int((x + y) * 255 / 1022)  # Scale to 0-255
384
        image.SetPixel(idx, value)
385

386
# Create GPU image
387
GPUImageType = itk.GPUImage[itk.UC, 2]
388
gpu_input = GPUImageType.New()
389
gpu_input.Graft(image)
390

391
# Create GPU binary threshold filter
392
GPUBinaryImageType = itk.GPUImage[itk.UC, 2]
393
GPUThresholdFilterType = itk.GPUBinaryThresholdImageFilter[GPUImageType, GPUBinaryImageType]
394

395
gpu_threshold_filter = GPUThresholdFilterType.New()
396
gpu_threshold_filter.SetInput(gpu_input)
397
gpu_threshold_filter.SetLowerThreshold(100)
398
gpu_threshold_filter.SetUpperThreshold(200)
399
gpu_threshold_filter.SetInsideValue(255)
400
gpu_threshold_filter.SetOutsideValue(0)
401

402
# Time GPU execution
403
gpu_timer = itk.GPUTimer.New()
404
gpu_timer.Start()
405
gpu_threshold_filter.Update()
406
gpu_timer.Stop()
407

408
gpu_result = gpu_threshold_filter.GetOutput()
409
gpu_time = gpu_timer.GetElapsedTime()
410

411
print(f"GPU binary thresholding completed in {gpu_time:.3f} ms")
412

413
# Compare with CPU version
414
CPUThresholdFilterType = itk.BinaryThresholdImageFilter[ImageType, ImageType]
415
cpu_threshold_filter = CPUThresholdFilterType.New()
416
cpu_threshold_filter.SetInput(image)
417
cpu_threshold_filter.SetLowerThreshold(100)
418
cpu_threshold_filter.SetUpperThreshold(200)
419
cpu_threshold_filter.SetInsideValue(255)
420
cpu_threshold_filter.SetOutsideValue(0)
421

422
cpu_timer = itk.GPUTimer.New()
423
cpu_timer.Start()
424
cpu_threshold_filter.Update()
425
cpu_timer.Stop()
426

427
cpu_result = cpu_threshold_filter.GetOutput()
428
cpu_time = cpu_timer.GetElapsedTime()
429

430
print(f"CPU binary thresholding completed in {cpu_time:.3f} ms")
431
print(f"GPU speedup: {cpu_time/gpu_time:.2f}x")
432

433
# Verify results match
434
gpu_result.UpdateCPUBuffer()
435

436
# Count threshold pixels
437
gpu_count = 0
438
cpu_count = 0
439
for x in range(512):
440
    for y in range(512):
441
        idx = itk.Index[2]([x, y])
442
        if gpu_result.GetPixel(idx) == 255:
443
            gpu_count += 1
444
        if cpu_result.GetPixel(idx) == 255:
445
            cpu_count += 1
446

447
print(f"GPU thresholded pixels: {gpu_count}")
448
print(f"CPU thresholded pixels: {cpu_count}")
449
print(f"Results match: {gpu_count == cpu_count}")
450
```
451

452
### GPU Anisotropic Diffusion
453

454
```python
455
import itk
456
import numpy as np
457

458
# Create noisy test image
459
ImageType = itk.Image[itk.F, 2]
460
image = ImageType.New()
461

462
size = itk.Size[2]([256, 256])
463
region = itk.ImageRegion[2]()
464
region.SetSize(size)
465
image.SetRegions(region)
466
image.Allocate()
467

468
# Create image with edges and noise
469
np.random.seed(42)
470
for x in range(256):
471
    for y in range(256):
472
        idx = itk.Index[2]([x, y])
473
        
474
        # Create step pattern
475
        base_value = 200.0 if (x > 128 and y > 128) else 100.0
476
        if x > 128 and y <= 128:
477
            base_value = 150.0
478
        if x <= 128 and y > 128:
479
            base_value = 75.0
480
            
481
        # Add noise
482
        noise = np.random.normal(0, 15)
483
        value = base_value + noise
484
        image.SetPixel(idx, max(0.0, min(255.0, value)))
485

486
# Create GPU image
487
GPUImageType = itk.GPUImage[itk.F, 2]
488
gpu_input = GPUImageType.New()
489
gpu_input.Graft(image)
490

491
# Create GPU anisotropic diffusion filter
492
GPUDiffusionFilterType = itk.GPUGradientAnisotropicDiffusionImageFilter[GPUImageType, GPUImageType]
493
gpu_diffusion_filter = GPUDiffusionFilterType.New()
494
gpu_diffusion_filter.SetInput(gpu_input)
495
gpu_diffusion_filter.SetNumberOfIterations(50)
496
gpu_diffusion_filter.SetTimeStep(0.0625)  # Stable time step for 2D
497
gpu_diffusion_filter.SetConductanceParameter(5.0)
498

499
# Time GPU execution
500
gpu_timer = itk.GPUTimer.New()
501
gpu_timer.Start()
502
gpu_diffusion_filter.Update()
503
gpu_timer.Stop()
504

505
gpu_result = gpu_diffusion_filter.GetOutput()
506
gpu_time = gpu_timer.GetElapsedTime()
507

508
print(f"GPU anisotropic diffusion completed in {gpu_time:.3f} ms")
509
print(f"GPU iterations: {gpu_diffusion_filter.GetElapsedIterations()}")
510
print(f"GPU final RMS change: {gpu_diffusion_filter.GetRMSChange():.6f}")
511

512
# Compare with CPU version
513
CPUDiffusionFilterType = itk.GradientAnisotropicDiffusionImageFilter[ImageType, ImageType]
514
cpu_diffusion_filter = CPUDiffusionFilterType.New()
515
cpu_diffusion_filter.SetInput(image)
516
cpu_diffusion_filter.SetNumberOfIterations(50)
517
cpu_diffusion_filter.SetTimeStep(0.0625)
518
cpu_diffusion_filter.SetConductanceParameter(5.0)
519

520
cpu_timer = itk.GPUTimer.New()
521
cpu_timer.Start()
522
cpu_diffusion_filter.Update()
523
cpu_timer.Stop()
524

525
cpu_result = cpu_diffusion_filter.GetOutput()
526
cpu_time = cpu_timer.GetElapsedTime()
527

528
print(f"CPU anisotropic diffusion completed in {cpu_time:.3f} ms")
529
print(f"CPU iterations: {cpu_diffusion_filter.GetElapsedIterations()}")
530
print(f"CPU final RMS change: {cpu_diffusion_filter.GetRMSChange():.6f}")
531
print(f"GPU speedup: {cpu_time/gpu_time:.2f}x")
532

533
# Copy GPU result to CPU for comparison
534
gpu_result.UpdateCPUBuffer()
535

536
# Compare results at several points
537
test_points = [
538
    itk.Index[2]([64, 64]),   # Bottom-left region
539
    itk.Index[2]([192, 64]),  # Bottom-right region  
540
    itk.Index[2]([64, 192]),  # Top-left region
541
    itk.Index[2]([192, 192]), # Top-right region
542
    itk.Index[2]([128, 128])  # Center (edge region)
543
]
544

545
print("\nComparison of GPU vs CPU results:")
546
total_diff = 0.0
547
for idx in test_points:
548
    gpu_val = gpu_result.GetPixel(idx)
549
    cpu_val = cpu_result.GetPixel(idx)
550
    diff = abs(gpu_val - cpu_val)
551
    total_diff += diff
552
    print(f"Index {idx}: GPU={gpu_val:.3f}, CPU={cpu_val:.3f}, Diff={diff:.6f}")
553

554
avg_diff = total_diff / len(test_points)
555
print(f"Average difference: {avg_diff:.6f}")
556
```
557

558
### Custom GPU Kernel
559

560
```python
561
import itk
562

563
# This example shows how to create and use custom GPU kernels
564
# Note: In practice, kernel source would be more complex
565

566
# Define custom OpenCL kernel source
567
kernel_source = """
568
__kernel void custom_add_constant(
569
    __global float* input,
570
    __global float* output,
571
    const float constant,
572
    const int width,
573
    const int height)
574
{
575
    int gid_x = get_global_id(0);
576
    int gid_y = get_global_id(1);
577
    
578
    if (gid_x >= width || gid_y >= height)
579
        return;
580
    
581
    int idx = gid_y * width + gid_x;
582
    output[idx] = input[idx] + constant;
583
}
584
"""
585

586
# Create kernel manager
587
kernel_manager = itk.GPUKernelManager.New()
588
kernel_manager.SetKernelSource(kernel_source)
589

590
# Create kernel
591
kernel_id = kernel_manager.CreateKernel("custom_add_constant")
592

593
if kernel_id >= 0:
594
    print(f"Custom kernel created with ID: {kernel_id}")
595
    
596
    # Create test image
597
    ImageType = itk.Image[itk.F, 2]
598
    image = ImageType.New()
599
    
600
    size = itk.Size[2]([64, 64])
601
    region = itk.ImageRegion[2]()
602
    region.SetSize(size)
603
    image.SetRegions(region)
604
    image.Allocate()
605
    
606
    # Fill with test data
607
    for x in range(64):
608
        for y in range(64):
609
            idx = itk.Index[2]([x, y])
610
            value = float(x + y)
611
            image.SetPixel(idx, value)
612
    
613
    # Create GPU images
614
    GPUImageType = itk.GPUImage[itk.F, 2]
615
    gpu_input = GPUImageType.New()
616
    gpu_input.Graft(image)
617
    
618
    gpu_output = GPUImageType.New()
619
    gpu_output.SetRegions(region)
620
    gpu_output.Allocate()
621
    
622
    # Set kernel arguments
623
    constant_value = 42.0
624
    kernel_manager.SetKernelArgWithImage(kernel_id, 0, gpu_input.GetGPUDataManager())
625
    kernel_manager.SetKernelArgWithImage(kernel_id, 1, gpu_output.GetGPUDataManager())
626
    kernel_manager.SetKernelArg(kernel_id, 2, constant_value)
627
    kernel_manager.SetKernelArg(kernel_id, 3, 64)  # width
628
    kernel_manager.SetKernelArg(kernel_id, 4, 64)  # height
629
    
630
    # Launch kernel
631
    global_work_size = [64, 64]
632
    local_work_size = [8, 8]
633
    
634
    success = kernel_manager.LaunchKernel(kernel_id, global_work_size, local_work_size)
635
    
636
    if success:
637
        print("Custom kernel executed successfully")
638
        
639
        # Copy result back to CPU
640
        gpu_output.UpdateCPUBuffer()
641
        
642
        # Verify results
643
        test_idx = itk.Index[2]([10, 20])
644
        input_val = image.GetPixel(test_idx)
645
        output_val = gpu_output.GetPixel(test_idx)
646
        expected = input_val + constant_value
647
        
648
        print(f"Input: {input_val}, Output: {output_val}, Expected: {expected}")
649
        print(f"Kernel working correctly: {abs(output_val - expected) < 1e-6}")
650
    else:
651
        print("Failed to launch custom kernel")
652
else:
653
    print("Failed to create custom kernel")
654
```
655

656
### GPU Memory Management
657

658
```python
659
import itk
660

661
# Create test images of different sizes
662
sizes = [
663
    itk.Size[2]([128, 128]),
664
    itk.Size[2]([256, 256]),
665
    itk.Size[2]([512, 512])
666
]
667

668
ImageType = itk.Image[itk.F, 2]
669
GPUImageType = itk.GPUImage[itk.F, 2]
670

671
gpu_images = []
672

673
print("GPU Memory Management Example")
674
print("=" * 40)
675

676
for i, size in enumerate(sizes):
677
    # Create CPU image
678
    image = ImageType.New()
679
    region = itk.ImageRegion[2]()
680
    region.SetSize(size)
681
    image.SetRegions(region)
682
    image.Allocate()
683
    
684
    # Fill with data
685
    for x in range(size[0]):
686
        for y in range(size[1]):
687
            idx = itk.Index[2]([x, y])
688
            value = float((x + y) % 100)
689
            image.SetPixel(idx, value)
690
    
691
    # Create GPU image
692
    gpu_image = GPUImageType.New()
693
    gpu_image.Graft(image)
694
    
695
    # Check memory usage
696
    data_manager = gpu_image.GetGPUDataManager()
697
    buffer_size = data_manager.GetBufferSize()
698
    
699
    print(f"Image {i+1}: {size[0]}x{size[1]}")
700
    print(f"  Buffer size: {buffer_size} bytes ({buffer_size/(1024*1024):.2f} MB)")
701
    print(f"  CPU dirty: {data_manager.IsCPUBufferDirty()}")
702
    print(f"  GPU dirty: {data_manager.IsGPUBufferDirty()}")
703
    
704
    # Modify GPU data to make CPU buffer dirty
705
    gpu_image.SetGPUDataDirty()
706
    print(f"  After GPU modification - CPU dirty: {data_manager.IsCPUBufferDirty()}")
707
    
708
    # Update CPU buffer
709
    gpu_image.UpdateCPUBuffer()
710
    print(f"  After CPU update - CPU dirty: {data_manager.IsCPUBufferDirty()}")
711
    
712
    gpu_images.append(gpu_image)
713
    print()
714

715
# Test synchronization
716
print("Testing CPU/GPU synchronization:")
717
test_gpu_image = gpu_images[0]
718
test_idx = itk.Index[2]([50, 50])
719

720
# Modify on CPU side
721
original_value = test_gpu_image.GetPixel(test_idx)
722
new_value = original_value + 100.0
723
test_gpu_image.SetPixel(test_idx, new_value)
724

725
print(f"Original value: {original_value}")
726
print(f"Modified CPU value: {test_gpu_image.GetPixel(test_idx)}")
727

728
# Check if GPU buffer needs updating
729
data_manager = test_gpu_image.GetGPUDataManager()
730
print(f"GPU buffer dirty after CPU modification: {data_manager.IsGPUBufferDirty()}")
731

732
# Update GPU buffer
733
test_gpu_image.UpdateGPUBuffer()
734
print(f"GPU buffer dirty after update: {data_manager.IsGPUBufferDirty()}")
735

736
print("\nMemory management example completed")
737
```

Version

Tile

Files

gpu-computing.md.css-3qkkll{font-size:var(--chakra-font-sizes-sm);font-weight:var(--chakra-font-weights-normal);color:var(--chakra-colors-gray-300);}docs/

gpu-computing.mddocs/