0
# GPU Computing
1
2
GPU-accelerated computing support using OpenCL for high-performance image processing operations. ITK's GPU framework provides seamless acceleration of common image processing tasks while maintaining the same API as CPU-based operations.
3
4
## Capabilities
5
6
### GPU Memory Management
7
8
Classes for managing GPU memory and synchronization between CPU and GPU data.
9
10
```python { .api }
11
class GPUImage[TPixel, VImageDimension]:
12
"""
13
GPU-resident image container with automatic CPU/GPU synchronization.
14
15
Template Parameters:
16
- TPixel: Pixel data type
17
- VImageDimension: Image dimensionality
18
"""
19
def New() -> GPUImage[TPixel, VImageDimension]: ...
20
def GetGPUDataManager(self) -> GPUDataManager: ...
21
def UpdateGPUBuffer(self) -> None: ...
22
def UpdateCPUBuffer(self) -> None: ...
23
def GetGPUBufferPointer(self) -> GPUBufferPointer: ...
24
def GetCPUBufferPointer(self) -> CPUBufferPointer: ...
25
def Graft(self, data: DataObject) -> None: ...
26
def Initialize(self) -> None: ...
27
def FillBuffer(self, value: TPixel) -> None: ...
28
def SetPixel(self, index: Index[VImageDimension], value: TPixel) -> None: ...
29
def GetPixel(self, index: Index[VImageDimension]) -> TPixel: ...
30
def SetGPUDataDirty(self) -> None: ...
31
def SetCPUDataDirty(self) -> None: ...
32
33
class GPUImageDataManager[TImageType]:
34
"""Manages CPU/GPU memory synchronization for images."""
35
def New() -> GPUImageDataManager[TImageType]: ...
36
def SetBufferSize(self, size: int) -> None: ...
37
def SetCPUBufferPointer(self, ptr: PixelType) -> None: ...
38
def SetCPUDirtyFlag(self, isDirty: bool) -> None: ...
39
def SetGPUDirtyFlag(self, isDirty: bool) -> None: ...
40
def IsCPUBufferDirty(self) -> bool: ...
41
def IsGPUBufferDirty(self) -> bool: ...
42
def UpdateCPUBuffer(self) -> None: ...
43
def UpdateGPUBuffer(self) -> None: ...
44
def Allocate(self) -> bool: ...
45
def SetBufferFlag(self, flags: int) -> None: ...
46
def GetGPUBufferPointer(self) -> GPUBufferPointer: ...
47
def GetCPUBufferPointer(self) -> CPUBufferPointer: ...
48
49
class GPUDataManager:
50
"""Base class for GPU memory management."""
51
def SetBufferSize(self, size: int) -> None: ...
52
def SetCPUBufferPointer(self, ptr: void) -> None: ...
53
def SetCPUDirtyFlag(self, isDirty: bool) -> None: ...
54
def SetGPUDirtyFlag(self, isDirty: bool) -> None: ...
55
def IsCPUBufferDirty(self) -> bool: ...
56
def IsGPUBufferDirty(self) -> bool: ...
57
def UpdateCPUBuffer(self) -> None: ...
58
def UpdateGPUBuffer(self) -> None: ...
59
def Allocate(self) -> bool: ...
60
def Initialize(self) -> None: ...
61
def SetBufferFlag(self, flags: int) -> None: ...
62
def GetBufferSize(self) -> int: ...
63
```
64
65
### OpenCL Context Management
66
67
Classes for managing OpenCL context, devices, and execution environment.
68
69
```python { .api }
70
class GPUContextManager:
71
"""Singleton class managing OpenCL context and devices."""
72
@staticmethod
73
def GetInstance() -> GPUContextManager: ...
74
def GetNumberOfDevices(self) -> int: ...
75
def GetCurrentDevice(self) -> int: ...
76
def SetCurrentDevice(self, device: int) -> bool: ...
77
def GetDeviceName(self, device: int) -> str: ...
78
def GetDeviceVersion(self, device: int) -> str: ...
79
def GetMaxWorkGroupSize(self, device: int) -> int: ...
80
def GetMaxWorkItemSizes(self, device: int) -> list[int]: ...
81
def GetLocalMemorySize(self, device: int) -> int: ...
82
def GetGlobalMemorySize(self, device: int) -> int: ...
83
def GetMaxComputeUnits(self, device: int) -> int: ...
84
def GetMaxClockFrequency(self, device: int) -> int: ...
85
def CreateCommandQueue(self) -> CommandQueueType: ...
86
def GetCommandQueue(self, device: int) -> CommandQueueType: ...
87
def GetContext(self) -> ContextType: ...
88
def GetProgram(self, device: int) -> ProgramType: ...
89
def GetDevice(self, device: int) -> DeviceType: ...
90
91
class GPUKernelManager:
92
"""Manages OpenCL kernel compilation and execution."""
93
def New() -> GPUKernelManager: ...
94
def SetKernelSource(self, source: str) -> None: ...
95
def SetKernelSourceFile(self, filename: str) -> None: ...
96
def CreateKernel(self, kernel_name: str) -> int: ...
97
def GetKernel(self, kernel_id: int) -> KernelType: ...
98
def SetKernelArg(self, kernel_id: int, arg_index: int, arg: ArgumentType) -> bool: ...
99
def SetKernelArgWithImage(self, kernel_id: int, arg_index: int, image: GPUDataManager) -> bool: ...
100
def LaunchKernel(self, kernel_id: int, global_work_size: list[int], local_work_size: list[int]) -> bool: ...
101
def SetGlobalWorkSize(self, dimension: int, global_work_size: list[int]) -> None: ...
102
def SetLocalWorkSize(self, dimension: int, local_work_size: list[int]) -> None: ...
103
def GetLocalWorkSize(self, dimension: int) -> list[int]: ...
104
def GetGlobalWorkSize(self, dimension: int) -> list[int]: ...
105
```
106
107
### GPU Image Processing Operations
108
109
GPU-accelerated versions of common image processing operations.
110
111
```python { .api }
112
class GPUImageOpsKernel:
113
"""Collection of basic GPU image operation kernels."""
114
def New() -> GPUImageOpsKernel: ...
115
def GetOpenCLSource(self) -> str: ...
116
def LoadProgramFromFile(self, filename: str) -> bool: ...
117
def LoadProgramFromString(self, source: str) -> bool: ...
118
119
class GPUMeanImageFilter[TInputImage, TOutputImage]:
120
"""GPU-accelerated mean filtering."""
121
def New() -> GPUMeanImageFilter[TInputImage, TOutputImage]: ...
122
def SetRadius(self, radius: InputSizeType) -> None: ...
123
def GetRadius(self) -> InputSizeType: ...
124
def GetKernelRadius(self) -> int: ...
125
126
class GPUBinaryThresholdImageFilter[TInputImage, TOutputImage]:
127
"""GPU-accelerated binary thresholding."""
128
def New() -> GPUBinaryThresholdImageFilter[TInputImage, TOutputImage]: ...
129
def SetLowerThreshold(self, threshold: InputPixelType) -> None: ...
130
def GetLowerThreshold(self) -> InputPixelType: ...
131
def SetUpperThreshold(self, threshold: InputPixelType) -> None: ...
132
def GetUpperThreshold(self) -> InputPixelType: ...
133
def SetInsideValue(self, value: OutputPixelType) -> None: ...
134
def GetInsideValue(self) -> OutputPixelType: ...
135
def SetOutsideValue(self, value: OutputPixelType) -> None: ...
136
def GetOutsideValue(self) -> OutputPixelType: ...
137
138
class GPUGradientImageFilter[TInputImage, TOutputImage]:
139
"""GPU-accelerated gradient computation."""
140
def New() -> GPUGradientImageFilter[TInputImage, TOutputImage]: ...
141
def SetUseImageSpacing(self, flag: bool) -> None: ...
142
def GetUseImageSpacing(self) -> bool: ...
143
def SetUseImageDirection(self, flag: bool) -> None: ...
144
def GetUseImageDirection(self) -> bool: ...
145
146
class GPUNeighborhoodOperatorImageFilter[TInputImage, TOutputImage, TOperatorValueType, TParentImageFilter]:
147
"""GPU-accelerated neighborhood operations."""
148
def New() -> GPUNeighborhoodOperatorImageFilter[TInputImage, TOutputImage, TOperatorValueType, TParentImageFilter]: ...
149
def SetOperator(self, op: OperatorType) -> None: ...
150
def GetOperator(self) -> OperatorType: ...
151
def OverrideBoundaryCondition(self, condition: BoundaryConditionType) -> None: ...
152
```
153
154
### GPU Finite Difference Framework
155
156
GPU-accelerated finite difference solvers for PDE-based image processing.
157
158
```python { .api }
159
class GPUFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]:
160
"""
161
GPU-accelerated finite difference solver base class.
162
163
Template Parameters:
164
- TInputImage: Input image type
165
- TOutputImage: Output image type
166
- TParentImageFilter: CPU parent filter type
167
"""
168
def New() -> GPUFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]: ...
169
def SetNumberOfIterations(self, iterations: IdentifierType) -> None: ...
170
def GetNumberOfIterations(self) -> IdentifierType: ...
171
def SetTimeStep(self, time_step: TimeStepType) -> None: ...
172
def GetTimeStep(self) -> TimeStepType: ...
173
def SetMaximumRMSError(self, error: double) -> None: ...
174
def GetMaximumRMSError(self) -> double: ...
175
def GetRMSChange(self) -> double: ...
176
def GPUGenerateData(self) -> None: ...
177
def GPUCalculateChange(self) -> TimeStepType: ...
178
def GPUApplyUpdate(self, time_step: TimeStepType) -> None: ...
179
180
class GPUDenseFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]:
181
"""GPU-accelerated dense finite difference solver."""
182
def New() -> GPUDenseFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]: ...
183
def GetFiniteDifferenceFunction(self) -> FiniteDifferenceFunctionType: ...
184
def SetFiniteDifferenceFunction(self, function: FiniteDifferenceFunctionType) -> None: ...
185
186
class GPUFiniteDifferenceFunction[TImageType]:
187
"""Base class for GPU finite difference functions."""
188
def ComputeUpdate(self, neighborhood: NeighborhoodType, global_data: GlobalDataStruct,
189
gd: GlobalDataStruct) -> PixelType: ...
190
def GetRadius(self) -> RadiusType: ...
191
def SetTimeStep(self, time_step: TimeStepType) -> None: ...
192
def GetTimeStep(self) -> TimeStepType: ...
193
def GPUComputeUpdate(self, input: ImagePointer, output: ImagePointer,
194
global_data: GlobalDataStruct) -> TimeStepType: ...
195
def GPUCalculateChange(self, input: ImagePointer) -> TimeStepType: ...
196
def GPUApplyUpdate(self, input: ImagePointer, update: ImagePointer,
197
time_step: TimeStepType) -> None: ...
198
```
199
200
### GPU Anisotropic Diffusion
201
202
GPU implementations of anisotropic diffusion algorithms.
203
204
```python { .api }
205
class GPUGradientAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]:
206
"""GPU-accelerated gradient-based anisotropic diffusion."""
207
def New() -> GPUGradientAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]: ...
208
def SetNumberOfIterations(self, iterations: int) -> None: ...
209
def GetNumberOfIterations(self) -> int: ...
210
def SetTimeStep(self, time_step: TimeStepType) -> None: ...
211
def GetTimeStep(self) -> TimeStepType: ...
212
def SetConductanceParameter(self, conductance: double) -> None: ...
213
def GetConductanceParameter(self) -> double: ...
214
215
class GPUCurvatureAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]:
216
"""GPU-accelerated curvature-based anisotropic diffusion."""
217
def New() -> GPUCurvatureAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]: ...
218
def SetNumberOfIterations(self, iterations: int) -> None: ...
219
def GetNumberOfIterations(self) -> int: ...
220
def SetTimeStep(self, time_step: TimeStepType) -> None: ...
221
def GetTimeStep(self) -> TimeStepType: ...
222
def SetConductanceParameter(self, conductance: double) -> None: ...
223
def GetConductanceParameter(self) -> double: ...
224
225
class GPUAnisotropicDiffusionFunction[TImage]:
226
"""Base GPU anisotropic diffusion function."""
227
def ComputeUpdate(self, neighborhood: NeighborhoodType, global_data: GlobalDataStruct,
228
gd: GlobalDataStruct) -> PixelType: ...
229
def SetConductanceParameter(self, conductance: double) -> None: ...
230
def GetConductanceParameter(self) -> double: ...
231
def SetTimeStep(self, time_step: TimeStepType) -> None: ...
232
def GetTimeStep(self) -> TimeStepType: ...
233
```
234
235
### GPU Utilities and Helpers
236
237
Utility classes for GPU operations and performance monitoring.
238
239
```python { .api }
240
class GPUInPlaceImageFilter[TInputImage, TParentImageFilter]:
241
"""Base class for GPU filters that modify images in-place."""
242
def New() -> GPUInPlaceImageFilter[TInputImage, TParentImageFilter]: ...
243
def SetInPlace(self, flag: bool) -> None: ...
244
def GetInPlace(self) -> bool: ...
245
def CanRunInPlace(self) -> bool: ...
246
247
class GPUImageToImageFilter[TInputImage, TOutputImage, TParentImageFilter]:
248
"""Base class for GPU image-to-image filters."""
249
def New() -> GPUImageToImageFilter[TInputImage, TOutputImage, TParentImageFilter]: ...
250
def GetGPUEnabled(self) -> bool: ...
251
def SetGPUEnabled(self, flag: bool) -> None: ...
252
def GPUGenerateData(self) -> None: ...
253
254
class GPUTimer:
255
"""High-resolution timer for GPU operations."""
256
def New() -> GPUTimer: ...
257
def Start(self) -> None: ...
258
def Stop(self) -> None: ...
259
def GetElapsedTime(self) -> double: ...
260
def Reset(self) -> None: ...
261
```
262
263
## Usage Examples
264
265
### Basic GPU Image Processing
266
267
```python
268
import itk
269
import numpy as np
270
271
# Check if GPU support is available
272
gpu_context = itk.GPUContextManager.GetInstance()
273
if gpu_context.GetNumberOfDevices() == 0:
274
print("No GPU devices available")
275
exit()
276
277
print(f"Found {gpu_context.GetNumberOfDevices()} GPU device(s)")
278
print(f"Using device: {gpu_context.GetDeviceName(0)}")
279
280
# Create test image
281
ImageType = itk.Image[itk.F, 2]
282
image = ImageType.New()
283
284
size = itk.Size[2]([256, 256])
285
region = itk.ImageRegion[2]()
286
region.SetSize(size)
287
image.SetRegions(region)
288
image.Allocate()
289
290
# Fill with noisy pattern
291
np.random.seed(42)
292
for x in range(256):
293
for y in range(256):
294
idx = itk.Index[2]([x, y])
295
# Create pattern with noise
296
value = 100.0 + 50.0 * np.sin(x/10.0) * np.cos(y/10.0)
297
noise = np.random.normal(0, 10)
298
image.SetPixel(idx, value + noise)
299
300
# Create GPU image
301
GPUImageType = itk.GPUImage[itk.F, 2]
302
gpu_image = GPUImageType.New()
303
gpu_image.Graft(image) # Copy CPU image to GPU
304
305
print("Image copied to GPU")
306
print(f"GPU buffer size: {gpu_image.GetGPUDataManager().GetBufferSize()} bytes")
307
308
# Apply GPU mean filter
309
GPUMeanFilterType = itk.GPUMeanImageFilter[GPUImageType, GPUImageType]
310
gpu_mean_filter = GPUMeanFilterType.New()
311
gpu_mean_filter.SetInput(gpu_image)
312
gpu_mean_filter.SetRadius(itk.Size[2]([2, 2])) # 5x5 kernel
313
314
# Time the GPU operation
315
gpu_timer = itk.GPUTimer.New()
316
gpu_timer.Start()
317
gpu_mean_filter.Update()
318
gpu_timer.Stop()
319
320
gpu_result = gpu_mean_filter.GetOutput()
321
gpu_time = gpu_timer.GetElapsedTime()
322
323
print(f"GPU mean filtering completed in {gpu_time:.3f} ms")
324
325
# Compare with CPU version for verification
326
CPUMeanFilterType = itk.MeanImageFilter[ImageType, ImageType]
327
cpu_mean_filter = CPUMeanFilterType.New()
328
cpu_mean_filter.SetInput(image)
329
cpu_mean_filter.SetRadius(itk.Size[2]([2, 2]))
330
331
cpu_timer = itk.GPUTimer.New()
332
cpu_timer.Start()
333
cpu_mean_filter.Update()
334
cpu_timer.Stop()
335
336
cpu_result = cpu_mean_filter.GetOutput()
337
cpu_time = cpu_timer.GetElapsedTime()
338
339
print(f"CPU mean filtering completed in {cpu_time:.3f} ms")
340
print(f"GPU speedup: {cpu_time/gpu_time:.2f}x")
341
342
# Copy GPU result back to CPU for comparison
343
gpu_result.UpdateCPUBuffer()
344
345
# Compare results at a few points
346
test_indices = [
347
itk.Index[2]([50, 50]),
348
itk.Index[2]([100, 100]),
349
itk.Index[2]([200, 200])
350
]
351
352
max_diff = 0.0
353
for idx in test_indices:
354
gpu_val = gpu_result.GetPixel(idx)
355
cpu_val = cpu_result.GetPixel(idx)
356
diff = abs(gpu_val - cpu_val)
357
max_diff = max(max_diff, diff)
358
print(f"Index {idx}: GPU={gpu_val:.3f}, CPU={cpu_val:.3f}, Diff={diff:.6f}")
359
360
print(f"Maximum difference: {max_diff:.6f}")
361
```
362
363
### GPU Binary Thresholding
364
365
```python
366
import itk
367
import numpy as np
368
369
# Create test image with varying intensities
370
ImageType = itk.Image[itk.UC, 2] # Use unsigned char
371
image = ImageType.New()
372
373
size = itk.Size[2]([512, 512])
374
region = itk.ImageRegion[2]()
375
region.SetSize(size)
376
image.SetRegions(region)
377
image.Allocate()
378
379
# Create gradient pattern
380
for x in range(512):
381
for y in range(512):
382
idx = itk.Index[2]([x, y])
383
value = int((x + y) * 255 / 1022) # Scale to 0-255
384
image.SetPixel(idx, value)
385
386
# Create GPU image
387
GPUImageType = itk.GPUImage[itk.UC, 2]
388
gpu_input = GPUImageType.New()
389
gpu_input.Graft(image)
390
391
# Create GPU binary threshold filter
392
GPUBinaryImageType = itk.GPUImage[itk.UC, 2]
393
GPUThresholdFilterType = itk.GPUBinaryThresholdImageFilter[GPUImageType, GPUBinaryImageType]
394
395
gpu_threshold_filter = GPUThresholdFilterType.New()
396
gpu_threshold_filter.SetInput(gpu_input)
397
gpu_threshold_filter.SetLowerThreshold(100)
398
gpu_threshold_filter.SetUpperThreshold(200)
399
gpu_threshold_filter.SetInsideValue(255)
400
gpu_threshold_filter.SetOutsideValue(0)
401
402
# Time GPU execution
403
gpu_timer = itk.GPUTimer.New()
404
gpu_timer.Start()
405
gpu_threshold_filter.Update()
406
gpu_timer.Stop()
407
408
gpu_result = gpu_threshold_filter.GetOutput()
409
gpu_time = gpu_timer.GetElapsedTime()
410
411
print(f"GPU binary thresholding completed in {gpu_time:.3f} ms")
412
413
# Compare with CPU version
414
CPUThresholdFilterType = itk.BinaryThresholdImageFilter[ImageType, ImageType]
415
cpu_threshold_filter = CPUThresholdFilterType.New()
416
cpu_threshold_filter.SetInput(image)
417
cpu_threshold_filter.SetLowerThreshold(100)
418
cpu_threshold_filter.SetUpperThreshold(200)
419
cpu_threshold_filter.SetInsideValue(255)
420
cpu_threshold_filter.SetOutsideValue(0)
421
422
cpu_timer = itk.GPUTimer.New()
423
cpu_timer.Start()
424
cpu_threshold_filter.Update()
425
cpu_timer.Stop()
426
427
cpu_result = cpu_threshold_filter.GetOutput()
428
cpu_time = cpu_timer.GetElapsedTime()
429
430
print(f"CPU binary thresholding completed in {cpu_time:.3f} ms")
431
print(f"GPU speedup: {cpu_time/gpu_time:.2f}x")
432
433
# Verify results match
434
gpu_result.UpdateCPUBuffer()
435
436
# Count threshold pixels
437
gpu_count = 0
438
cpu_count = 0
439
for x in range(512):
440
for y in range(512):
441
idx = itk.Index[2]([x, y])
442
if gpu_result.GetPixel(idx) == 255:
443
gpu_count += 1
444
if cpu_result.GetPixel(idx) == 255:
445
cpu_count += 1
446
447
print(f"GPU thresholded pixels: {gpu_count}")
448
print(f"CPU thresholded pixels: {cpu_count}")
449
print(f"Results match: {gpu_count == cpu_count}")
450
```
451
452
### GPU Anisotropic Diffusion
453
454
```python
455
import itk
456
import numpy as np
457
458
# Create noisy test image
459
ImageType = itk.Image[itk.F, 2]
460
image = ImageType.New()
461
462
size = itk.Size[2]([256, 256])
463
region = itk.ImageRegion[2]()
464
region.SetSize(size)
465
image.SetRegions(region)
466
image.Allocate()
467
468
# Create image with edges and noise
469
np.random.seed(42)
470
for x in range(256):
471
for y in range(256):
472
idx = itk.Index[2]([x, y])
473
474
# Create step pattern
475
base_value = 200.0 if (x > 128 and y > 128) else 100.0
476
if x > 128 and y <= 128:
477
base_value = 150.0
478
if x <= 128 and y > 128:
479
base_value = 75.0
480
481
# Add noise
482
noise = np.random.normal(0, 15)
483
value = base_value + noise
484
image.SetPixel(idx, max(0.0, min(255.0, value)))
485
486
# Create GPU image
487
GPUImageType = itk.GPUImage[itk.F, 2]
488
gpu_input = GPUImageType.New()
489
gpu_input.Graft(image)
490
491
# Create GPU anisotropic diffusion filter
492
GPUDiffusionFilterType = itk.GPUGradientAnisotropicDiffusionImageFilter[GPUImageType, GPUImageType]
493
gpu_diffusion_filter = GPUDiffusionFilterType.New()
494
gpu_diffusion_filter.SetInput(gpu_input)
495
gpu_diffusion_filter.SetNumberOfIterations(50)
496
gpu_diffusion_filter.SetTimeStep(0.0625) # Stable time step for 2D
497
gpu_diffusion_filter.SetConductanceParameter(5.0)
498
499
# Time GPU execution
500
gpu_timer = itk.GPUTimer.New()
501
gpu_timer.Start()
502
gpu_diffusion_filter.Update()
503
gpu_timer.Stop()
504
505
gpu_result = gpu_diffusion_filter.GetOutput()
506
gpu_time = gpu_timer.GetElapsedTime()
507
508
print(f"GPU anisotropic diffusion completed in {gpu_time:.3f} ms")
509
print(f"GPU iterations: {gpu_diffusion_filter.GetElapsedIterations()}")
510
print(f"GPU final RMS change: {gpu_diffusion_filter.GetRMSChange():.6f}")
511
512
# Compare with CPU version
513
CPUDiffusionFilterType = itk.GradientAnisotropicDiffusionImageFilter[ImageType, ImageType]
514
cpu_diffusion_filter = CPUDiffusionFilterType.New()
515
cpu_diffusion_filter.SetInput(image)
516
cpu_diffusion_filter.SetNumberOfIterations(50)
517
cpu_diffusion_filter.SetTimeStep(0.0625)
518
cpu_diffusion_filter.SetConductanceParameter(5.0)
519
520
cpu_timer = itk.GPUTimer.New()
521
cpu_timer.Start()
522
cpu_diffusion_filter.Update()
523
cpu_timer.Stop()
524
525
cpu_result = cpu_diffusion_filter.GetOutput()
526
cpu_time = cpu_timer.GetElapsedTime()
527
528
print(f"CPU anisotropic diffusion completed in {cpu_time:.3f} ms")
529
print(f"CPU iterations: {cpu_diffusion_filter.GetElapsedIterations()}")
530
print(f"CPU final RMS change: {cpu_diffusion_filter.GetRMSChange():.6f}")
531
print(f"GPU speedup: {cpu_time/gpu_time:.2f}x")
532
533
# Copy GPU result to CPU for comparison
534
gpu_result.UpdateCPUBuffer()
535
536
# Compare results at several points
537
test_points = [
538
itk.Index[2]([64, 64]), # Bottom-left region
539
itk.Index[2]([192, 64]), # Bottom-right region
540
itk.Index[2]([64, 192]), # Top-left region
541
itk.Index[2]([192, 192]), # Top-right region
542
itk.Index[2]([128, 128]) # Center (edge region)
543
]
544
545
print("\nComparison of GPU vs CPU results:")
546
total_diff = 0.0
547
for idx in test_points:
548
gpu_val = gpu_result.GetPixel(idx)
549
cpu_val = cpu_result.GetPixel(idx)
550
diff = abs(gpu_val - cpu_val)
551
total_diff += diff
552
print(f"Index {idx}: GPU={gpu_val:.3f}, CPU={cpu_val:.3f}, Diff={diff:.6f}")
553
554
avg_diff = total_diff / len(test_points)
555
print(f"Average difference: {avg_diff:.6f}")
556
```
557
558
### Custom GPU Kernel
559
560
```python
561
import itk
562
563
# This example shows how to create and use custom GPU kernels
564
# Note: In practice, kernel source would be more complex
565
566
# Define custom OpenCL kernel source
567
kernel_source = """
568
__kernel void custom_add_constant(
569
__global float* input,
570
__global float* output,
571
const float constant,
572
const int width,
573
const int height)
574
{
575
int gid_x = get_global_id(0);
576
int gid_y = get_global_id(1);
577
578
if (gid_x >= width || gid_y >= height)
579
return;
580
581
int idx = gid_y * width + gid_x;
582
output[idx] = input[idx] + constant;
583
}
584
"""
585
586
# Create kernel manager
587
kernel_manager = itk.GPUKernelManager.New()
588
kernel_manager.SetKernelSource(kernel_source)
589
590
# Create kernel
591
kernel_id = kernel_manager.CreateKernel("custom_add_constant")
592
593
if kernel_id >= 0:
594
print(f"Custom kernel created with ID: {kernel_id}")
595
596
# Create test image
597
ImageType = itk.Image[itk.F, 2]
598
image = ImageType.New()
599
600
size = itk.Size[2]([64, 64])
601
region = itk.ImageRegion[2]()
602
region.SetSize(size)
603
image.SetRegions(region)
604
image.Allocate()
605
606
# Fill with test data
607
for x in range(64):
608
for y in range(64):
609
idx = itk.Index[2]([x, y])
610
value = float(x + y)
611
image.SetPixel(idx, value)
612
613
# Create GPU images
614
GPUImageType = itk.GPUImage[itk.F, 2]
615
gpu_input = GPUImageType.New()
616
gpu_input.Graft(image)
617
618
gpu_output = GPUImageType.New()
619
gpu_output.SetRegions(region)
620
gpu_output.Allocate()
621
622
# Set kernel arguments
623
constant_value = 42.0
624
kernel_manager.SetKernelArgWithImage(kernel_id, 0, gpu_input.GetGPUDataManager())
625
kernel_manager.SetKernelArgWithImage(kernel_id, 1, gpu_output.GetGPUDataManager())
626
kernel_manager.SetKernelArg(kernel_id, 2, constant_value)
627
kernel_manager.SetKernelArg(kernel_id, 3, 64) # width
628
kernel_manager.SetKernelArg(kernel_id, 4, 64) # height
629
630
# Launch kernel
631
global_work_size = [64, 64]
632
local_work_size = [8, 8]
633
634
success = kernel_manager.LaunchKernel(kernel_id, global_work_size, local_work_size)
635
636
if success:
637
print("Custom kernel executed successfully")
638
639
# Copy result back to CPU
640
gpu_output.UpdateCPUBuffer()
641
642
# Verify results
643
test_idx = itk.Index[2]([10, 20])
644
input_val = image.GetPixel(test_idx)
645
output_val = gpu_output.GetPixel(test_idx)
646
expected = input_val + constant_value
647
648
print(f"Input: {input_val}, Output: {output_val}, Expected: {expected}")
649
print(f"Kernel working correctly: {abs(output_val - expected) < 1e-6}")
650
else:
651
print("Failed to launch custom kernel")
652
else:
653
print("Failed to create custom kernel")
654
```
655
656
### GPU Memory Management
657
658
```python
659
import itk
660
661
# Create test images of different sizes
662
sizes = [
663
itk.Size[2]([128, 128]),
664
itk.Size[2]([256, 256]),
665
itk.Size[2]([512, 512])
666
]
667
668
ImageType = itk.Image[itk.F, 2]
669
GPUImageType = itk.GPUImage[itk.F, 2]
670
671
gpu_images = []
672
673
print("GPU Memory Management Example")
674
print("=" * 40)
675
676
for i, size in enumerate(sizes):
677
# Create CPU image
678
image = ImageType.New()
679
region = itk.ImageRegion[2]()
680
region.SetSize(size)
681
image.SetRegions(region)
682
image.Allocate()
683
684
# Fill with data
685
for x in range(size[0]):
686
for y in range(size[1]):
687
idx = itk.Index[2]([x, y])
688
value = float((x + y) % 100)
689
image.SetPixel(idx, value)
690
691
# Create GPU image
692
gpu_image = GPUImageType.New()
693
gpu_image.Graft(image)
694
695
# Check memory usage
696
data_manager = gpu_image.GetGPUDataManager()
697
buffer_size = data_manager.GetBufferSize()
698
699
print(f"Image {i+1}: {size[0]}x{size[1]}")
700
print(f" Buffer size: {buffer_size} bytes ({buffer_size/(1024*1024):.2f} MB)")
701
print(f" CPU dirty: {data_manager.IsCPUBufferDirty()}")
702
print(f" GPU dirty: {data_manager.IsGPUBufferDirty()}")
703
704
# Modify GPU data to make CPU buffer dirty
705
gpu_image.SetGPUDataDirty()
706
print(f" After GPU modification - CPU dirty: {data_manager.IsCPUBufferDirty()}")
707
708
# Update CPU buffer
709
gpu_image.UpdateCPUBuffer()
710
print(f" After CPU update - CPU dirty: {data_manager.IsCPUBufferDirty()}")
711
712
gpu_images.append(gpu_image)
713
print()
714
715
# Test synchronization
716
print("Testing CPU/GPU synchronization:")
717
test_gpu_image = gpu_images[0]
718
test_idx = itk.Index[2]([50, 50])
719
720
# Modify on CPU side
721
original_value = test_gpu_image.GetPixel(test_idx)
722
new_value = original_value + 100.0
723
test_gpu_image.SetPixel(test_idx, new_value)
724
725
print(f"Original value: {original_value}")
726
print(f"Modified CPU value: {test_gpu_image.GetPixel(test_idx)}")
727
728
# Check if GPU buffer needs updating
729
data_manager = test_gpu_image.GetGPUDataManager()
730
print(f"GPU buffer dirty after CPU modification: {data_manager.IsGPUBufferDirty()}")
731
732
# Update GPU buffer
733
test_gpu_image.UpdateGPUBuffer()
734
print(f"GPU buffer dirty after update: {data_manager.IsGPUBufferDirty()}")
735
736
print("\nMemory management example completed")
737
```