or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-structures.mdfiltering.mdfinite-difference.mdgpu-computing.mdimage-adaptors.mdimage-functions.mdindex.mdio.mdmesh.mdnumpy-integration.mdregistration.mdsegmentation.mdspatial-objects.mdtransforms.md

gpu-computing.mddocs/

0

# GPU Computing

1

2

GPU-accelerated computing support using OpenCL for high-performance image processing operations. ITK's GPU framework provides seamless acceleration of common image processing tasks while maintaining the same API as CPU-based operations.

3

4

## Capabilities

5

6

### GPU Memory Management

7

8

Classes for managing GPU memory and synchronization between CPU and GPU data.

9

10

```python { .api }

11

class GPUImage[TPixel, VImageDimension]:

12

"""

13

GPU-resident image container with automatic CPU/GPU synchronization.

14

15

Template Parameters:

16

- TPixel: Pixel data type

17

- VImageDimension: Image dimensionality

18

"""

19

def New() -> GPUImage[TPixel, VImageDimension]: ...

20

def GetGPUDataManager(self) -> GPUDataManager: ...

21

def UpdateGPUBuffer(self) -> None: ...

22

def UpdateCPUBuffer(self) -> None: ...

23

def GetGPUBufferPointer(self) -> GPUBufferPointer: ...

24

def GetCPUBufferPointer(self) -> CPUBufferPointer: ...

25

def Graft(self, data: DataObject) -> None: ...

26

def Initialize(self) -> None: ...

27

def FillBuffer(self, value: TPixel) -> None: ...

28

def SetPixel(self, index: Index[VImageDimension], value: TPixel) -> None: ...

29

def GetPixel(self, index: Index[VImageDimension]) -> TPixel: ...

30

def SetGPUDataDirty(self) -> None: ...

31

def SetCPUDataDirty(self) -> None: ...

32

33

class GPUImageDataManager[TImageType]:

34

"""Manages CPU/GPU memory synchronization for images."""

35

def New() -> GPUImageDataManager[TImageType]: ...

36

def SetBufferSize(self, size: int) -> None: ...

37

def SetCPUBufferPointer(self, ptr: PixelType) -> None: ...

38

def SetCPUDirtyFlag(self, isDirty: bool) -> None: ...

39

def SetGPUDirtyFlag(self, isDirty: bool) -> None: ...

40

def IsCPUBufferDirty(self) -> bool: ...

41

def IsGPUBufferDirty(self) -> bool: ...

42

def UpdateCPUBuffer(self) -> None: ...

43

def UpdateGPUBuffer(self) -> None: ...

44

def Allocate(self) -> bool: ...

45

def SetBufferFlag(self, flags: int) -> None: ...

46

def GetGPUBufferPointer(self) -> GPUBufferPointer: ...

47

def GetCPUBufferPointer(self) -> CPUBufferPointer: ...

48

49

class GPUDataManager:

50

"""Base class for GPU memory management."""

51

def SetBufferSize(self, size: int) -> None: ...

52

def SetCPUBufferPointer(self, ptr: void) -> None: ...

53

def SetCPUDirtyFlag(self, isDirty: bool) -> None: ...

54

def SetGPUDirtyFlag(self, isDirty: bool) -> None: ...

55

def IsCPUBufferDirty(self) -> bool: ...

56

def IsGPUBufferDirty(self) -> bool: ...

57

def UpdateCPUBuffer(self) -> None: ...

58

def UpdateGPUBuffer(self) -> None: ...

59

def Allocate(self) -> bool: ...

60

def Initialize(self) -> None: ...

61

def SetBufferFlag(self, flags: int) -> None: ...

62

def GetBufferSize(self) -> int: ...

63

```

64

65

### OpenCL Context Management

66

67

Classes for managing OpenCL context, devices, and execution environment.

68

69

```python { .api }

70

class GPUContextManager:

71

"""Singleton class managing OpenCL context and devices."""

72

@staticmethod

73

def GetInstance() -> GPUContextManager: ...

74

def GetNumberOfDevices(self) -> int: ...

75

def GetCurrentDevice(self) -> int: ...

76

def SetCurrentDevice(self, device: int) -> bool: ...

77

def GetDeviceName(self, device: int) -> str: ...

78

def GetDeviceVersion(self, device: int) -> str: ...

79

def GetMaxWorkGroupSize(self, device: int) -> int: ...

80

def GetMaxWorkItemSizes(self, device: int) -> list[int]: ...

81

def GetLocalMemorySize(self, device: int) -> int: ...

82

def GetGlobalMemorySize(self, device: int) -> int: ...

83

def GetMaxComputeUnits(self, device: int) -> int: ...

84

def GetMaxClockFrequency(self, device: int) -> int: ...

85

def CreateCommandQueue(self) -> CommandQueueType: ...

86

def GetCommandQueue(self, device: int) -> CommandQueueType: ...

87

def GetContext(self) -> ContextType: ...

88

def GetProgram(self, device: int) -> ProgramType: ...

89

def GetDevice(self, device: int) -> DeviceType: ...

90

91

class GPUKernelManager:

92

"""Manages OpenCL kernel compilation and execution."""

93

def New() -> GPUKernelManager: ...

94

def SetKernelSource(self, source: str) -> None: ...

95

def SetKernelSourceFile(self, filename: str) -> None: ...

96

def CreateKernel(self, kernel_name: str) -> int: ...

97

def GetKernel(self, kernel_id: int) -> KernelType: ...

98

def SetKernelArg(self, kernel_id: int, arg_index: int, arg: ArgumentType) -> bool: ...

99

def SetKernelArgWithImage(self, kernel_id: int, arg_index: int, image: GPUDataManager) -> bool: ...

100

def LaunchKernel(self, kernel_id: int, global_work_size: list[int], local_work_size: list[int]) -> bool: ...

101

def SetGlobalWorkSize(self, dimension: int, global_work_size: list[int]) -> None: ...

102

def SetLocalWorkSize(self, dimension: int, local_work_size: list[int]) -> None: ...

103

def GetLocalWorkSize(self, dimension: int) -> list[int]: ...

104

def GetGlobalWorkSize(self, dimension: int) -> list[int]: ...

105

```

106

107

### GPU Image Processing Operations

108

109

GPU-accelerated versions of common image processing operations.

110

111

```python { .api }

112

class GPUImageOpsKernel:

113

"""Collection of basic GPU image operation kernels."""

114

def New() -> GPUImageOpsKernel: ...

115

def GetOpenCLSource(self) -> str: ...

116

def LoadProgramFromFile(self, filename: str) -> bool: ...

117

def LoadProgramFromString(self, source: str) -> bool: ...

118

119

class GPUMeanImageFilter[TInputImage, TOutputImage]:

120

"""GPU-accelerated mean filtering."""

121

def New() -> GPUMeanImageFilter[TInputImage, TOutputImage]: ...

122

def SetRadius(self, radius: InputSizeType) -> None: ...

123

def GetRadius(self) -> InputSizeType: ...

124

def GetKernelRadius(self) -> int: ...

125

126

class GPUBinaryThresholdImageFilter[TInputImage, TOutputImage]:

127

"""GPU-accelerated binary thresholding."""

128

def New() -> GPUBinaryThresholdImageFilter[TInputImage, TOutputImage]: ...

129

def SetLowerThreshold(self, threshold: InputPixelType) -> None: ...

130

def GetLowerThreshold(self) -> InputPixelType: ...

131

def SetUpperThreshold(self, threshold: InputPixelType) -> None: ...

132

def GetUpperThreshold(self) -> InputPixelType: ...

133

def SetInsideValue(self, value: OutputPixelType) -> None: ...

134

def GetInsideValue(self) -> OutputPixelType: ...

135

def SetOutsideValue(self, value: OutputPixelType) -> None: ...

136

def GetOutsideValue(self) -> OutputPixelType: ...

137

138

class GPUGradientImageFilter[TInputImage, TOutputImage]:

139

"""GPU-accelerated gradient computation."""

140

def New() -> GPUGradientImageFilter[TInputImage, TOutputImage]: ...

141

def SetUseImageSpacing(self, flag: bool) -> None: ...

142

def GetUseImageSpacing(self) -> bool: ...

143

def SetUseImageDirection(self, flag: bool) -> None: ...

144

def GetUseImageDirection(self) -> bool: ...

145

146

class GPUNeighborhoodOperatorImageFilter[TInputImage, TOutputImage, TOperatorValueType, TParentImageFilter]:

147

"""GPU-accelerated neighborhood operations."""

148

def New() -> GPUNeighborhoodOperatorImageFilter[TInputImage, TOutputImage, TOperatorValueType, TParentImageFilter]: ...

149

def SetOperator(self, op: OperatorType) -> None: ...

150

def GetOperator(self) -> OperatorType: ...

151

def OverrideBoundaryCondition(self, condition: BoundaryConditionType) -> None: ...

152

```

153

154

### GPU Finite Difference Framework

155

156

GPU-accelerated finite difference solvers for PDE-based image processing.

157

158

```python { .api }

159

class GPUFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]:

160

"""

161

GPU-accelerated finite difference solver base class.

162

163

Template Parameters:

164

- TInputImage: Input image type

165

- TOutputImage: Output image type

166

- TParentImageFilter: CPU parent filter type

167

"""

168

def New() -> GPUFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]: ...

169

def SetNumberOfIterations(self, iterations: IdentifierType) -> None: ...

170

def GetNumberOfIterations(self) -> IdentifierType: ...

171

def SetTimeStep(self, time_step: TimeStepType) -> None: ...

172

def GetTimeStep(self) -> TimeStepType: ...

173

def SetMaximumRMSError(self, error: double) -> None: ...

174

def GetMaximumRMSError(self) -> double: ...

175

def GetRMSChange(self) -> double: ...

176

def GPUGenerateData(self) -> None: ...

177

def GPUCalculateChange(self) -> TimeStepType: ...

178

def GPUApplyUpdate(self, time_step: TimeStepType) -> None: ...

179

180

class GPUDenseFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]:

181

"""GPU-accelerated dense finite difference solver."""

182

def New() -> GPUDenseFiniteDifferenceImageFilter[TInputImage, TOutputImage, TParentImageFilter]: ...

183

def GetFiniteDifferenceFunction(self) -> FiniteDifferenceFunctionType: ...

184

def SetFiniteDifferenceFunction(self, function: FiniteDifferenceFunctionType) -> None: ...

185

186

class GPUFiniteDifferenceFunction[TImageType]:

187

"""Base class for GPU finite difference functions."""

188

def ComputeUpdate(self, neighborhood: NeighborhoodType, global_data: GlobalDataStruct,

189

gd: GlobalDataStruct) -> PixelType: ...

190

def GetRadius(self) -> RadiusType: ...

191

def SetTimeStep(self, time_step: TimeStepType) -> None: ...

192

def GetTimeStep(self) -> TimeStepType: ...

193

def GPUComputeUpdate(self, input: ImagePointer, output: ImagePointer,

194

global_data: GlobalDataStruct) -> TimeStepType: ...

195

def GPUCalculateChange(self, input: ImagePointer) -> TimeStepType: ...

196

def GPUApplyUpdate(self, input: ImagePointer, update: ImagePointer,

197

time_step: TimeStepType) -> None: ...

198

```

199

200

### GPU Anisotropic Diffusion

201

202

GPU implementations of anisotropic diffusion algorithms.

203

204

```python { .api }

205

class GPUGradientAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]:

206

"""GPU-accelerated gradient-based anisotropic diffusion."""

207

def New() -> GPUGradientAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]: ...

208

def SetNumberOfIterations(self, iterations: int) -> None: ...

209

def GetNumberOfIterations(self) -> int: ...

210

def SetTimeStep(self, time_step: TimeStepType) -> None: ...

211

def GetTimeStep(self) -> TimeStepType: ...

212

def SetConductanceParameter(self, conductance: double) -> None: ...

213

def GetConductanceParameter(self) -> double: ...

214

215

class GPUCurvatureAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]:

216

"""GPU-accelerated curvature-based anisotropic diffusion."""

217

def New() -> GPUCurvatureAnisotropicDiffusionImageFilter[TInputImage, TOutputImage]: ...

218

def SetNumberOfIterations(self, iterations: int) -> None: ...

219

def GetNumberOfIterations(self) -> int: ...

220

def SetTimeStep(self, time_step: TimeStepType) -> None: ...

221

def GetTimeStep(self) -> TimeStepType: ...

222

def SetConductanceParameter(self, conductance: double) -> None: ...

223

def GetConductanceParameter(self) -> double: ...

224

225

class GPUAnisotropicDiffusionFunction[TImage]:

226

"""Base GPU anisotropic diffusion function."""

227

def ComputeUpdate(self, neighborhood: NeighborhoodType, global_data: GlobalDataStruct,

228

gd: GlobalDataStruct) -> PixelType: ...

229

def SetConductanceParameter(self, conductance: double) -> None: ...

230

def GetConductanceParameter(self) -> double: ...

231

def SetTimeStep(self, time_step: TimeStepType) -> None: ...

232

def GetTimeStep(self) -> TimeStepType: ...

233

```

234

235

### GPU Utilities and Helpers

236

237

Utility classes for GPU operations and performance monitoring.

238

239

```python { .api }

240

class GPUInPlaceImageFilter[TInputImage, TParentImageFilter]:

241

"""Base class for GPU filters that modify images in-place."""

242

def New() -> GPUInPlaceImageFilter[TInputImage, TParentImageFilter]: ...

243

def SetInPlace(self, flag: bool) -> None: ...

244

def GetInPlace(self) -> bool: ...

245

def CanRunInPlace(self) -> bool: ...

246

247

class GPUImageToImageFilter[TInputImage, TOutputImage, TParentImageFilter]:

248

"""Base class for GPU image-to-image filters."""

249

def New() -> GPUImageToImageFilter[TInputImage, TOutputImage, TParentImageFilter]: ...

250

def GetGPUEnabled(self) -> bool: ...

251

def SetGPUEnabled(self, flag: bool) -> None: ...

252

def GPUGenerateData(self) -> None: ...

253

254

class GPUTimer:

255

"""High-resolution timer for GPU operations."""

256

def New() -> GPUTimer: ...

257

def Start(self) -> None: ...

258

def Stop(self) -> None: ...

259

def GetElapsedTime(self) -> double: ...

260

def Reset(self) -> None: ...

261

```

262

263

## Usage Examples

264

265

### Basic GPU Image Processing

266

267

```python

268

import itk

269

import numpy as np

270

271

# Check if GPU support is available

272

gpu_context = itk.GPUContextManager.GetInstance()

273

if gpu_context.GetNumberOfDevices() == 0:

274

print("No GPU devices available")

275

exit()

276

277

print(f"Found {gpu_context.GetNumberOfDevices()} GPU device(s)")

278

print(f"Using device: {gpu_context.GetDeviceName(0)}")

279

280

# Create test image

281

ImageType = itk.Image[itk.F, 2]

282

image = ImageType.New()

283

284

size = itk.Size[2]([256, 256])

285

region = itk.ImageRegion[2]()

286

region.SetSize(size)

287

image.SetRegions(region)

288

image.Allocate()

289

290

# Fill with noisy pattern

291

np.random.seed(42)

292

for x in range(256):

293

for y in range(256):

294

idx = itk.Index[2]([x, y])

295

# Create pattern with noise

296

value = 100.0 + 50.0 * np.sin(x/10.0) * np.cos(y/10.0)

297

noise = np.random.normal(0, 10)

298

image.SetPixel(idx, value + noise)

299

300

# Create GPU image

301

GPUImageType = itk.GPUImage[itk.F, 2]

302

gpu_image = GPUImageType.New()

303

gpu_image.Graft(image) # Copy CPU image to GPU

304

305

print("Image copied to GPU")

306

print(f"GPU buffer size: {gpu_image.GetGPUDataManager().GetBufferSize()} bytes")

307

308

# Apply GPU mean filter

309

GPUMeanFilterType = itk.GPUMeanImageFilter[GPUImageType, GPUImageType]

310

gpu_mean_filter = GPUMeanFilterType.New()

311

gpu_mean_filter.SetInput(gpu_image)

312

gpu_mean_filter.SetRadius(itk.Size[2]([2, 2])) # 5x5 kernel

313

314

# Time the GPU operation

315

gpu_timer = itk.GPUTimer.New()

316

gpu_timer.Start()

317

gpu_mean_filter.Update()

318

gpu_timer.Stop()

319

320

gpu_result = gpu_mean_filter.GetOutput()

321

gpu_time = gpu_timer.GetElapsedTime()

322

323

print(f"GPU mean filtering completed in {gpu_time:.3f} ms")

324

325

# Compare with CPU version for verification

326

CPUMeanFilterType = itk.MeanImageFilter[ImageType, ImageType]

327

cpu_mean_filter = CPUMeanFilterType.New()

328

cpu_mean_filter.SetInput(image)

329

cpu_mean_filter.SetRadius(itk.Size[2]([2, 2]))

330

331

cpu_timer = itk.GPUTimer.New()

332

cpu_timer.Start()

333

cpu_mean_filter.Update()

334

cpu_timer.Stop()

335

336

cpu_result = cpu_mean_filter.GetOutput()

337

cpu_time = cpu_timer.GetElapsedTime()

338

339

print(f"CPU mean filtering completed in {cpu_time:.3f} ms")

340

print(f"GPU speedup: {cpu_time/gpu_time:.2f}x")

341

342

# Copy GPU result back to CPU for comparison

343

gpu_result.UpdateCPUBuffer()

344

345

# Compare results at a few points

346

test_indices = [

347

itk.Index[2]([50, 50]),

348

itk.Index[2]([100, 100]),

349

itk.Index[2]([200, 200])

350

]

351

352

max_diff = 0.0

353

for idx in test_indices:

354

gpu_val = gpu_result.GetPixel(idx)

355

cpu_val = cpu_result.GetPixel(idx)

356

diff = abs(gpu_val - cpu_val)

357

max_diff = max(max_diff, diff)

358

print(f"Index {idx}: GPU={gpu_val:.3f}, CPU={cpu_val:.3f}, Diff={diff:.6f}")

359

360

print(f"Maximum difference: {max_diff:.6f}")

361

```

362

363

### GPU Binary Thresholding

364

365

```python

366

import itk

367

import numpy as np

368

369

# Create test image with varying intensities

370

ImageType = itk.Image[itk.UC, 2] # Use unsigned char

371

image = ImageType.New()

372

373

size = itk.Size[2]([512, 512])

374

region = itk.ImageRegion[2]()

375

region.SetSize(size)

376

image.SetRegions(region)

377

image.Allocate()

378

379

# Create gradient pattern

380

for x in range(512):

381

for y in range(512):

382

idx = itk.Index[2]([x, y])

383

value = int((x + y) * 255 / 1022) # Scale to 0-255

384

image.SetPixel(idx, value)

385

386

# Create GPU image

387

GPUImageType = itk.GPUImage[itk.UC, 2]

388

gpu_input = GPUImageType.New()

389

gpu_input.Graft(image)

390

391

# Create GPU binary threshold filter

392

GPUBinaryImageType = itk.GPUImage[itk.UC, 2]

393

GPUThresholdFilterType = itk.GPUBinaryThresholdImageFilter[GPUImageType, GPUBinaryImageType]

394

395

gpu_threshold_filter = GPUThresholdFilterType.New()

396

gpu_threshold_filter.SetInput(gpu_input)

397

gpu_threshold_filter.SetLowerThreshold(100)

398

gpu_threshold_filter.SetUpperThreshold(200)

399

gpu_threshold_filter.SetInsideValue(255)

400

gpu_threshold_filter.SetOutsideValue(0)

401

402

# Time GPU execution

403

gpu_timer = itk.GPUTimer.New()

404

gpu_timer.Start()

405

gpu_threshold_filter.Update()

406

gpu_timer.Stop()

407

408

gpu_result = gpu_threshold_filter.GetOutput()

409

gpu_time = gpu_timer.GetElapsedTime()

410

411

print(f"GPU binary thresholding completed in {gpu_time:.3f} ms")

412

413

# Compare with CPU version

414

CPUThresholdFilterType = itk.BinaryThresholdImageFilter[ImageType, ImageType]

415

cpu_threshold_filter = CPUThresholdFilterType.New()

416

cpu_threshold_filter.SetInput(image)

417

cpu_threshold_filter.SetLowerThreshold(100)

418

cpu_threshold_filter.SetUpperThreshold(200)

419

cpu_threshold_filter.SetInsideValue(255)

420

cpu_threshold_filter.SetOutsideValue(0)

421

422

cpu_timer = itk.GPUTimer.New()

423

cpu_timer.Start()

424

cpu_threshold_filter.Update()

425

cpu_timer.Stop()

426

427

cpu_result = cpu_threshold_filter.GetOutput()

428

cpu_time = cpu_timer.GetElapsedTime()

429

430

print(f"CPU binary thresholding completed in {cpu_time:.3f} ms")

431

print(f"GPU speedup: {cpu_time/gpu_time:.2f}x")

432

433

# Verify results match

434

gpu_result.UpdateCPUBuffer()

435

436

# Count threshold pixels

437

gpu_count = 0

438

cpu_count = 0

439

for x in range(512):

440

for y in range(512):

441

idx = itk.Index[2]([x, y])

442

if gpu_result.GetPixel(idx) == 255:

443

gpu_count += 1

444

if cpu_result.GetPixel(idx) == 255:

445

cpu_count += 1

446

447

print(f"GPU thresholded pixels: {gpu_count}")

448

print(f"CPU thresholded pixels: {cpu_count}")

449

print(f"Results match: {gpu_count == cpu_count}")

450

```

451

452

### GPU Anisotropic Diffusion

453

454

```python

455

import itk

456

import numpy as np

457

458

# Create noisy test image

459

ImageType = itk.Image[itk.F, 2]

460

image = ImageType.New()

461

462

size = itk.Size[2]([256, 256])

463

region = itk.ImageRegion[2]()

464

region.SetSize(size)

465

image.SetRegions(region)

466

image.Allocate()

467

468

# Create image with edges and noise

469

np.random.seed(42)

470

for x in range(256):

471

for y in range(256):

472

idx = itk.Index[2]([x, y])

473

474

# Create step pattern

475

base_value = 200.0 if (x > 128 and y > 128) else 100.0

476

if x > 128 and y <= 128:

477

base_value = 150.0

478

if x <= 128 and y > 128:

479

base_value = 75.0

480

481

# Add noise

482

noise = np.random.normal(0, 15)

483

value = base_value + noise

484

image.SetPixel(idx, max(0.0, min(255.0, value)))

485

486

# Create GPU image

487

GPUImageType = itk.GPUImage[itk.F, 2]

488

gpu_input = GPUImageType.New()

489

gpu_input.Graft(image)

490

491

# Create GPU anisotropic diffusion filter

492

GPUDiffusionFilterType = itk.GPUGradientAnisotropicDiffusionImageFilter[GPUImageType, GPUImageType]

493

gpu_diffusion_filter = GPUDiffusionFilterType.New()

494

gpu_diffusion_filter.SetInput(gpu_input)

495

gpu_diffusion_filter.SetNumberOfIterations(50)

496

gpu_diffusion_filter.SetTimeStep(0.0625) # Stable time step for 2D

497

gpu_diffusion_filter.SetConductanceParameter(5.0)

498

499

# Time GPU execution

500

gpu_timer = itk.GPUTimer.New()

501

gpu_timer.Start()

502

gpu_diffusion_filter.Update()

503

gpu_timer.Stop()

504

505

gpu_result = gpu_diffusion_filter.GetOutput()

506

gpu_time = gpu_timer.GetElapsedTime()

507

508

print(f"GPU anisotropic diffusion completed in {gpu_time:.3f} ms")

509

print(f"GPU iterations: {gpu_diffusion_filter.GetElapsedIterations()}")

510

print(f"GPU final RMS change: {gpu_diffusion_filter.GetRMSChange():.6f}")

511

512

# Compare with CPU version

513

CPUDiffusionFilterType = itk.GradientAnisotropicDiffusionImageFilter[ImageType, ImageType]

514

cpu_diffusion_filter = CPUDiffusionFilterType.New()

515

cpu_diffusion_filter.SetInput(image)

516

cpu_diffusion_filter.SetNumberOfIterations(50)

517

cpu_diffusion_filter.SetTimeStep(0.0625)

518

cpu_diffusion_filter.SetConductanceParameter(5.0)

519

520

cpu_timer = itk.GPUTimer.New()

521

cpu_timer.Start()

522

cpu_diffusion_filter.Update()

523

cpu_timer.Stop()

524

525

cpu_result = cpu_diffusion_filter.GetOutput()

526

cpu_time = cpu_timer.GetElapsedTime()

527

528

print(f"CPU anisotropic diffusion completed in {cpu_time:.3f} ms")

529

print(f"CPU iterations: {cpu_diffusion_filter.GetElapsedIterations()}")

530

print(f"CPU final RMS change: {cpu_diffusion_filter.GetRMSChange():.6f}")

531

print(f"GPU speedup: {cpu_time/gpu_time:.2f}x")

532

533

# Copy GPU result to CPU for comparison

534

gpu_result.UpdateCPUBuffer()

535

536

# Compare results at several points

537

test_points = [

538

itk.Index[2]([64, 64]), # Bottom-left region

539

itk.Index[2]([192, 64]), # Bottom-right region

540

itk.Index[2]([64, 192]), # Top-left region

541

itk.Index[2]([192, 192]), # Top-right region

542

itk.Index[2]([128, 128]) # Center (edge region)

543

]

544

545

print("\nComparison of GPU vs CPU results:")

546

total_diff = 0.0

547

for idx in test_points:

548

gpu_val = gpu_result.GetPixel(idx)

549

cpu_val = cpu_result.GetPixel(idx)

550

diff = abs(gpu_val - cpu_val)

551

total_diff += diff

552

print(f"Index {idx}: GPU={gpu_val:.3f}, CPU={cpu_val:.3f}, Diff={diff:.6f}")

553

554

avg_diff = total_diff / len(test_points)

555

print(f"Average difference: {avg_diff:.6f}")

556

```

557

558

### Custom GPU Kernel

559

560

```python

561

import itk

562

563

# This example shows how to create and use custom GPU kernels

564

# Note: In practice, kernel source would be more complex

565

566

# Define custom OpenCL kernel source

567

kernel_source = """

568

__kernel void custom_add_constant(

569

__global float* input,

570

__global float* output,

571

const float constant,

572

const int width,

573

const int height)

574

{

575

int gid_x = get_global_id(0);

576

int gid_y = get_global_id(1);

577

578

if (gid_x >= width || gid_y >= height)

579

return;

580

581

int idx = gid_y * width + gid_x;

582

output[idx] = input[idx] + constant;

583

}

584

"""

585

586

# Create kernel manager

587

kernel_manager = itk.GPUKernelManager.New()

588

kernel_manager.SetKernelSource(kernel_source)

589

590

# Create kernel

591

kernel_id = kernel_manager.CreateKernel("custom_add_constant")

592

593

if kernel_id >= 0:

594

print(f"Custom kernel created with ID: {kernel_id}")

595

596

# Create test image

597

ImageType = itk.Image[itk.F, 2]

598

image = ImageType.New()

599

600

size = itk.Size[2]([64, 64])

601

region = itk.ImageRegion[2]()

602

region.SetSize(size)

603

image.SetRegions(region)

604

image.Allocate()

605

606

# Fill with test data

607

for x in range(64):

608

for y in range(64):

609

idx = itk.Index[2]([x, y])

610

value = float(x + y)

611

image.SetPixel(idx, value)

612

613

# Create GPU images

614

GPUImageType = itk.GPUImage[itk.F, 2]

615

gpu_input = GPUImageType.New()

616

gpu_input.Graft(image)

617

618

gpu_output = GPUImageType.New()

619

gpu_output.SetRegions(region)

620

gpu_output.Allocate()

621

622

# Set kernel arguments

623

constant_value = 42.0

624

kernel_manager.SetKernelArgWithImage(kernel_id, 0, gpu_input.GetGPUDataManager())

625

kernel_manager.SetKernelArgWithImage(kernel_id, 1, gpu_output.GetGPUDataManager())

626

kernel_manager.SetKernelArg(kernel_id, 2, constant_value)

627

kernel_manager.SetKernelArg(kernel_id, 3, 64) # width

628

kernel_manager.SetKernelArg(kernel_id, 4, 64) # height

629

630

# Launch kernel

631

global_work_size = [64, 64]

632

local_work_size = [8, 8]

633

634

success = kernel_manager.LaunchKernel(kernel_id, global_work_size, local_work_size)

635

636

if success:

637

print("Custom kernel executed successfully")

638

639

# Copy result back to CPU

640

gpu_output.UpdateCPUBuffer()

641

642

# Verify results

643

test_idx = itk.Index[2]([10, 20])

644

input_val = image.GetPixel(test_idx)

645

output_val = gpu_output.GetPixel(test_idx)

646

expected = input_val + constant_value

647

648

print(f"Input: {input_val}, Output: {output_val}, Expected: {expected}")

649

print(f"Kernel working correctly: {abs(output_val - expected) < 1e-6}")

650

else:

651

print("Failed to launch custom kernel")

652

else:

653

print("Failed to create custom kernel")

654

```

655

656

### GPU Memory Management

657

658

```python

659

import itk

660

661

# Create test images of different sizes

662

sizes = [

663

itk.Size[2]([128, 128]),

664

itk.Size[2]([256, 256]),

665

itk.Size[2]([512, 512])

666

]

667

668

ImageType = itk.Image[itk.F, 2]

669

GPUImageType = itk.GPUImage[itk.F, 2]

670

671

gpu_images = []

672

673

print("GPU Memory Management Example")

674

print("=" * 40)

675

676

for i, size in enumerate(sizes):

677

# Create CPU image

678

image = ImageType.New()

679

region = itk.ImageRegion[2]()

680

region.SetSize(size)

681

image.SetRegions(region)

682

image.Allocate()

683

684

# Fill with data

685

for x in range(size[0]):

686

for y in range(size[1]):

687

idx = itk.Index[2]([x, y])

688

value = float((x + y) % 100)

689

image.SetPixel(idx, value)

690

691

# Create GPU image

692

gpu_image = GPUImageType.New()

693

gpu_image.Graft(image)

694

695

# Check memory usage

696

data_manager = gpu_image.GetGPUDataManager()

697

buffer_size = data_manager.GetBufferSize()

698

699

print(f"Image {i+1}: {size[0]}x{size[1]}")

700

print(f" Buffer size: {buffer_size} bytes ({buffer_size/(1024*1024):.2f} MB)")

701

print(f" CPU dirty: {data_manager.IsCPUBufferDirty()}")

702

print(f" GPU dirty: {data_manager.IsGPUBufferDirty()}")

703

704

# Modify GPU data to make CPU buffer dirty

705

gpu_image.SetGPUDataDirty()

706

print(f" After GPU modification - CPU dirty: {data_manager.IsCPUBufferDirty()}")

707

708

# Update CPU buffer

709

gpu_image.UpdateCPUBuffer()

710

print(f" After CPU update - CPU dirty: {data_manager.IsCPUBufferDirty()}")

711

712

gpu_images.append(gpu_image)

713

print()

714

715

# Test synchronization

716

print("Testing CPU/GPU synchronization:")

717

test_gpu_image = gpu_images[0]

718

test_idx = itk.Index[2]([50, 50])

719

720

# Modify on CPU side

721

original_value = test_gpu_image.GetPixel(test_idx)

722

new_value = original_value + 100.0

723

test_gpu_image.SetPixel(test_idx, new_value)

724

725

print(f"Original value: {original_value}")

726

print(f"Modified CPU value: {test_gpu_image.GetPixel(test_idx)}")

727

728

# Check if GPU buffer needs updating

729

data_manager = test_gpu_image.GetGPUDataManager()

730

print(f"GPU buffer dirty after CPU modification: {data_manager.IsGPUBufferDirty()}")

731

732

# Update GPU buffer

733

test_gpu_image.UpdateGPUBuffer()

734

print(f"GPU buffer dirty after update: {data_manager.IsGPUBufferDirty()}")

735

736

print("\nMemory management example completed")

737

```