or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cuda-core.md · device-memory.md · driver-api.md · gpu-direct-storage.md · index.md · jit-compilation.md · kernels-streams.md · library-management.md · runtime-compilation.md

jit-compilation.mddocs/

0

# JIT Compilation and Linking

1

2

NVVM LLVM-based compilation and NVJitLink just-in-time linking for advanced code generation workflows. This module provides access to NVIDIA's LLVM-based compiler infrastructure for compiling LLVM IR to PTX and advanced JIT linking capabilities for combining multiple device code modules.

3

4

## Capabilities

5

6

### NVVM LLVM-Based Compilation

7

8

Compile LLVM IR to PTX using NVIDIA's LLVM-based compiler backend.

9

10

```python { .api }

11

def create_program() -> int:

12

"""

13

Create a new NVVM compilation program.

14

15

Returns:

16

int: Program handle

17

18

Note:

19

Program manages compilation of LLVM IR modules to PTX

20

"""

21

22

def destroy_program(prog: int) -> None:

23

"""

24

Destroy an NVVM program and free associated resources.

25

26

Args:

27

prog (int): Program handle to destroy

28

"""

29

30

def add_module_to_program(prog: int, buffer: bytes, size: int, name: str) -> None:

31

"""

32

Add an LLVM IR module to the compilation program.

33

34

Args:

35

prog (int): Program handle

36

buffer (bytes): LLVM IR module data

37

size (int): Size of IR data in bytes

38

name (str): Module name for debugging

39

40

Note:

41

Multiple modules can be added to a single program

42

"""

43

44

def compile_program(prog: int, num_options: int, options) -> None:

45

"""

46

Compile all modules in the program to PTX.

47

48

Args:

49

prog (int): Program handle with added modules

50

num_options (int): Number of compilation options

51

options: Compilation option array

52

53

Raises:

54

nvvmError: If compilation fails

55

"""

56

57

def get_compiled_result_size(prog: int) -> int:

58

"""

59

Get the size of the compiled PTX result.

60

61

Args:

62

prog (int): Compiled program handle

63

64

Returns:

65

int: PTX size in bytes

66

"""

67

68

def get_compiled_result(prog: int, buffer: str) -> None:

69

"""

70

Retrieve the compiled PTX code.

71

72

Args:

73

prog (int): Compiled program handle

74

buffer (str): Pre-allocated buffer for PTX (use get_compiled_result_size)

75

"""

76

```

77

78

### NVVM Version and IR Information

79

80

Query NVVM compiler version and supported IR formats.

81

82

```python { .api }

83

def version() -> tuple:

84

"""

85

Get the NVVM compiler version.

86

87

Returns:

88

tuple[int, int]: (major_version, minor_version)

89

"""

90

91

def ir_version() -> tuple:

92

"""

93

Get the supported LLVM IR version.

94

95

Returns:

96

tuple[int, int]: (major_version, minor_version)

97

98

Note:

99

Indicates which LLVM IR versions are supported

100

"""

101

```

102

103

### NVJitLink Just-In-Time Linking

104

105

Link multiple device code modules into a single executable using NVJitLink.

106

107

```python { .api }

108

def create(num_options: int, options) -> int:

109

"""

110

Create a new NVJitLink linker handle.

111

112

Args:

113

num_options (int): Number of linker options

114

options: Linker option array

115

116

Returns:

117

int: Linker handle

118

119

Note:

120

Linker combines multiple device code modules

121

"""

122

123

def destroy(handle: int) -> None:

124

"""

125

Destroy an NVJitLink linker handle.

126

127

Args:

128

handle (int): Linker handle to destroy

129

"""

130

131

def add_data(

132

handle: int,

133

input_type: int,

134

data: bytes,

135

size: int,

136

name: str

137

) -> None:

138

"""

139

Add input data to the linker.

140

141

Args:

142

handle (int): Linker handle

143

input_type (int): Type of input data (PTX, CUBIN, FATBIN, etc.)

144

data (bytes): Input data

145

size (int): Data size in bytes

146

name (str): Input name for debugging

147

"""

148

149

def add_file(handle: int, input_type: int, file_name: str) -> None:

150

"""

151

Add input file to the linker.

152

153

Args:

154

handle (int): Linker handle

155

input_type (int): Type of input file

156

file_name (str): Path to input file

157

"""

158

159

def complete(handle: int) -> None:

160

"""

161

Complete the linking process.

162

163

Args:

164

handle (int): Linker handle with added inputs

165

166

Raises:

167

nvJitLinkError: If linking fails

168

169

Note:

170

Must be called after adding all inputs

171

"""

172

```

173

174

### Linked Code Retrieval

175

176

Extract the linked device code in various formats.

177

178

```python { .api }

179

def get_linked_cubin_size(handle: int) -> int:

180

"""

181

Get the size of the linked CUBIN code.

182

183

Args:

184

handle (int): Completed linker handle

185

186

Returns:

187

int: CUBIN size in bytes

188

"""

189

190

def get_linked_cubin(handle: int, cubin: bytes) -> None:

191

"""

192

Retrieve the linked CUBIN code.

193

194

Args:

195

handle (int): Completed linker handle

196

cubin (bytes): Pre-allocated buffer for CUBIN

197

"""

198

199

def get_linked_ptx_size(handle: int) -> int:

200

"""

201

Get the size of the linked PTX code.

202

203

Args:

204

handle (int): Completed linker handle

205

206

Returns:

207

int: PTX size in bytes

208

"""

209

210

def get_linked_ptx(handle: int, ptx: bytes) -> None:

211

"""

212

Retrieve the linked PTX code.

213

214

Args:

215

handle (int): Completed linker handle

216

ptx (bytes): Pre-allocated buffer for PTX

217

"""

218

```

219

220

### Link Information and Debugging

221

222

Access linking information and error details.

223

224

```python { .api }

225

def get_error_log_size(handle: int) -> int:

226

"""

227

Get the size of the linker error log.

228

229

Args:

230

handle (int): Linker handle

231

232

Returns:

233

int: Error log size in bytes

234

"""

235

236

def get_error_log(handle: int, log: str) -> None:

237

"""

238

Retrieve the linker error log.

239

240

Args:

241

handle (int): Linker handle

242

log (str): Pre-allocated buffer for error log

243

"""

244

245

def get_info_log_size(handle: int) -> int:

246

"""

247

Get the size of the linker information log.

248

249

Args:

250

handle (int): Linker handle

251

252

Returns:

253

int: Info log size in bytes

254

"""

255

256

def get_info_log(handle: int, log: str) -> None:

257

"""

258

Retrieve the linker information log.

259

260

Args:

261

handle (int): Linker handle

262

log (str): Pre-allocated buffer for info log

263

"""

264

265

def version() -> tuple:

266

"""

267

Get the NVJitLink version.

268

269

Returns:

270

tuple[int, int]: (major_version, minor_version)

271

"""

272

```

273

274

## Types

275

276

### NVVM Result Codes

277

278

```python { .api }

279

class Result:

280

"""NVVM compilation result codes"""

281

NVVM_SUCCESS: int # Compilation succeeded

282

NVVM_ERROR_OUT_OF_MEMORY: int # Out of memory

283

NVVM_ERROR_PROGRAM_CREATION_FAILURE: int # Program creation failed

284

NVVM_ERROR_IR_VERSION_MISMATCH: int # IR version not supported

285

NVVM_ERROR_INVALID_INPUT: int # Invalid input data

286

NVVM_ERROR_INVALID_PROGRAM: int # Invalid program handle

287

NVVM_ERROR_INVALID_IR: int # Invalid LLVM IR

288

NVVM_ERROR_INVALID_OPTION: int # Invalid compilation option

289

NVVM_ERROR_COMPILATION: int # Compilation failed

290

```

291

292

### NVJitLink Result Codes

293

294

```python { .api }

295

class Result:

296

"""NVJitLink operation result codes"""

297

NVJITLINK_SUCCESS: int # Operation succeeded

298

NVJITLINK_ERROR_UNRECOGNIZED_OPTION: int # Unrecognized linker option

299

NVJITLINK_ERROR_MISSING_ARCH: int # Missing target architecture

300

NVJITLINK_ERROR_INVALID_INPUT: int # Invalid input data

301

NVJITLINK_ERROR_PTX_COMPILE: int # PTX compilation error

302

NVJITLINK_ERROR_NVVM_COMPILE: int # NVVM compilation error

303

NVJITLINK_ERROR_INTERNAL: int # Internal linker error

304

```

305

306

### Input Types

307

308

```python { .api }

309

class InputType:

310

"""NVJitLink input data type enumeration"""

311

NVJITLINK_INPUT_NONE: int # No input

312

NVJITLINK_INPUT_CUBIN: int # CUBIN binary

313

NVJITLINK_INPUT_PTX: int # PTX assembly

314

NVJITLINK_INPUT_FATBIN: int # Fat binary (multi-architecture)

315

NVJITLINK_INPUT_OBJECT: int # Object file

316

NVJITLINK_INPUT_LIBRARY: int # Static library

317

NVJITLINK_INPUT_NVVM_IR: int # NVVM LLVM IR

318

NVJITLINK_INPUT_NVVM_BITCODE: int # NVVM bitcode

319

```

320

321

### Exception Classes

322

323

```python { .api }

324

class nvvmError(Exception):

325

"""NVVM compilation exception"""

326

def __init__(self, result: Result, message: str): ...

327

328

class nvJitLinkError(Exception):

329

"""NVJitLink operation exception"""

330

def __init__(self, result: Result, message: str): ...

331

```

332

333

## Usage Examples

334

335

### NVVM LLVM IR Compilation

336

337

```python

338

from cuda.bindings import nvvm

339

340

# Sample LLVM IR for a simple kernel

341

llvm_ir = b'''

342

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"

343

target triple = "nvptx64-nvidia-cuda"

344

345

define void @simple_kernel(float* %input, float* %output, i32 %n) {

346

entry:

347

%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()

348

%bid = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()

349

%bdim = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()

350

351

%tmp1 = mul i32 %bid, %bdim

352

%idx = add i32 %tmp1, %tid

353

354

%cond = icmp slt i32 %idx, %n

355

br i1 %cond, label %if.then, label %if.end

356

357

if.then:

358

%input_ptr = getelementptr float, float* %input, i32 %idx

359

%val = load float, float* %input_ptr

360

%result = fmul float %val, 2.0

361

%output_ptr = getelementptr float, float* %output, i32 %idx

362

store float %result, float* %output_ptr

363

br label %if.end

364

365

if.end:

366

ret void

367

}

368

369

declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone

370

declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone

371

declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone

372

'''

373

374

# Pre-bind the handle so the finally clause can tell whether a program was
# actually created; otherwise a failure inside create_program() would surface
# as a NameError from the cleanup code instead of the real error.
program = None

try:
    # Create NVVM program
    program = nvvm.create_program()

    # Add LLVM IR module
    nvvm.add_module_to_program(program, llvm_ir, len(llvm_ir), "simple_kernel.ll")

    # Compilation options
    options = ["-arch=compute_70", "-opt=3"]

    # Compile to PTX
    nvvm.compile_program(program, len(options), options)

    # Get compiled PTX. The output buffer must be writable: a Python str is
    # immutable and can never be filled in place, so allocate a bytearray
    # (as the pipeline example does) and decode it for display.
    ptx_size = nvvm.get_compiled_result_size(program)
    ptx_buffer = bytearray(ptx_size)
    nvvm.get_compiled_result(program, ptx_buffer)
    ptx_text = ptx_buffer.decode("utf-8", errors="replace")

    print(f"Compiled {len(llvm_ir)} bytes of LLVM IR to {ptx_size} bytes of PTX")
    print("First 200 characters of PTX:")
    print(ptx_text[:200])

except nvvm.nvvmError as e:
    print(f"NVVM compilation failed: {e}")

finally:
    # Only destroy a program that was successfully created.
    if program is not None:
        nvvm.destroy_program(program)

# Check NVVM version
major, minor = nvvm.version()
ir_major, ir_minor = nvvm.ir_version()
print(f"NVVM Version: {major}.{minor}")
print(f"Supported IR Version: {ir_major}.{ir_minor}")

407

```

408

409

### NVJitLink Module Linking

410

411

```python

412

from cuda.bindings import nvjitlink

413

414

# Sample PTX modules (simplified)

415

module1_ptx = b'''

416

.version 7.0

417

.target sm_70

418

.address_size 64

419

420

.visible .entry kernel_part1(.param .u64 kernel_part1_param_0) {

421

.reg .u64 %rd<2>;

422

ld.param.u64 %rd1, [kernel_part1_param_0];

423

// Kernel implementation...

424

ret;

425

}

426

'''

427

428

module2_ptx = b'''

429

.version 7.0

430

.target sm_70

431

.address_size 64

432

433

.visible .entry kernel_part2(.param .u64 kernel_part2_param_0) {

434

.reg .u64 %rd<2>;

435

ld.param.u64 %rd1, [kernel_part2_param_0];

436

// Kernel implementation...

437

ret;

438

}

439

'''

440

441

# Pre-bind the handle so the except/finally clauses can tell whether the
# linker was actually created before they query or destroy it; otherwise a
# failure inside create() would turn into a NameError during error handling.
linker = None

try:
    # Create linker with options
    linker_options = ["-arch=sm_70", "-optimize"]
    linker = nvjitlink.create(len(linker_options), linker_options)

    # Add PTX modules
    nvjitlink.add_data(
        linker,
        nvjitlink.InputType.NVJITLINK_INPUT_PTX,
        module1_ptx,
        len(module1_ptx),
        "module1.ptx"
    )

    nvjitlink.add_data(
        linker,
        nvjitlink.InputType.NVJITLINK_INPUT_PTX,
        module2_ptx,
        len(module2_ptx),
        "module2.ptx"
    )

    # Complete linking
    nvjitlink.complete(linker)

    # Get linked CUBIN
    cubin_size = nvjitlink.get_linked_cubin_size(linker)
    cubin_data = bytearray(cubin_size)
    nvjitlink.get_linked_cubin(linker, cubin_data)

    print(f"Linked {len(module1_ptx) + len(module2_ptx)} bytes of PTX")
    print(f"Generated {cubin_size} bytes of CUBIN")

    # Get info log. A str is immutable and cannot act as an output buffer,
    # so use a writable bytearray (same pattern as the CUBIN above) and
    # decode it for printing.
    info_size = nvjitlink.get_info_log_size(linker)
    if info_size > 0:
        info_log = bytearray(info_size)
        nvjitlink.get_info_log(linker, info_log)
        # rstrip("\x00") drops a trailing NUL terminator if the library
        # wrote one into the buffer.
        info_text = info_log.decode("utf-8", errors="replace").rstrip("\x00")
        print("Linker info:", info_text.strip())

except nvjitlink.nvJitLinkError as e:
    print(f"JIT linking failed: {e}")

    # Get error log (only possible if the linker handle exists)
    if linker is not None:
        error_size = nvjitlink.get_error_log_size(linker)
        if error_size > 0:
            error_log = bytearray(error_size)
            nvjitlink.get_error_log(linker, error_log)
            error_text = error_log.decode("utf-8", errors="replace").rstrip("\x00")
            print("Linker errors:", error_text.strip())

finally:
    # Only destroy a linker that was successfully created.
    if linker is not None:
        nvjitlink.destroy(linker)

# Check NVJitLink version
major, minor = nvjitlink.version()
print(f"NVJitLink Version: {major}.{minor}")

497

```

498

499

### Advanced Multi-Module Linking

500

501

```python

502

from cuda.bindings import nvjitlink

503

import os

504

505

def link_cuda_modules(ptx_files, cubin_files, output_name):
    """Link multiple CUDA modules from files.

    Args:
        ptx_files: Iterable of paths to PTX input files.
        cubin_files: Iterable of paths to CUBIN input files.
        output_name: Name passed to the linker through the ``-o=`` option.

    Returns:
        dict: ``'cubin'`` maps to a ``bytearray`` with the linked CUBIN when
        its reported size is non-zero; ``'ptx'`` maps to a ``bytearray`` with
        the linked PTX when it could be retrieved and is non-empty.

    Raises:
        nvjitlink.nvJitLinkError: Propagated from creating the linker, adding
            inputs, completing the link, or retrieving the CUBIN.
    """
    linker_options = [
        "-arch=sm_75",
        "-optimize",
        f"-o={output_name}"
    ]

    linker = nvjitlink.create(len(linker_options), linker_options)

    try:
        # Add PTX files
        for ptx_file in ptx_files:
            nvjitlink.add_file(
                linker,
                nvjitlink.InputType.NVJITLINK_INPUT_PTX,
                ptx_file
            )

        # Add CUBIN files
        for cubin_file in cubin_files:
            nvjitlink.add_file(
                linker,
                nvjitlink.InputType.NVJITLINK_INPUT_CUBIN,
                cubin_file
            )

        # Complete linking
        nvjitlink.complete(linker)

        # Extract results
        results = {}

        # Get CUBIN
        cubin_size = nvjitlink.get_linked_cubin_size(linker)
        if cubin_size > 0:
            cubin_data = bytearray(cubin_size)
            nvjitlink.get_linked_cubin(linker, cubin_data)
            results['cubin'] = cubin_data

        # Get PTX (best effort). Only linker errors are treated as "PTX not
        # available for this link"; a bare except would also swallow
        # KeyboardInterrupt, SystemExit and genuine programming errors.
        try:
            ptx_size = nvjitlink.get_linked_ptx_size(linker)
            if ptx_size > 0:
                ptx_data = bytearray(ptx_size)
                nvjitlink.get_linked_ptx(linker, ptx_data)
                results['ptx'] = ptx_data
        except nvjitlink.nvJitLinkError:
            pass

        return results

    finally:
        # Always release the linker handle, on success or failure.
        nvjitlink.destroy(linker)

561

562

# Example usage

563

if __name__ == "__main__":
    # Inputs for the demo link. These files are assumed to exist on disk;
    # a missing one is reported by the FileNotFoundError handler below.
    ptx_modules = ["kernel1.ptx", "kernel2.ptx"]
    cubin_modules = ["library.cubin"]

    try:
        linked_code = link_cuda_modules(ptx_modules, cubin_modules, "combined")

        # Report whichever artifacts the link produced.
        if 'cubin' in linked_code:
            print(f"Generated CUBIN: {len(linked_code['cubin'])} bytes")

        if 'ptx' in linked_code:
            print(f"Generated PTX: {len(linked_code['ptx'])} bytes")

    except FileNotFoundError as e:
        print(f"Input file not found: {e}")
    except nvjitlink.nvJitLinkError as e:
        print(f"Linking failed: {e}")

582

```

583

584

### NVVM and NVJitLink Pipeline

585

586

```python

587

from cuda.bindings import nvvm, nvjitlink

588

589

def llvm_to_cubin_pipeline(llvm_modules, target_arch="sm_70"):
    """Complete pipeline from LLVM IR to CUBIN via NVVM and NVJitLink.

    Each LLVM IR module is compiled to PTX in its own NVVM program, then all
    resulting PTX modules are linked into a single CUBIN with NVJitLink.

    Args:
        llvm_modules: Iterable of bytes objects, each holding one LLVM IR
            module.
        target_arch: Target architecture in ``sm_NN`` form (e.g. ``"sm_70"``).
            Passed unchanged to the linker as ``-arch=<target_arch>``; the
            part after the first underscore is used to build the NVVM option
            ``-arch=compute_NN``.

    Returns:
        bytes: The linked CUBIN image.

    Raises:
        nvvm.nvvmError: If compiling any module fails.
        nvjitlink.nvJitLinkError: If linking fails.
    """
    # "sm_70" -> "70". The previous slice target_arch[2:] kept the underscore
    # and produced the malformed option "-arch=compute__70".
    arch_number = target_arch.split("_", 1)[-1]
    nvvm_options = [f"-arch=compute_{arch_number}", "-opt=3"]

    ptx_modules = []

    # Step 1: Compile LLVM IR to PTX using NVVM
    for i, llvm_ir in enumerate(llvm_modules):
        program = nvvm.create_program()

        try:
            nvvm.add_module_to_program(
                program, llvm_ir, len(llvm_ir), f"module_{i}.ll"
            )

            nvvm.compile_program(program, len(nvvm_options), nvvm_options)

            ptx_size = nvvm.get_compiled_result_size(program)
            ptx_buffer = bytearray(ptx_size)
            nvvm.get_compiled_result(program, ptx_buffer)

            ptx_modules.append(bytes(ptx_buffer))

        finally:
            # Release the program handle even when compilation fails.
            nvvm.destroy_program(program)

    # Step 2: Link PTX modules to CUBIN using NVJitLink
    linker_options = [f"-arch={target_arch}"]
    linker = nvjitlink.create(len(linker_options), linker_options)

    try:
        for i, ptx_data in enumerate(ptx_modules):
            nvjitlink.add_data(
                linker,
                nvjitlink.InputType.NVJITLINK_INPUT_PTX,
                ptx_data,
                len(ptx_data),
                f"module_{i}.ptx"
            )

        nvjitlink.complete(linker)

        cubin_size = nvjitlink.get_linked_cubin_size(linker)
        cubin_data = bytearray(cubin_size)
        nvjitlink.get_linked_cubin(linker, cubin_data)

        return bytes(cubin_data)

    finally:
        # Release the linker handle on success or failure.
        nvjitlink.destroy(linker)

639

640

# Example usage with mock LLVM IR

641

# Two minimal LLVM IR modules, each defining one empty kernel.
sample_llvm_modules = [
    b'target triple = "nvptx64-nvidia-cuda"\ndefine void @kernel1() { ret void }',
    b'target triple = "nvptx64-nvidia-cuda"\ndefine void @kernel2() { ret void }'
]

# Run the full IR -> PTX -> CUBIN pipeline and report the outcome; any
# failure is reported as a message instead of a traceback.
try:
    final_cubin = llvm_to_cubin_pipeline(sample_llvm_modules, "sm_75")
    print(f"Pipeline generated {len(final_cubin)} bytes of CUBIN")
except Exception as e:
    print(f"Pipeline failed: {e}")

651

```