0
# JIT Compilation and Linking
1
2
This section covers two bindings for advanced code-generation workflows: NVVM, NVIDIA's LLVM-based compiler backend that compiles LLVM IR to PTX, and NVJitLink, a just-in-time linker that combines multiple device code modules. They are exposed as two separate modules, `cuda.bindings.nvvm` and `cuda.bindings.nvjitlink`.
3
4
## Capabilities
5
6
### NVVM LLVM-Based Compilation
7
8
Compile LLVM IR to PTX using NVIDIA's LLVM-based compiler backend.
9
10
```python { .api }
11
def create_program() -> int:
12
"""
13
Create a new NVVM compilation program.
14
15
Returns:
16
int: Program handle
17
18
Note:
19
Program manages compilation of LLVM IR modules to PTX
20
"""
21
22
def destroy_program(prog: int) -> None:
23
"""
24
Destroy an NVVM program and free associated resources.
25
26
Args:
27
prog (int): Program handle to destroy
28
"""
29
30
def add_module_to_program(prog: int, buffer: bytes, size: int, name: str) -> None:
31
"""
32
Add an LLVM IR module to the compilation program.
33
34
Args:
35
prog (int): Program handle
36
buffer (bytes): LLVM IR module data
37
size (int): Size of IR data in bytes
38
name (str): Module name for debugging
39
40
Note:
41
Multiple modules can be added to a single program
42
"""
43
44
def compile_program(prog: int, num_options: int, options) -> None:
45
"""
46
Compile all modules in the program to PTX.
47
48
Args:
49
prog (int): Program handle with added modules
50
num_options (int): Number of compilation options
51
options: Compilation option array
52
53
Raises:
54
nvvmError: If compilation fails
55
"""
56
57
def get_compiled_result_size(prog: int) -> int:
58
"""
59
Get the size of the compiled PTX result.
60
61
Args:
62
prog (int): Compiled program handle
63
64
Returns:
65
int: PTX size in bytes
66
"""
67
68
def get_compiled_result(prog: int, buffer: bytearray) -> None:
    """
    Retrieve the compiled PTX code.

    The PTX is written into ``buffer`` in place; nothing is returned. The
    buffer therefore has to be a writable object such as ``bytearray``
    (an immutable ``str`` or ``bytes`` cannot receive the result).

    Args:
        prog (int): Compiled program handle
        buffer (bytearray): Pre-allocated writable buffer for PTX, sized
            with get_compiled_result_size
    """
76
```
77
78
### NVVM Version and IR Information
79
80
Query the NVVM compiler version and the NVVM IR version it supports.
81
82
```python { .api }
83
def version() -> tuple:
84
"""
85
Get the NVVM compiler version.
86
87
Returns:
88
tuple[int, int]: (major_version, minor_version)
89
"""
90
91
def ir_version() -> tuple:
    """
    Get the supported NVVM IR version.

    Returns:
        tuple[int, int, int, int]:
            (ir_major, ir_minor, debug_major, debug_minor)

    Note:
        The first pair is the NVVM IR version accepted by the compiler;
        the second pair is the debug-metadata version it accepts.
        Callers that only need the IR version should unpack the first
        two elements and ignore the rest.
    """
101
```
102
103
### NVJitLink Just-In-Time Linking
104
105
Link multiple device code modules into a single executable using NVJitLink.
106
107
```python { .api }
108
def create(num_options: int, options) -> int:
109
"""
110
Create a new NVJitLink linker handle.
111
112
Args:
113
num_options (int): Number of linker options
114
options: Linker option array
115
116
Returns:
117
int: Linker handle
118
119
Note:
120
Linker combines multiple device code modules
121
"""
122
123
def destroy(handle: int) -> None:
124
"""
125
Destroy an NVJitLink linker handle.
126
127
Args:
128
handle (int): Linker handle to destroy
129
"""
130
131
def add_data(
132
handle: int,
133
input_type: int,
134
data: bytes,
135
size: int,
136
name: str
137
) -> None:
138
"""
139
Add input data to the linker.
140
141
Args:
142
handle (int): Linker handle
143
input_type (int): Type of input data (PTX, CUBIN, FATBIN, etc.)
144
data (bytes): Input data
145
size (int): Data size in bytes
146
name (str): Input name for debugging
147
"""
148
149
def add_file(handle: int, input_type: int, file_name: str) -> None:
150
"""
151
Add input file to the linker.
152
153
Args:
154
handle (int): Linker handle
155
input_type (int): Type of input file
156
file_name (str): Path to input file
157
"""
158
159
def complete(handle: int) -> None:
160
"""
161
Complete the linking process.
162
163
Args:
164
handle (int): Linker handle with added inputs
165
166
Raises:
167
nvJitLinkError: If linking fails
168
169
Note:
170
Must be called after adding all inputs
171
"""
172
```
173
174
### Linked Code Retrieval
175
176
Extract the linked device code in various formats.
177
178
```python { .api }
179
def get_linked_cubin_size(handle: int) -> int:
180
"""
181
Get the size of the linked CUBIN code.
182
183
Args:
184
handle (int): Completed linker handle
185
186
Returns:
187
int: CUBIN size in bytes
188
"""
189
190
def get_linked_cubin(handle: int, cubin: bytes) -> None:
191
"""
192
Retrieve the linked CUBIN code.
193
194
Args:
195
handle (int): Completed linker handle
196
cubin (bytes): Pre-allocated buffer for CUBIN
197
"""
198
199
def get_linked_ptx_size(handle: int) -> int:
200
"""
201
Get the size of the linked PTX code.
202
203
Args:
204
handle (int): Completed linker handle
205
206
Returns:
207
int: PTX size in bytes
208
"""
209
210
def get_linked_ptx(handle: int, ptx: bytes) -> None:
211
"""
212
Retrieve the linked PTX code.
213
214
Args:
215
handle (int): Completed linker handle
216
ptx (bytes): Pre-allocated buffer for PTX
217
"""
218
```
219
220
### Link Information and Debugging
221
222
Access linking information and error details.
223
224
```python { .api }
225
def get_error_log_size(handle: int) -> int:
226
"""
227
Get the size of the linker error log.
228
229
Args:
230
handle (int): Linker handle
231
232
Returns:
233
int: Error log size in bytes
234
"""
235
236
def get_error_log(handle: int, log: bytearray) -> None:
    """
    Retrieve the linker error log.

    The log text is written into ``log`` in place; nothing is returned.
    The buffer therefore has to be writable (e.g. ``bytearray``) — an
    immutable ``str`` cannot receive the result.

    Args:
        handle (int): Linker handle
        log (bytearray): Pre-allocated writable buffer for the error log,
            sized with get_error_log_size
    """
244
245
def get_info_log_size(handle: int) -> int:
246
"""
247
Get the size of the linker information log.
248
249
Args:
250
handle (int): Linker handle
251
252
Returns:
253
int: Info log size in bytes
254
"""
255
256
def get_info_log(handle: int, log: bytearray) -> None:
    """
    Retrieve the linker information log.

    The log text is written into ``log`` in place; nothing is returned.
    The buffer therefore has to be writable (e.g. ``bytearray``) — an
    immutable ``str`` cannot receive the result.

    Args:
        handle (int): Linker handle
        log (bytearray): Pre-allocated writable buffer for the info log,
            sized with get_info_log_size
    """
264
265
def version() -> tuple:
266
"""
267
Get the NVJitLink version.
268
269
Returns:
270
tuple[int, int]: (major_version, minor_version)
271
"""
272
```
273
274
## Types
275
276
### NVVM Result Codes
277
278
```python { .api }
279
class Result:
280
"""NVVM compilation result codes"""
281
NVVM_SUCCESS: int # Compilation succeeded
282
NVVM_ERROR_OUT_OF_MEMORY: int # Out of memory
283
NVVM_ERROR_PROGRAM_CREATION_FAILURE: int # Program creation failed
284
NVVM_ERROR_IR_VERSION_MISMATCH: int # IR version not supported
285
NVVM_ERROR_INVALID_INPUT: int # Invalid input data
286
NVVM_ERROR_INVALID_PROGRAM: int # Invalid program handle
287
NVVM_ERROR_INVALID_IR: int # Invalid LLVM IR
288
NVVM_ERROR_INVALID_OPTION: int # Invalid compilation option
289
NVVM_ERROR_COMPILATION: int # Compilation failed
290
```
291
292
### NVJitLink Result Codes
293
294
```python { .api }
295
class Result:
296
"""NVJitLink operation result codes"""
297
NVJITLINK_SUCCESS: int # Operation succeeded
298
NVJITLINK_ERROR_UNRECOGNIZED_OPTION: int # Unrecognized linker option
299
NVJITLINK_ERROR_MISSING_ARCH: int # Missing target architecture
300
NVJITLINK_ERROR_INVALID_INPUT: int # Invalid input data
301
NVJITLINK_ERROR_PTX_COMPILE: int # PTX compilation error
302
NVJITLINK_ERROR_NVVM_COMPILE: int # NVVM compilation error
303
NVJITLINK_ERROR_INTERNAL: int # Internal linker error
304
```
305
306
### Input Types
307
308
```python { .api }
309
class InputType:
310
"""NVJitLink input data type enumeration"""
311
NVJITLINK_INPUT_NONE: int # No input
312
NVJITLINK_INPUT_CUBIN: int # CUBIN binary
313
NVJITLINK_INPUT_PTX: int # PTX assembly
314
NVJITLINK_INPUT_FATBIN: int # Fat binary (multi-architecture)
315
NVJITLINK_INPUT_OBJECT: int # Object file
316
NVJITLINK_INPUT_LIBRARY: int # Static library
317
NVJITLINK_INPUT_NVVM_IR: int # NVVM LLVM IR
318
NVJITLINK_INPUT_NVVM_BITCODE: int # NVVM bitcode
319
```
320
321
### Exception Classes
322
323
```python { .api }
324
class nvvmError(Exception):
325
"""NVVM compilation exception"""
326
def __init__(self, result: Result, message: str): ...
327
328
class nvJitLinkError(Exception):
329
"""NVJitLink operation exception"""
330
def __init__(self, result: Result, message: str): ...
331
```
332
333
## Usage Examples
334
335
### NVVM LLVM IR Compilation
336
337
```python
338
from cuda.bindings import nvvm
339
340
# Sample LLVM IR for a simple kernel
341
llvm_ir = b'''
342
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
343
target triple = "nvptx64-nvidia-cuda"
344
345
define void @simple_kernel(float* %input, float* %output, i32 %n) {
346
entry:
347
%tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
348
%bid = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
349
%bdim = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
350
351
%tmp1 = mul i32 %bid, %bdim
352
%idx = add i32 %tmp1, %tid
353
354
%cond = icmp slt i32 %idx, %n
355
br i1 %cond, label %if.then, label %if.end
356
357
if.then:
358
%input_ptr = getelementptr float, float* %input, i32 %idx
359
%val = load float, float* %input_ptr
360
%result = fmul float %val, 2.0
361
%output_ptr = getelementptr float, float* %output, i32 %idx
362
store float %result, float* %output_ptr
363
br label %if.end
364
365
if.end:
366
ret void
367
}
368
369
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() nounwind readnone
370
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() nounwind readnone
371
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() nounwind readnone
372
'''
373
374
try:
    # Create NVVM program
    program = nvvm.create_program()

    # The cleanup lives in an inner try/finally that is only entered once a
    # handle exists, so a failure in create_program() can never reach
    # destroy_program() with an unbound name.
    try:
        # Add LLVM IR module
        nvvm.add_module_to_program(program, llvm_ir, len(llvm_ir), "simple_kernel.ll")

        # Compilation options
        options = ["-arch=compute_70", "-opt=3"]

        # Compile to PTX
        nvvm.compile_program(program, len(options), options)

        # Get compiled PTX. The result is written into the buffer in place,
        # so it must be a writable bytearray rather than an immutable str.
        ptx_size = nvvm.get_compiled_result_size(program)
        ptx_buffer = bytearray(ptx_size)
        nvvm.get_compiled_result(program, ptx_buffer)

        # Decode once for display; PTX is text.
        ptx_text = ptx_buffer.decode("utf-8", errors="replace")

        print(f"Compiled {len(llvm_ir)} bytes of LLVM IR to {ptx_size} bytes of PTX")
        print("First 200 characters of PTX:")
        print(ptx_text[:200])

    finally:
        nvvm.destroy_program(program)

except nvvm.nvvmError as e:
    print(f"NVVM compilation failed: {e}")
401
402
# Check NVVM version
major, minor = nvvm.version()

# Take only the leading IR major/minor pair. The starred target absorbs any
# further elements (presumably the debug-metadata version — confirm against
# the binding), so this line works whether two or four values come back.
ir_major, ir_minor, *_ = nvvm.ir_version()

print(f"NVVM Version: {major}.{minor}")
print(f"Supported IR Version: {ir_major}.{ir_minor}")
407
```
408
409
### NVJitLink Module Linking
410
411
```python
412
from cuda.bindings import nvjitlink
413
414
# Sample PTX modules (simplified)
415
module1_ptx = b'''
416
.version 7.0
417
.target sm_70
418
.address_size 64
419
420
.visible .entry kernel_part1(.param .u64 kernel_part1_param_0) {
421
.reg .u64 %rd<2>;
422
ld.param.u64 %rd1, [kernel_part1_param_0];
423
// Kernel implementation...
424
ret;
425
}
426
'''
427
428
module2_ptx = b'''
429
.version 7.0
430
.target sm_70
431
.address_size 64
432
433
.visible .entry kernel_part2(.param .u64 kernel_part2_param_0) {
434
.reg .u64 %rd<2>;
435
ld.param.u64 %rd1, [kernel_part2_param_0];
436
// Kernel implementation...
437
ret;
438
}
439
'''
440
441
# Create linker with options
# NOTE(review): confirm "-optimize" against the nvJitLink option list.
linker_options = ["-arch=sm_70", "-optimize"]

try:
    linker = nvjitlink.create(len(linker_options), linker_options)
except nvjitlink.nvJitLinkError as e:
    # No handle exists yet: there is no log to query and nothing to destroy.
    print(f"JIT linking failed: {e}")
else:
    try:
        # Add PTX modules
        for module_name, module_ptx in (
            ("module1.ptx", module1_ptx),
            ("module2.ptx", module2_ptx),
        ):
            nvjitlink.add_data(
                linker,
                nvjitlink.InputType.NVJITLINK_INPUT_PTX,
                module_ptx,
                len(module_ptx),
                module_name,
            )

        # Complete linking
        nvjitlink.complete(linker)

        # Get linked CUBIN
        cubin_size = nvjitlink.get_linked_cubin_size(linker)
        cubin_data = bytearray(cubin_size)
        nvjitlink.get_linked_cubin(linker, cubin_data)

        print(f"Linked {len(module1_ptx) + len(module2_ptx)} bytes of PTX")
        print(f"Generated {cubin_size} bytes of CUBIN")

        # Get info log. Logs are written into the buffer in place, so the
        # buffer must be a writable bytearray rather than an immutable str.
        info_size = nvjitlink.get_info_log_size(linker)
        if info_size > 0:
            info_log = bytearray(info_size)
            nvjitlink.get_info_log(linker, info_log)
            # The log may end with a NUL terminator — TODO confirm.
            info_text = info_log.decode("utf-8", errors="replace")
            print("Linker info:", info_text.rstrip("\x00").strip())

    except nvjitlink.nvJitLinkError as e:
        print(f"JIT linking failed: {e}")

        # Get error log (the handle is guaranteed to exist in this branch)
        error_size = nvjitlink.get_error_log_size(linker)
        if error_size > 0:
            error_log = bytearray(error_size)
            nvjitlink.get_error_log(linker, error_log)
            error_text = error_log.decode("utf-8", errors="replace")
            print("Linker errors:", error_text.rstrip("\x00").strip())

    finally:
        nvjitlink.destroy(linker)

# Check NVJitLink version
major, minor = nvjitlink.version()
print(f"NVJitLink Version: {major}.{minor}")
497
```
498
499
### Advanced Multi-Module Linking
500
501
```python
502
from cuda.bindings import nvjitlink
503
import os
504
505
def link_cuda_modules(ptx_files, cubin_files, output_name):
    """Link multiple CUDA modules from files.

    Args:
        ptx_files: Iterable of paths to PTX inputs.
        cubin_files: Iterable of paths to CUBIN inputs.
        output_name: Name forwarded to the linker through the "-o=" option.

    Returns:
        dict: Maps "cubin" and/or "ptx" to a bytearray holding the linked
        code. A key is absent when the linker reports a size of 0 for that
        format, or (PTX only) when retrieving it raises nvJitLinkError.

    Raises:
        nvjitlink.nvJitLinkError: Propagated from any linker call other
            than the optional PTX retrieval.
    """
    # NOTE(review): confirm "-optimize" and "-o=" against the nvJitLink
    # option list; they are kept exactly as the example had them.
    linker_options = [
        "-arch=sm_75",
        "-optimize",
        f"-o={output_name}",
    ]

    linker = nvjitlink.create(len(linker_options), linker_options)

    try:
        # Add PTX files
        for ptx_file in ptx_files:
            nvjitlink.add_file(
                linker,
                nvjitlink.InputType.NVJITLINK_INPUT_PTX,
                ptx_file,
            )

        # Add CUBIN files
        for cubin_file in cubin_files:
            nvjitlink.add_file(
                linker,
                nvjitlink.InputType.NVJITLINK_INPUT_CUBIN,
                cubin_file,
            )

        # Complete linking
        nvjitlink.complete(linker)

        # Extract results
        results = {}

        # Get CUBIN
        cubin_size = nvjitlink.get_linked_cubin_size(linker)
        if cubin_size > 0:
            cubin_data = bytearray(cubin_size)
            nvjitlink.get_linked_cubin(linker, cubin_data)
            results['cubin'] = cubin_data

        # Get PTX. This is best-effort: only a linker-reported failure is
        # treated as "PTX not available". Anything else (KeyboardInterrupt,
        # programming errors) must propagate instead of being hidden.
        try:
            ptx_size = nvjitlink.get_linked_ptx_size(linker)
            if ptx_size > 0:
                ptx_data = bytearray(ptx_size)
                nvjitlink.get_linked_ptx(linker, ptx_data)
                results['ptx'] = ptx_data
        except nvjitlink.nvJitLinkError:
            # PTX not available for this link
            pass

        return results

    finally:
        # Always release the handle, on success and on failure alike.
        nvjitlink.destroy(linker)
561
562
# Example usage
563
if __name__ == "__main__":
    # Inputs for the demo link; the files are expected to exist on disk.
    ptx_modules = ["kernel1.ptx", "kernel2.ptx"]
    cubin_modules = ["library.cubin"]

    try:
        linked_code = link_cuda_modules(ptx_modules, cubin_modules, "combined")

        # Report each output format that the link actually produced.
        if 'cubin' in linked_code:
            cubin_image = linked_code['cubin']
            print(f"Generated CUBIN: {len(cubin_image)} bytes")

        if 'ptx' in linked_code:
            ptx_image = linked_code['ptx']
            print(f"Generated PTX: {len(ptx_image)} bytes")

    except FileNotFoundError as e:
        print(f"Input file not found: {e}")
    except nvjitlink.nvJitLinkError as e:
        print(f"Linking failed: {e}")
582
```
583
584
### NVVM and NVJitLink Pipeline
585
586
```python
587
from cuda.bindings import nvvm, nvjitlink
588
589
def llvm_to_cubin_pipeline(llvm_modules, target_arch="sm_70"):
    """Complete pipeline from LLVM IR to CUBIN via NVVM and NVJitLink.

    Args:
        llvm_modules: Iterable of LLVM IR modules (bytes). Each one is
            compiled to PTX in its own NVVM program.
        target_arch (str): Real architecture name such as "sm_70". The
            matching "compute_XX" virtual architecture for the NVVM step
            is derived from it.

    Returns:
        bytes: The linked CUBIN image.

    Raises:
        nvvm.nvvmError: If compiling any module fails.
        nvjitlink.nvJitLinkError: If linking fails.
    """
    # Derive the virtual architecture from whatever follows the underscore,
    # so "sm_75" becomes "compute_75". Slicing at a fixed offset of 2 would
    # keep the underscore and yield the malformed "compute__75".
    arch_suffix = target_arch.rpartition("_")[2]
    # The options do not depend on the module, so build them once.
    nvvm_options = [f"-arch=compute_{arch_suffix}", "-opt=3"]

    ptx_modules = []

    # Step 1: Compile LLVM IR to PTX using NVVM
    for i, llvm_ir in enumerate(llvm_modules):
        program = nvvm.create_program()

        try:
            nvvm.add_module_to_program(
                program, llvm_ir, len(llvm_ir), f"module_{i}.ll"
            )

            nvvm.compile_program(program, len(nvvm_options), nvvm_options)

            ptx_size = nvvm.get_compiled_result_size(program)
            ptx_buffer = bytearray(ptx_size)
            nvvm.get_compiled_result(program, ptx_buffer)

            ptx_modules.append(bytes(ptx_buffer))

        finally:
            # Release the program whether or not compilation succeeded.
            nvvm.destroy_program(program)

    # Step 2: Link PTX modules to CUBIN using NVJitLink
    linker_options = [f"-arch={target_arch}"]
    linker = nvjitlink.create(len(linker_options), linker_options)

    try:
        for i, ptx_data in enumerate(ptx_modules):
            nvjitlink.add_data(
                linker,
                nvjitlink.InputType.NVJITLINK_INPUT_PTX,
                ptx_data,
                len(ptx_data),
                f"module_{i}.ptx",
            )

        nvjitlink.complete(linker)

        cubin_size = nvjitlink.get_linked_cubin_size(linker)
        cubin_data = bytearray(cubin_size)
        nvjitlink.get_linked_cubin(linker, cubin_data)

        return bytes(cubin_data)

    finally:
        nvjitlink.destroy(linker)
639
640
# Example usage with mock LLVM IR
641
# Two minimal stand-in modules, each defining one empty function.
sample_llvm_modules = [
    b'target triple = "nvptx64-nvidia-cuda"\ndefine void @kernel1() { ret void }',
    b'target triple = "nvptx64-nvidia-cuda"\ndefine void @kernel2() { ret void }',
]

# Run the whole pipeline and report either the CUBIN size or the failure.
try:
    final_cubin = llvm_to_cubin_pipeline(sample_llvm_modules, "sm_75")
    print(f"Pipeline generated {len(final_cubin)} bytes of CUBIN")
except Exception as exc:
    print(f"Pipeline failed: {exc}")
651
```