Python wrapper for the Nvidia CUDA parallel-computation API, with object cleanup, automatic error checking, and convenient abstractions.

Integration with OpenGL for graphics programming, allowing sharing of buffer objects and textures between CUDA and OpenGL contexts. This enables seamless interoperability for graphics and compute applications.

Initialize OpenGL integration and manage shared contexts between CUDA and OpenGL.
def init() -> None:
    """
    Initialize OpenGL integration.

    Must be called before any OpenGL interoperability functions.
    Requires an active OpenGL context.
    """
def make_context(device: Device, flags: int = 0) -> Context:
    """
    Create CUDA context with OpenGL interoperability.

    Parameters:
    - device: Device, CUDA device the context is created on
    - flags: int, context creation flags (default 0)

    Returns:
    Context: CUDA context with OpenGL support
    """

Share OpenGL buffer objects with CUDA for compute operations on graphics data.
class BufferObject:
    """OpenGL buffer object wrapper for CUDA interoperability."""

    def __init__(self, buffer_id: int):
        """
        Create buffer object from OpenGL buffer ID.

        Parameters:
        - buffer_id: int, OpenGL buffer object ID (as produced by glGenBuffers)
        """

    def register(self, flags: int = 0) -> RegisteredBuffer:
        """
        Register buffer for CUDA access.

        Parameters:
        - flags: int, registration flags (see graphics_register_flags)

        Returns:
        RegisteredBuffer: registered buffer for mapping
        """
class RegisteredBuffer:
    """Registered OpenGL buffer for CUDA access."""

    def map(self, flags: int = 0) -> BufferObjectMapping:
        """
        Map buffer for CUDA access.

        Parameters:
        - flags: int, mapping flags (see graphics_map_flags)

        Returns:
        BufferObjectMapping: mapped buffer for CUDA operations
        """

    def unregister(self) -> None:
        """Unregister buffer from CUDA."""
class BufferObjectMapping:
    """Mapped OpenGL buffer accessible from CUDA.

    Usable as a context manager: leaving the ``with`` block unmaps the
    buffer automatically.
    """

    def device_ptr(self) -> DeviceAllocation:
        """
        Get CUDA device pointer to buffer data.

        Returns:
        DeviceAllocation: device pointer to buffer memory
        """

    def size(self) -> int:
        """
        Get buffer size in bytes.

        Returns:
        int: buffer size in bytes
        """

    def unmap(self) -> None:
        """Unmap buffer from CUDA access."""

    def __enter__(self) -> BufferObjectMapping:
        """Context manager entry; returns the mapping itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit (automatically unmaps)."""
        self.unmap()

Share OpenGL textures and images with CUDA for image processing operations.
class RegisteredImage:
    """Registered OpenGL image/texture for CUDA access."""

    def __init__(self, image_id: int, target: int, flags: int = 0):
        """
        Register OpenGL image/texture for CUDA access.

        Parameters:
        - image_id: int, OpenGL texture/image ID
        - target: int, OpenGL texture target (GL_TEXTURE_2D, etc.)
        - flags: int, registration flags
        """

    def map(self, flags: int = 0) -> RegisteredMapping:
        """
        Map image for CUDA access.

        Parameters:
        - flags: int, mapping flags (see graphics_map_flags)

        Returns:
        RegisteredMapping: mapped image for CUDA operations
        """

    def unregister(self) -> None:
        """Unregister image from CUDA."""
class RegisteredMapping:
    """Mapped OpenGL image accessible from CUDA.

    Usable as a context manager: leaving the ``with`` block unmaps the
    image automatically.
    """

    def array(self) -> Array:
        """
        Get CUDA array from mapped image.

        Returns:
        Array: CUDA array representing image data
        """

    def device_ptr_and_size(self) -> tuple[DeviceAllocation, int]:
        """
        Get device pointer and size.

        Returns:
        tuple: (device_pointer, size_in_bytes)
        """

    def unmap(self) -> None:
        """Unmap image from CUDA access."""

    def __enter__(self) -> RegisteredMapping:
        """Context manager entry; returns the mapping itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit (automatically unmaps)."""
        self.unmap()

Manage graphics resources and their CUDA interoperability state.
def graphics_map_flags() -> object:
    """
    Get graphics mapping flags namespace.

    Returns:
    object: namespace with mapping flag constants
        (NONE, READ_ONLY, WRITE_DISCARD)
    """
    # NOTE(review): this file also defines a module-level
    # ``graphics_map_flags`` SimpleNamespace constant later on; a name
    # cannot be both a function and a constant — confirm which form the
    # shipped API actually exposes.
def register_buffer_object(buffer_id: int, flags: int = 0) -> RegisteredBuffer:
    """
    Register OpenGL buffer object for CUDA access.

    Parameters:
    - buffer_id: int, OpenGL buffer object ID
    - flags: int, registration flags (see graphics_register_flags)

    Returns:
    RegisteredBuffer: registered buffer object
    """
def register_image(image_id: int, target: int, flags: int = 0) -> RegisteredImage:
    """
    Register OpenGL image/texture for CUDA access.

    Parameters:
    - image_id: int, OpenGL texture/image ID
    - target: int, OpenGL texture target (e.g. GL_TEXTURE_2D)
    - flags: int, registration flags

    Returns:
    RegisteredImage: registered image object
    """
def unregister_buffer_object(registered_buffer: RegisteredBuffer) -> None:
    """
    Unregister buffer object from CUDA.

    Parameters:
    - registered_buffer: RegisteredBuffer, buffer to unregister
    """
def unregister_image(registered_image: RegisteredImage) -> None:
    """
    Unregister image from CUDA.

    Parameters:
    - registered_image: RegisteredImage, image to unregister
    """

Synchronize operations between CUDA and OpenGL contexts.
def gl_sync() -> None:
    """
    Synchronize OpenGL operations.

    Ensures all pending OpenGL operations complete before CUDA operations.
    """
def cuda_gl_sync() -> None:
    """
    Synchronize CUDA-OpenGL operations.

    Ensures proper ordering between CUDA and OpenGL operations.
    """

import pycuda.gl as cuda_gl
import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray
import OpenGL.GL as gl
import numpy as np

# Initialize OpenGL integration (requires an active OpenGL context)
cuda_gl.init()

# Create an OpenGL vertex buffer holding three 2-D float32 vertices
vertex_data = np.array([[0.0, 0.0], [1.0, 0.0], [0.5, 1.0]], dtype=np.float32)
vbo = gl.glGenBuffers(1)
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, vbo)
gl.glBufferData(gl.GL_ARRAY_BUFFER, vertex_data.nbytes, vertex_data, gl.GL_DYNAMIC_DRAW)

# Register buffer with CUDA
cuda_buffer = cuda_gl.BufferObject(vbo)
registered_buffer = cuda_buffer.register()

# Map buffer for CUDA access
with registered_buffer.map() as mapping:
    # Get device pointer and size of the mapped buffer
    dev_ptr = mapping.device_ptr()
    size = mapping.size()
    # Create GPU array from buffer
    # NOTE(review): presumably gpudata=dev_ptr wraps the mapped buffer
    # without copying, so the in-place scale below writes straight into
    # the VBO — confirm against pycuda.gpuarray docs.
    gpu_array = gpuarray.GPUArray(vertex_data.shape, vertex_data.dtype, gpudata=dev_ptr)
    # Perform CUDA operations on vertex data
    gpu_array *= 2.0  # Scale vertices
# Buffer is automatically unmapped when exiting context
# Vertex data is now modified and available to OpenGL

import pycuda.gl as cuda_gl
import OpenGL.GL as gl

# Create an empty 512x512 RGBA float OpenGL texture
texture_id = gl.glGenTextures(1)
gl.glBindTexture(gl.GL_TEXTURE_2D, texture_id)
gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, 512, 512, 0,
                gl.GL_RGBA, gl.GL_FLOAT, None)

# Register texture with CUDA
registered_image = cuda_gl.RegisteredImage(texture_id, gl.GL_TEXTURE_2D)

# Map texture for CUDA access
with registered_image.map() as mapping:
    # Get CUDA array from texture
    cuda_array = mapping.array()
    # Perform image processing operations
    # (Would typically use custom kernels here)
    # Process image with CUDA kernels...
    pass
# Texture is automatically unmapped and available to OpenGL for rendering

class CudaGLInterop:
    def __init__(self, width, height):
        """
        Set up ping-pong textures shared between CUDA and OpenGL.

        Parameters:
        - width: int, texture width in pixels
        - height: int, texture height in pixels
        """
        self.width = width
        self.height = height
        # Initialize OpenGL integration
        cuda_gl.init()
        # Create ping-pong textures: two RGBA32F textures, nearest filtering
        self.textures = gl.glGenTextures(2)
        for i, tex_id in enumerate(self.textures):
            gl.glBindTexture(gl.GL_TEXTURE_2D, tex_id)
            gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, width, height, 0,
                            gl.GL_RGBA, gl.GL_FLOAT, None)
            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_NEAREST)
            gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST)
        # Register textures with CUDA
        self.registered_images = [
            cuda_gl.RegisteredImage(tex_id, gl.GL_TEXTURE_2D)
            for tex_id in self.textures
        ]
        # Index (0 or 1) of the texture currently holding the latest frame
        self.current_texture = 0
    def process_frame(self):
        """Process current texture with CUDA and swap ping-pong buffers."""
        # Map current texture for reading
        with self.registered_images[self.current_texture].map() as input_mapping:
            # Map next texture for writing
            next_texture = (self.current_texture + 1) % 2
            with self.registered_images[next_texture].map() as output_mapping:
                # Get CUDA arrays
                input_array = input_mapping.array()
                output_array = output_mapping.array()
                # Perform CUDA processing
                self.cuda_process(input_array, output_array)
        # Swap: the freshly written texture becomes the current one
        self.current_texture = (self.current_texture + 1) % 2
        # Synchronize for OpenGL rendering
        cuda_gl.cuda_gl_sync()
    def cuda_process(self, input_array, output_array):
        """Perform CUDA processing on texture data (placeholder hook)."""
        # Custom CUDA kernel processing would go here
        pass
    def get_current_texture(self):
        """Get current texture ID for OpenGL rendering."""
        return self.textures[self.current_texture]

# Efficient buffer management
class GLBufferPool:
    """Pool of pre-registered OpenGL buffers for efficient reuse with CUDA."""

    def __init__(self, buffer_size, pool_size=4):
        """
        Pre-allocate and CUDA-register a pool of OpenGL buffers.

        Parameters:
        - buffer_size: int, size of each buffer in bytes
        - pool_size: int, number of buffers to pre-allocate
        """
        self.buffer_size = buffer_size
        # (vbo, RegisteredBuffer) pairs ready to be handed out
        self.available_buffers = []
        # vbo -> (RegisteredBuffer, mapping) for buffers currently checked out
        self.mapped_buffers = {}
        # Pre-allocate buffer pool
        for _ in range(pool_size):
            # Create OpenGL buffer
            vbo = gl.glGenBuffers(1)
            gl.glBindBuffer(gl.GL_ARRAY_BUFFER, vbo)
            gl.glBufferData(gl.GL_ARRAY_BUFFER, buffer_size, None, gl.GL_STREAM_DRAW)
            # Register with CUDA
            cuda_buffer = cuda_gl.BufferObject(vbo)
            registered = cuda_buffer.register()
            self.available_buffers.append((vbo, registered))

    def get_buffer(self):
        """
        Get an available buffer from the pool, mapped for CUDA access.

        Returns:
        tuple: (vbo, mapping) — OpenGL buffer ID and its CUDA mapping

        Raises:
        RuntimeError: if every pooled buffer is currently checked out
        """
        if not self.available_buffers:
            raise RuntimeError("No available buffers in pool")
        vbo, registered = self.available_buffers.pop()
        mapping = registered.map()
        self.mapped_buffers[vbo] = (registered, mapping)
        return vbo, mapping

    def return_buffer(self, vbo):
        """
        Return a buffer to the pool, unmapping it from CUDA.

        Unknown vbo values are silently ignored (best-effort return).

        Parameters:
        - vbo: int, OpenGL buffer ID previously obtained from get_buffer()
        """
        if vbo in self.mapped_buffers:
            registered, mapping = self.mapped_buffers.pop(vbo)
            mapping.unmap()
            self.available_buffers.append((vbo, registered))

# Graphics resource mapping flags
# SimpleNamespace is stdlib; without this import the flag namespaces
# below would raise NameError at module load time.
from types import SimpleNamespace

# Graphics resource mapping flags
graphics_map_flags = SimpleNamespace(
    NONE=0,           # default mapping behavior
    READ_ONLY=1,      # resource is only read while mapped
    WRITE_DISCARD=2,  # resource is only written; prior contents discardable
)

# Graphics resource registration flags
graphics_register_flags = SimpleNamespace(
    NONE=0,            # default registration behavior
    SURFACE_LDST=1,    # surface load/store access
    TEXTURE_GATHER=2,  # texture gather usage
)

# Install with Tessl CLI
npx tessl i tessl/pypi-pycudadocs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10