CTranslate2 — fast inference engine for Transformer models.

This page documents helper functions for model management, device configuration, logging, and tensor operations. CTranslate2 provides utilities for checking model compatibility, managing computational resources, and working with multi-dimensional arrays efficiently.

Utilities for checking and managing CTranslate2 model directories and compatibility:
def contains_model(path: str) -> bool:
    """
    Check if a directory contains a valid CTranslate2 model.

    Args:
        path (str): Path to directory to check

    Returns:
        bool: True if directory contains a valid CTranslate2 model
    """

# Functions for managing computational resources and checking hardware capabilities.
def get_cuda_device_count() -> int:
    """
    Get the number of available CUDA devices.

    Returns:
        int: Number of CUDA devices available
    """
def get_supported_compute_types(device: str, device_index: int = 0) -> list:
    """
    Get supported compute types for a specific device.

    Args:
        device (str): Device type ("cpu" or "cuda")
        device_index (int): Device index for multi-device setups

    Returns:
        list: List of supported compute types for the device
    """

# Control random number generation for reproducible results.
def set_random_seed(seed: int):
    """
    Set random seed for reproducible inference.

    Args:
        seed (int): Random seed value
    """

# Functions for configuring CTranslate2 logging levels and output.
def get_log_level() -> str:
    """
    Get current logging level.

    Returns:
        str: Current log level ("trace", "debug", "info", "warning", "error", "critical", "off")
    """
def set_log_level(level: str):
    """
    Set logging level for CTranslate2.

    Args:
        level (str): Log level to set ("trace", "debug", "info", "warning", "error", "critical", "off")
    """

# The StorageView class provides efficient multi-dimensional array operations with device management.
class StorageView:
    """Efficient multi-dimensional array container with device management."""

    def __init__(self, array=None, dtype=None):
        """
        Initialize a StorageView for efficient tensor operations.

        Args:
            array: Input array data (numpy array, list, etc.)
            dtype: Data type for the storage ("float32", "float16", "int32", "int16", "int8")
        """

    def numpy(self):
        """
        Convert StorageView to NumPy array.

        Returns:
            numpy.ndarray: NumPy array representation
        """

    def copy(self) -> 'StorageView':
        """
        Create a copy of the StorageView.

        Returns:
            StorageView: Copied StorageView object
        """

    def to(self, dtype: str) -> 'StorageView':
        """
        Convert StorageView to different data type.

        Args:
            dtype (str): Target data type

        Returns:
            StorageView: New StorageView with converted data type
        """

    @property
    def device(self) -> str:
        """Device where the storage is located."""

    @property
    def device_index(self) -> int:
        """Device index for multi-device setups."""

    @property
    def dtype(self) -> str:
        """Data type of the stored elements."""

    @property
    def shape(self) -> tuple:
        """Shape of the multi-dimensional array."""

    @property
    def size(self) -> int:
        """Total number of elements in the array."""

    @property
    def rank(self) -> int:
        """Number of dimensions in the array."""

# Monitor performance and resource usage during model inference.
class ExecutionStats:
    """Statistics from model execution."""

    @property
    def num_tokens(self) -> int:
        """Total number of tokens processed."""

    @property
    def num_examples(self) -> int:
        """Total number of examples processed."""

    @property
    def total_time_in_ms(self) -> float:
        """Total execution time in milliseconds."""

# Information about distributed processing setups.
class MpiInfo:
    """MPI (Message Passing Interface) information."""

    @property
    def rank(self) -> int:
        """Current process rank in MPI setup."""

    @property
    def size(self) -> int:
        """Total number of processes in MPI setup."""

# Constants for specifying data types and devices.
class DataType:
    """Data type constants for StorageView and model operations."""
    FLOAT32: str = "float32"
    FLOAT16: str = "float16"
    BFLOAT16: str = "bfloat16"
    INT32: str = "int32"
    INT16: str = "int16"
    INT8: str = "int8"
class Device:
    """Device constants for model placement."""
    CPU: str = "cpu"
    CUDA: str = "cuda"
    AUTO: str = "auto"

# Example: check a directory for a valid model before loading it.
import ctranslate2
# Check if directory contains valid model
model_path = "path/to/potential/model"
if ctranslate2.contains_model(model_path):
    print("Valid CTranslate2 model found")
    translator = ctranslate2.Translator(model_path)
else:
    print("No valid model found in directory")

# Example: query available devices and compute types.
import ctranslate2
# Check available CUDA devices
cuda_count = ctranslate2.get_cuda_device_count()
print(f"Available CUDA devices: {cuda_count}")
if cuda_count > 0:
    # Check supported compute types for GPU
    gpu_compute_types = ctranslate2.get_supported_compute_types("cuda", 0)
    print(f"GPU compute types: {gpu_compute_types}")
    # Use optimal compute type
    if "int8" in gpu_compute_types:
        translator = ctranslate2.Translator(
            "model_path",
            device="cuda",
            compute_type="int8"
        )
# Check supported compute types for CPU
cpu_compute_types = ctranslate2.get_supported_compute_types("cpu")
print(f"CPU compute types: {cpu_compute_types}")

# Example: reproducible sampling with a fixed random seed.
import ctranslate2
# Set seed for reproducible inference
ctranslate2.set_random_seed(42)
# Now all inference will be deterministic
generator = ctranslate2.Generator("model_path", device="cpu")
results1 = generator.generate_batch([["Hello"]], sampling_temperature=0.8)
# Reset seed and run again - should get same results
ctranslate2.set_random_seed(42)
results2 = generator.generate_batch([["Hello"]], sampling_temperature=0.8)
assert results1[0].sequences == results2[0].sequences

# Example: controlling CTranslate2 log verbosity.
import ctranslate2
# Set logging level to see detailed information
ctranslate2.set_log_level("debug")
# Load model with debug logging
translator = ctranslate2.Translator("model_path", device="cpu")
# Get current log level
current_level = ctranslate2.get_log_level()
print(f"Current log level: {current_level}")
# Reduce logging for production
ctranslate2.set_log_level("warning")

# Example: StorageView tensor operations.
import ctranslate2
import numpy as np

# Create StorageView from numpy array
np_array = np.random.randn(3, 4).astype(np.float32)
storage = ctranslate2.StorageView(np_array)
print(f"Shape: {storage.shape}")
print(f"Size: {storage.size}")
print(f"Data type: {storage.dtype}")
print(f"Device: {storage.device}")
print(f"Rank: {storage.rank}")
# Convert to different data type
storage_fp16 = storage.to("float16")
print(f"New data type: {storage_fp16.dtype}")
# Convert back to numpy
np_result = storage_fp16.numpy()
print(f"Result shape: {np_result.shape}")
# Create copy
storage_copy = storage.copy()
print(f"Copy device: {storage_copy.device}")

# Example: execution statistics during batch translation.
import ctranslate2
# Create translator with statistics enabled
translator = ctranslate2.Translator("model_path", device="cpu")
# Perform translation
source = [["Hello", "world"] for _ in range(100)]
results = translator.translate_batch(source)
# Note: ExecutionStats would be available through specific API calls
# or integrated profiling tools (implementation-specific)

# Example: multi-GPU placement and tensor parallelism.
import ctranslate2
# Check available devices
cuda_count = ctranslate2.get_cuda_device_count()
if cuda_count >= 2:
    # Use specific GPU device
    translator_gpu0 = ctranslate2.Translator(
        "model_path",
        device="cuda",
        device_index=0
    )
    translator_gpu1 = ctranslate2.Translator(
        "model_path",
        device="cuda",
        device_index=1
    )
    # Or use multiple devices with tensor parallelism
    translator_parallel = ctranslate2.Translator(
        "model_path",
        device="cuda",
        device_index=[0, 1],  # Use both GPUs
        tensor_parallel=True
    )

# Example: an optimized end-to-end configuration.
import ctranslate2
# Configure for optimal performance
ctranslate2.set_log_level("warning")  # Reduce logging overhead
ctranslate2.set_random_seed(42)  # Reproducible results
# Check optimal compute type for device
device = "cuda" if ctranslate2.get_cuda_device_count() > 0 else "cpu"
compute_types = ctranslate2.get_supported_compute_types(device)
# Select best compute type (prefer quantized for speed)
compute_type = "int8" if "int8" in compute_types else "default"
# Create optimized model instance
model = ctranslate2.Translator(
    "model_path",
    device=device,
    compute_type=compute_type,
    inter_threads=4,  # Parallel processing
    max_queued_batches=16,  # Larger batch queue
    flash_attention=True  # Enable Flash Attention if available
)
print(f"Model loaded on {model.device} with {model.compute_type} precision")

# Core utility types
class StorageView:
    """Multi-dimensional array container for efficient tensor operations."""
    device: str        # Device location ("cpu", "cuda")
    device_index: int  # Device index for multi-device setups
    dtype: str         # Data type of elements
    shape: tuple       # Array dimensions
    size: int          # Total number of elements
    rank: int          # Number of dimensions
class ExecutionStats:
    """Performance statistics from model execution."""
    num_tokens: int           # Number of tokens processed
    num_examples: int         # Number of examples processed
    total_time_in_ms: float   # Total execution time
class MpiInfo:
    """Multi-process interface information."""
    rank: int  # Process rank in distributed setup
    size: int  # Total number of processes

# Enumeration classes
class DataType:
    """Available data types for tensors and computations."""
    FLOAT32: str = "float32"
    FLOAT16: str = "float16"
    BFLOAT16: str = "bfloat16"
    INT32: str = "int32"
    INT16: str = "int16"
    INT8: str = "int8"
class Device:
    """Available device types for model execution."""
    CPU: str = "cpu"
    CUDA: str = "cuda"
    AUTO: str = "auto"

# Install with Tessl CLI:
npx tessl i tessl/pypi-ctranslate2