CTranslate2: a fast inference engine for Transformer models.

Convert models from popular frameworks (Transformers, Fairseq, OpenNMT, etc.) to the CTranslate2 format for optimized inference. The CTranslate2 converters support quantization, file copying, and various framework-specific options to ensure optimal performance and compatibility.
class TransformersConverter:
    """Convert Hugging Face Transformers models to CTranslate2 format.

    Supports most popular model architectures including BERT, GPT-2, T5,
    BART, and more.
    """

    def __init__(self, model_name_or_path: str, activation_scales: str = None,
                 copy_files: list = None, load_as_float16: bool = False,
                 revision: str = None, low_cpu_mem_usage: bool = False,
                 trust_remote_code: bool = False):
        """
        Initialize a converter for Hugging Face Transformers models.

        Args:
            model_name_or_path (str): Model name on the Hugging Face Hub or a local path.
            activation_scales (str): Path to activation scales for SmoothQuant.
            copy_files (list): Additional files to copy to the output directory.
            load_as_float16 (bool): Load the model weights in float16.
            revision (str): Model revision/branch to use.
            low_cpu_mem_usage (bool): Enable low CPU memory loading.
            trust_remote_code (bool): Allow custom code execution.
        """

    def convert(self, output_dir: str, vmap: str = None,
                quantization: str = None, force: bool = False) -> str:
        """
        Convert the model to CTranslate2 format.

        Args:
            output_dir (str): Output directory for the converted model.
            vmap (str): Path to a vocabulary mapping file.
            quantization (str): Quantization type ("int8", "int8_float16", "int16", "float16").
            force (bool): Overwrite the output directory if it exists.

        Returns:
            str: Path to the converted model directory.
        """

    def convert_from_args(self, args) -> str:
        """
        Convert the model using parsed command-line arguments.

        Args:
            args: Parsed arguments object.

        Returns:
            str: Path to the converted model directory.
        """

    @staticmethod
    def declare_arguments(parser):
        """
        Add converter-specific arguments to an argument parser.

        Args:
            parser: ArgumentParser instance to modify.
        """
class FairseqConverter:
    """Convert Fairseq models to CTranslate2 format.

    Supports various Fairseq model architectures.
    """

    def __init__(self, model_path: str, data_dir: str = None):
        """
        Initialize a converter for Fairseq models.

        Args:
            model_path (str): Path to the Fairseq model checkpoint.
            data_dir (str): Path to the data directory with vocabularies.
        """

    def convert(self, output_dir: str, vmap: str = None,
                quantization: str = None, force: bool = False) -> str:
        """
        Convert the Fairseq model to CTranslate2 format.

        Args:
            output_dir (str): Output directory for the converted model.
            vmap (str): Path to a vocabulary mapping file.
            quantization (str): Quantization type.
            force (bool): Overwrite the output directory if it exists.

        Returns:
            str: Path to the converted model directory.
        """
class OpenNMTPyConverter:
    """Convert OpenNMT-py models to CTranslate2 format."""

    def __init__(self, model_path: str):
        """
        Initialize a converter for OpenNMT-py models.

        Args:
            model_path (str): Path to the OpenNMT-py model file.
        """

    def convert(self, output_dir: str, vmap: str = None,
                quantization: str = None, force: bool = False) -> str:
        """Convert the OpenNMT-py model to CTranslate2 format."""
class OpenNMTTFConverter:
    """Convert OpenNMT-tf models to CTranslate2 format."""

    def __init__(self, model_path: str):
        """
        Initialize a converter for OpenNMT-tf models.

        Args:
            model_path (str): Path to the OpenNMT-tf model checkpoint.
        """

    def convert(self, output_dir: str, vmap: str = None,
                quantization: str = None, force: bool = False) -> str:
        """Convert the OpenNMT-tf model to CTranslate2 format."""
class MarianConverter:
    """Convert Marian NMT models to CTranslate2 format."""

    def __init__(self, model_path: str):
        """
        Initialize a converter for Marian models.

        Args:
            model_path (str): Path to the Marian model directory.
        """

    def convert(self, output_dir: str, vmap: str = None,
                quantization: str = None, force: bool = False) -> str:
        """Convert the Marian model to CTranslate2 format."""
class OpusMTConverter:
    """Convert OPUS-MT models to CTranslate2 format."""

    def __init__(self, model_name: str):
        """
        Initialize a converter for OPUS-MT models.

        Args:
            model_name (str): OPUS-MT model name from the Hugging Face Hub.
        """

    def convert(self, output_dir: str, vmap: str = None,
                quantization: str = None, force: bool = False) -> str:
        """Convert the OPUS-MT model to CTranslate2 format."""
class OpenAIGPT2Converter:
    """Convert OpenAI GPT-2 models to CTranslate2 format."""

    def __init__(self, model_name: str = "124M"):
        """
        Initialize a converter for OpenAI GPT-2 models.

        Args:
            model_name (str): GPT-2 model size ("124M", "355M", "774M", "1558M").
        """

    def convert(self, output_dir: str, vmap: str = None,
                quantization: str = None, force: bool = False) -> str:
        """Convert the GPT-2 model to CTranslate2 format."""
class Converter:
    """Abstract base class for model converters.

    All converters inherit from this base class, which provides the common
    conversion functionality.
    """

    def convert(self, output_dir: str, vmap: str = None,
                quantization: str = None, force: bool = False) -> str:
        """
        Convert the model to CTranslate2 format.

        Args:
            output_dir (str): Output directory for the converted model.
            vmap (str): Path to a vocabulary mapping file.
            quantization (str): Quantization type.
            force (bool): Overwrite the output directory if it exists.

        Returns:
            str: Path to the converted model directory.
        """

    def convert_from_args(self, args) -> str:
        """
        Convert the model using parsed command-line arguments.

        Args:
            args: Parsed arguments object with conversion parameters.

        Returns:
            str: Path to the converted model directory.
        """

    @staticmethod
    def declare_arguments(parser):
        """
        Add common converter arguments to an argument parser.

        Args:
            parser: ArgumentParser instance to modify.
        """
# CTranslate2 provides command-line tools for model conversion.
# Available console scripts (entry points):
#   ct2-transformers-converter  - Convert Transformers models
#   ct2-fairseq-converter       - Convert Fairseq models
#   ct2-opennmt-py-converter    - Convert OpenNMT-py models
#   ct2-opennmt-tf-converter    - Convert OpenNMT-tf models
#   ct2-marian-converter        - Convert Marian models
#   ct2-opus-mt-converter       - Convert OPUS-MT models
#   ct2-openai-gpt2-converter   - Convert OpenAI GPT-2 models

# Helper functions for model conversion and optimization.
def fuse_linear(spec, layers: list):
    """
    Fuse multiple linear layers for optimization.

    Args:
        spec: Model specification object.
        layers (list): List of linear layers to fuse.
    """
def fuse_linear_prequant(spec, layers: list, axis: int):
    """
    Fuse pre-quantized linear layers.

    Args:
        spec: Model specification object.
        layers (list): List of pre-quantized linear layers.
        axis (int): Axis along which to fuse.
    """
def permute_for_sliced_rotary(weight, num_heads: int, rotary_dim: int = None):
    """
    Permute weights for rotary position embeddings.

    Args:
        weight: Weight tensor to permute.
        num_heads (int): Number of attention heads.
        rotary_dim (int): Rotary embedding dimension.

    Returns:
        The permuted weight tensor.
    """
def smooth_activation(layer_norm, linear, activation_scales):
    """
    Apply the SmoothQuant activation smoothing technique.

    Args:
        layer_norm: Layer normalization module.
        linear: Linear layer module.
        activation_scales: Activation scaling factors.
    """
# Example usage:
import ctranslate2

# Convert a Hugging Face model
converter = ctranslate2.converters.TransformersConverter("microsoft/DialoGPT-medium")
converter.convert("ct2_model", quantization="int8")

# Convert with additional options
converter = ctranslate2.converters.TransformersConverter(
    "t5-small",
    copy_files=["config.json", "tokenizer.json"],
    load_as_float16=True
)
converter.convert("t5_ct2", quantization="int8_float16")

# Convert a local model
converter = ctranslate2.converters.TransformersConverter("/path/to/local/model")
converter.convert("output_dir", force=True)
# Convert Fairseq model
fairseq_converter = ctranslate2.converters.FairseqConverter(
"checkpoint_best.pt",
data_dir="data-bin/wmt14_en_de"
)
fairseq_converter.convert("fairseq_ct2")
# Convert OpenNMT-py model
opennmt_converter = ctranslate2.converters.OpenNMTPyConverter("model.pt")
opennmt_converter.convert("opennmt_ct2")
# Convert OPUS-MT model
opus_converter = ctranslate2.converters.OpusMTConverter("Helsinki-NLP/opus-mt-en-de")
opus_converter.convert("opus_ct2")# Convert Transformers model
# Convert a Transformers model
ct2-transformers-converter --model microsoft/DialoGPT-medium --output_dir ct2_model --quantization int8

# Convert with custom options
ct2-transformers-converter \
    --model t5-small \
    --output_dir t5_ct2 \
    --quantization int8_float16 \
    --copy_files config.json tokenizer.json \
    --load_as_float16

# Convert a Fairseq model
ct2-fairseq-converter \
    --model_path checkpoint_best.pt \
    --data_dir data-bin/wmt14_en_de \
    --output_dir fairseq_ct2 \
    --quantization int8
# Available quantization types:
quantization_options = [
    "int8",          # 8-bit integer quantization
    "int8_float16",  # 8-bit weights, 16-bit activations
    "int16",         # 16-bit integer quantization
    "float16",       # 16-bit floating point
    "int8_float32",  # 8-bit weights, 32-bit activations
    "int4",          # 4-bit integer quantization (experimental)
]

# Example with different quantization levels
converter = ctranslate2.converters.TransformersConverter("gpt2")

# Fastest inference, smaller model
converter.convert("gpt2_int8", quantization="int8")

# Balanced speed/quality
converter.convert("gpt2_fp16", quantization="float16")

# Highest quality, larger model
converter.convert("gpt2_fp32")  # No quantization (default)
# Quantization types
class Quantization:
    """Identifiers for the supported quantization methods."""

    CT2: str       # Standard CTranslate2 quantization
    AWQ_GEMM: str  # AWQ quantization with GEMM
    AWQ_GEMV: str  # AWQ quantization with GEMV

# Install with the Tessl CLI:
npx tessl i tessl/pypi-ctranslate2