CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-ctranslate2

Fast inference engine for Transformer models

Pending
Overview
Eval results
Files

docs/specifications.md

Model Specifications

Programmatically define and build Transformer model architectures from scratch using CTranslate2's specification system. Model specifications enable creating custom models, modifying existing architectures, and building models without relying on external frameworks.

Capabilities

Base Model Specifications

Core specification classes that provide the foundation for building different types of Transformer models.

class ModelSpec:
    """Abstract base class for all model specifications."""
    
    def save(self, output_dir: str):
        """
        Save the model specification to a directory.
        
        Args:
            output_dir (str): Directory to save the model
        """
    
    def validate(self):
        """Validate the model specification for correctness."""
    
    def optimize(self, quantization: str = None):
        """
        Optimize model weights with optional quantization.
        
        Args:
            quantization (str): Quantization type ("int8", "float16", etc.)
        """
    
    def register_file(self, path: str, filename: str = None):
        """
        Register additional files to include with the model.
        
        Args:
            path (str): Path to the file to register
            filename (str): Optional custom filename in model directory
        """

class LayerSpec:
    """Abstract base class for layer specifications."""

    def variables(self, prefix: str = "", ordered: bool = False) -> dict:
        """Collect the variables declared by this layer.

        Args:
            prefix: String prepended to every variable name.
            ordered: If True, return the variables as an ordered dictionary.

        Returns:
            Mapping from (prefixed) variable names to their values.
        """

    def validate(self):
        """Check that this layer specification is well formed."""

class SequenceToSequenceModelSpec(ModelSpec):
    """Base class for sequence-to-sequence model specifications."""

    def register_source_vocabulary(self, tokens: list):
        """Attach the source-side vocabulary to the model.

        Args:
            tokens: Source vocabulary tokens to register.
        """

    def register_target_vocabulary(self, tokens: list):
        """Attach the target-side vocabulary to the model.

        Args:
            tokens: Target vocabulary tokens to register.
        """

    def register_vocabulary_mapping(self, path: str):
        """Attach a vocabulary mapping file to the model.

        Args:
            path: Location of the vocabulary mapping file.
        """

class LanguageModelSpec(ModelSpec):
    """Base class for language model specifications."""

    def register_vocabulary(self, tokens: list):
        """Attach the model vocabulary.

        Args:
            tokens: Vocabulary tokens to register.
        """

Transformer Model Specifications

Specific implementations for different Transformer model architectures.

class TransformerSpec(SequenceToSequenceModelSpec):
    """Specification for sequence-to-sequence Transformer models."""

    def __init__(self, encoder: 'TransformerEncoderSpec', decoder: 'TransformerDecoderSpec'):
        """Build a Transformer specification from its two halves.

        Args:
            encoder: Specification of the encoder stack.
            decoder: Specification of the decoder stack.
        """

    @classmethod
    def from_config(cls, num_layers: int, num_heads: int,
                    d_model: int = 512, d_ff: int = 2048, **kwargs):
        """Build a Transformer specification from scalar hyperparameters.

        Args:
            num_layers: Number of encoder/decoder layers.
            num_heads: Number of attention heads.
            d_model: Model (hidden) dimension.
            d_ff: Feed-forward inner dimension.
            **kwargs: Additional configuration parameters.

        Returns:
            TransformerSpec: The configured specification.
        """

class TransformerDecoderModelSpec(LanguageModelSpec):
    """Specification for decoder-only Transformer models (GPT-style)."""

    def __init__(self, decoder: 'TransformerDecoderSpec'):
        """Build a decoder-only Transformer specification.

        Args:
            decoder: Specification of the decoder stack.
        """

    @classmethod
    def from_config(cls, num_layers: int, num_heads: int,
                    d_model: int = 512, vocab_size: int = 50257, **kwargs):
        """Build a decoder-only Transformer specification from hyperparameters.

        Args:
            num_layers: Number of decoder layers.
            num_heads: Number of attention heads.
            d_model: Model (hidden) dimension.
            vocab_size: Size of the vocabulary.
            **kwargs: Additional configuration parameters.

        Returns:
            TransformerDecoderModelSpec: The configured specification.
        """

class TransformerEncoderModelSpec(ModelSpec):
    """Specification for encoder-only Transformer models (BERT-style)."""

    def __init__(self, encoder: 'TransformerEncoderSpec', pooling_layer: bool = False):
        """Build an encoder-only Transformer specification.

        Args:
            encoder: Specification of the encoder stack.
            pooling_layer: Include a pooling layer on top when True.
        """

Transformer Layer Specifications

Detailed specifications for Transformer encoder and decoder layers.

class TransformerEncoderSpec(LayerSpec):
    """Specification for Transformer encoder layers."""

    def __init__(self, num_layers: int, num_heads: int,
                 pre_norm: bool = True, activation: str = "relu",
                 num_source_embeddings: int | None = None,
                 embeddings_merge: str = "concat",
                 layernorm_embedding: bool = False,
                 relative_position: bool = False,
                 relative_attention_bias: bool = False,
                 ffn_glu: bool = False, rms_norm: bool = False,
                 multi_query_attention: bool = False):
        """
        Initialize Transformer encoder specification.

        Args:
            num_layers (int): Number of encoder layers
            num_heads (int): Number of attention heads
            pre_norm (bool): Whether to use pre-normalization
            activation (str): Activation function ("relu", "gelu", etc.)
            num_source_embeddings (int, optional): Number of source embeddings
            embeddings_merge (str): How to merge embeddings ("concat", "add")
            layernorm_embedding (bool): Whether to normalize embeddings
            relative_position (bool): Whether to use relative position
            relative_attention_bias (bool): Whether to use attention bias
            ffn_glu (bool): Whether to use GLU in feed-forward
            rms_norm (bool): Whether to use RMS normalization
            multi_query_attention (bool): Whether to use multi-query attention
        """

class TransformerDecoderSpec(LayerSpec):
    """Specification for Transformer decoder layers."""

    def __init__(self, num_layers: int, num_heads: int,
                 pre_norm: bool = True, activation: str = "relu",
                 layernorm_embedding: bool = False,
                 with_encoder_attention: bool = True,
                 no_final_norm: bool = False,
                 project_in_out: bool = False,
                 relative_position: bool = False,
                 relative_attention_bias: bool = False,
                 alignment_layer: int | None = None,
                 alignment_heads: int | None = None,
                 ffn_glu: bool = False, rms_norm: bool = False,
                 alibi: bool = False,
                 alibi_use_positive_positions: bool = False,
                 scale_alibi: bool = False,
                 rotary_dim: int | None = None,
                 rotary_interleave: bool = True,
                 rotary_scaling_type: str | None = None,
                 rotary_scaling_factor: float = 1.0,
                 rotary_base: float = 10000.0,
                 parallel_residual: bool = False,
                 shared_layer_norm: bool = False,
                 pre_post_layer_norm: bool = False,
                 multi_query_attention: bool = False,
                 num_heads_kv: int | None = None,
                 head_dim: int | None = None,
                 sliding_window: int | None = None):
        """
        Initialize Transformer decoder specification.

        Args:
            num_layers (int): Number of decoder layers
            num_heads (int): Number of attention heads
            pre_norm (bool): Whether to use pre-normalization
            activation (str): Activation function
            layernorm_embedding (bool): Whether to normalize embeddings
            with_encoder_attention (bool): Whether to use encoder-decoder attention
            no_final_norm (bool): Whether to skip final normalization
            project_in_out (bool): Whether to project input/output
            relative_position (bool): Whether to use relative position
            relative_attention_bias (bool): Whether to use attention bias
            alignment_layer (int, optional): Layer for alignment attention
            alignment_heads (int, optional): Number of alignment heads
            ffn_glu (bool): Whether to use GLU in feed-forward
            rms_norm (bool): Whether to use RMS normalization
            alibi (bool): Whether to use ALiBi position encoding
            alibi_use_positive_positions (bool): Use positive positions in ALiBi
            scale_alibi (bool): Whether to scale ALiBi
            rotary_dim (int, optional): Rotary embedding dimension
            rotary_interleave (bool): Whether to interleave rotary embeddings
            rotary_scaling_type (str, optional): Type of rotary scaling
            rotary_scaling_factor (float): Rotary scaling factor
            rotary_base (float): Rotary base frequency
            parallel_residual (bool): Whether to use parallel residual
            shared_layer_norm (bool): Whether to share layer norm
            pre_post_layer_norm (bool): Pre and post layer normalization
            multi_query_attention (bool): Whether to use multi-query attention
            num_heads_kv (int, optional): Number of key-value heads
            head_dim (int, optional): Dimension per attention head
            sliding_window (int, optional): Sliding window size for attention
        """

Common Layer Specifications

Building blocks for constructing Transformer architectures.

class LayerNormSpec(LayerSpec):
    """Layer normalization specification."""

    def __init__(self, normalized_shape: int, eps: float = 1e-5):
        """Build a layer-normalization spec.

        Args:
            normalized_shape: Size of the dimensions being normalized.
            eps: Small constant added for numerical stability.
        """

class LinearSpec(LayerSpec):
    """Linear/dense layer specification."""

    def __init__(self, in_features: int, out_features: int, bias: bool = True):
        """Build a linear (fully connected) layer spec.

        Args:
            in_features: Dimension of the input features.
            out_features: Dimension of the output features.
            bias: Include an additive bias term when True.
        """

class Conv1DSpec(LayerSpec):
    """1D convolution layer specification."""

    def __init__(self, in_channels: int, out_channels: int,
                 kernel_size: int, stride: int = 1, padding: int = 0):
        """Build a 1D convolution layer spec.

        Args:
            in_channels: Number of channels in the input.
            out_channels: Number of channels produced by the convolution.
            kernel_size: Width of the convolution kernel.
            stride: Step size of the convolution.
            padding: Zero-padding added to both ends of the input.
        """

class EmbeddingsSpec(LayerSpec):
    """Embedding layer specification."""

    def __init__(self, num_embeddings: int, embedding_dim: int,
                 padding_idx: int | None = None):
        """
        Initialize embedding layer.

        Args:
            num_embeddings (int): Vocabulary size
            embedding_dim (int): Embedding dimension
            padding_idx (int, optional): Index for padding token; None means
                no padding index is reserved.
        """

class MultiHeadAttentionSpec(LayerSpec):
    """Multi-head attention layer specification."""

    def __init__(self, d_model: int, num_heads: int, dropout: float = 0.0):
        """Build a multi-head attention layer spec.

        Args:
            d_model: Model (hidden) dimension.
            num_heads: Number of attention heads.
            dropout: Dropout probability applied to attention weights.
        """

Configuration Classes

Configuration objects for different model types.

class ModelConfig:
    """Base configuration class for models."""

    def to_dict(self) -> dict:
        """Return the configuration as a plain dictionary."""

    def save_as_json(self, path: str):
        """Serialize the configuration to a JSON file.

        Args:
            path: Destination path of the JSON file.
        """

class SequenceToSequenceModelConfig(ModelConfig):
    """Configuration for sequence-to-sequence models."""

    def __init__(self, unk_token: str = "<unk>", bos_token: str = "<s>",
                 eos_token: str = "</s>", decoder_start_token: str | None = None,
                 add_source_bos: bool = False, add_source_eos: bool = False):
        """
        Initialize seq2seq model configuration.

        Args:
            unk_token (str): Unknown token
            bos_token (str): Beginning of sequence token
            eos_token (str): End of sequence token
            decoder_start_token (str, optional): Decoder start token
            add_source_bos (bool): Add BOS to source sequences
            add_source_eos (bool): Add EOS to source sequences
        """

class LanguageModelConfig(ModelConfig):
    """Configuration for language models."""

    def __init__(self, unk_token: str = "<unk>", bos_token: str = "<s>",
                 eos_token: str = "</s>"):
        """Build a language-model configuration.

        Args:
            unk_token: Token used for unknown words.
            bos_token: Token marking the beginning of a sequence.
            eos_token: Token marking the end of a sequence.
        """

Specialized Model Specifications

Specifications for domain-specific models like Whisper and Wav2Vec2.

class WhisperSpec(ModelSpec):
    """Specification for Whisper speech recognition models."""

    def __init__(self, num_encoder_layers: int, num_encoder_heads: int,
                 num_decoder_layers: int, num_decoder_heads: int,
                 d_model: int = 512, vocab_size: int = 51865):
        """Build a Whisper model specification.

        Args:
            num_encoder_layers: Layer count of the encoder stack.
            num_encoder_heads: Attention head count in the encoder.
            num_decoder_layers: Layer count of the decoder stack.
            num_decoder_heads: Attention head count in the decoder.
            d_model: Model (hidden) dimension.
            vocab_size: Size of the vocabulary.
        """

class WhisperConfig(ModelConfig):
    """Configuration for Whisper models."""

    def __init__(self, suppress_ids: list | None = None,
                 suppress_ids_begin: list | None = None,
                 lang_ids: dict | None = None,
                 alignment_heads: list | None = None):
        """
        Initialize Whisper configuration.

        Args:
            suppress_ids (list, optional): Token IDs to suppress during generation
            suppress_ids_begin (list, optional): Token IDs to suppress at beginning
            lang_ids (dict, optional): Language ID mappings
            alignment_heads (list, optional): Attention heads for alignment
        """

class Wav2Vec2Spec(ModelSpec):
    """Specification for Wav2Vec2 models."""

    def __init__(self, feat_layers: list, num_layers: int, num_heads: int,
                 vocab_size: int, return_hidden: bool = False):
        """Build a Wav2Vec2 model specification.

        Args:
            feat_layers: Configuration of the feature-extraction layers.
            num_layers: Number of transformer layers.
            num_heads: Number of attention heads.
            vocab_size: Size of the vocabulary.
            return_hidden: Return hidden states when True.
        """

class Wav2Vec2BertSpec(ModelSpec):
    """Specification for Wav2Vec2-BERT models."""

    def __init__(self, num_hidden_layers: int, num_adapter_layers: int,
                 vocab_size: int, return_hidden: bool = False):
        """Build a Wav2Vec2-BERT model specification.

        Args:
            num_hidden_layers: Number of hidden layers.
            num_adapter_layers: Number of adapter layers.
            vocab_size: Size of the vocabulary.
            return_hidden: Return hidden states when True.
        """

Usage Examples

Building a Custom Transformer

import ctranslate2.specs as specs

# Encoder stack: 6 layers, 8 heads, GELU with gated feed-forward.
enc = specs.TransformerEncoderSpec(
    num_layers=6,
    num_heads=8,
    pre_norm=True,
    activation="gelu",
    ffn_glu=True,
)

# Decoder stack with cross-attention over the encoder output.
dec = specs.TransformerDecoderSpec(
    num_layers=6,
    num_heads=8,
    pre_norm=True,
    activation="gelu",
    with_encoder_attention=True,
    ffn_glu=True,
)

# Assemble the full encoder-decoder specification.
model_spec = specs.TransformerSpec(enc, dec)

# Vocabularies: special tokens followed by 1000 placeholder tokens each.
src_vocab = ["<unk>", "<s>", "</s>"] + [f"token_{i}" for i in range(1000)]
tgt_vocab = ["<unk>", "<s>", "</s>"] + [f"token_{i}" for i in range(1000)]

model_spec.register_source_vocabulary(src_vocab)
model_spec.register_target_vocabulary(tgt_vocab)

# Write the model to disk.
model_spec.save("custom_transformer_model")

Building a Language Model

import ctranslate2.specs as specs

# Decoder-only (GPT-style) stack: no cross-attention, rotary positions.
gpt_decoder = specs.TransformerDecoderSpec(
    num_layers=12,
    num_heads=12,
    pre_norm=True,
    activation="gelu",
    with_encoder_attention=False,  # No encoder for language models
    rotary_dim=64,  # Use rotary position embeddings
    parallel_residual=True,
)

# Wrap the decoder in a language-model specification.
lm_spec = specs.TransformerDecoderModelSpec(gpt_decoder)

# Vocabulary: special tokens followed by 50000 placeholder tokens.
vocab = ["<unk>", "<s>", "</s>"] + [f"token_{i}" for i in range(50000)]
lm_spec.register_vocabulary(vocab)

# Special-token configuration for the model.
config = specs.LanguageModelConfig(
    unk_token="<unk>",
    bos_token="<s>",
    eos_token="</s>",
)

# Write the model to disk.
lm_spec.save("custom_language_model")

Using Factory Methods

import ctranslate2.specs as specs

# Seq2seq Transformer built through the factory classmethod.
seq2seq_spec = specs.TransformerSpec.from_config(
    num_layers=6,
    num_heads=8,
    d_model=512,
    d_ff=2048,
    activation="gelu",
    pre_norm=True,
)

# Decoder-only (GPT-2-sized) model built through the factory classmethod.
gpt_spec = specs.TransformerDecoderModelSpec.from_config(
    num_layers=12,
    num_heads=12,
    d_model=768,
    vocab_size=50257,
    activation="gelu",
)

Types

# Enumerations for specifications
class Activation:
    """Identifiers for the supported activation functions."""
    RELU: str = "relu"
    GELU: str = "gelu"
    SWISH: str = "swish"
    SILU: str = "silu"
    TANH: str = "tanh"
    SIGMOID: str = "sigmoid"

class EmbeddingsMerge:
    """Strategies for combining multiple source embeddings."""
    CONCAT: str = "concat"
    ADD: str = "add"

class RotaryScalingType:
    """Identifiers for rotary position-embedding scaling schemes."""
    LINEAR: str = "linear"
    SU: str = "su"
    LLAMA3: str = "llama3"

class Quantization:
    """Identifiers for weight quantization formats."""
    CT2: str = "ct2"
    AWQ_GEMM: str = "awq_gemm"
    AWQ_GEMV: str = "awq_gemv"

Install with Tessl CLI

npx tessl i tessl/pypi-ctranslate2

docs

converters.md

index.md

inference.md

specialized.md

specifications.md

utilities.md

tile.json