CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-pytorch-transformers

Repository of pre-trained NLP Transformer models: BERT & RoBERTa, GPT & GPT-2, Transformer-XL, XLNet and XLM

Pending
Overview
Eval results
Files

base-classes.mddocs/

Base Classes

Core abstract base classes that define common interfaces shared by all models, tokenizers, and configurations in the pytorch-transformers library. These classes provide essential functionality for loading, saving, and managing pre-trained components.

Capabilities

PreTrainedModel

Abstract base class for all transformer models, providing common functionality for model loading, saving, parameter management, and inference.

class PreTrainedModel:
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        """
        Instantiate a pre-trained PyTorch model from a pre-trained model configuration.
        
        Parameters:
        - pretrained_model_name_or_path (str): Model name or local path
        - config (PretrainedConfig, optional): Model configuration
        - cache_dir (str, optional): Directory to cache downloaded files
        - from_tf (bool, optional): Load from TensorFlow checkpoint
        - force_download (bool, optional): Force re-download even if cached
        - resume_download (bool, optional): Resume incomplete downloads
        - proxies (dict, optional): HTTP proxy configuration
        - output_loading_info (bool, optional): Return loading info dict
        - use_auth_token (str/bool, optional): Authentication token for private models
        - revision (str, optional): Git branch/tag/commit to use
          (NOTE: `use_auth_token` and `revision` were introduced in later `transformers`
          releases — confirm they are accepted by pytorch-transformers 1.x before relying on them)
        - kwargs: Additional arguments passed to model constructor
        
        Returns:
        PreTrainedModel: Instance of the model class
        """
    
    def save_pretrained(self, save_directory):
        """
        Save model weights and configuration to a directory.
        
        Parameters:
        - save_directory (str): Directory to save model files
        """
    
    def resize_token_embeddings(self, new_num_tokens=None):
        """
        Resize token embeddings matrix of the model.
        
        Parameters:
        - new_num_tokens (int, optional): New vocabulary size
        
        Returns:
        torch.nn.Embedding: New embeddings matrix
        """
    
    def prune_heads(self, heads_to_prune):
        """
        Prune attention heads in the model.
        
        Parameters:
        - heads_to_prune (dict): Dictionary mapping layer to heads to prune
        """
    
    def get_input_embeddings(self):
        """
        Get the model's input embeddings.
        
        Returns:
        torch.nn.Module: Input embeddings layer
        """
    
    def set_input_embeddings(self, value):
        """
        Set the model's input embeddings.
        
        Parameters:
        - value (torch.nn.Module): New input embeddings layer
        """
    
    def get_output_embeddings(self):
        """
        Get the model's output embeddings.
        
        Returns:
        torch.nn.Module: Output embeddings layer
        """
    
    def set_output_embeddings(self, new_embeddings):
        """
        Set the model's output embeddings.
        
        Parameters:
        - new_embeddings (torch.nn.Module): New output embeddings layer
        """

Usage Examples:

from pytorch_transformers import BertModel
import torch

# Load pre-trained model
model = BertModel.from_pretrained("bert-base-uncased")

# Save model
model.save_pretrained("./my-bert-model")

# Resize embeddings for new vocabulary
model.resize_token_embeddings(30000)

# Prune attention heads
heads_to_prune = {0: [0, 1], 1: [0]}  # Prune heads 0,1 in layer 0 and head 0 in layer 1
model.prune_heads(heads_to_prune)

# Access embeddings
input_embeddings = model.get_input_embeddings()
print(f"Embedding dimensions: {input_embeddings.weight.shape}")

# Model inference
inputs = torch.randint(0, 1000, (1, 10))  # Random token IDs
outputs = model(inputs)

PreTrainedTokenizer

Abstract base class for all tokenizers, providing common tokenization interface and special token handling.

class PreTrainedTokenizer:
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """
        Instantiate a pre-trained tokenizer from a vocabulary file.
        
        Parameters:
        - pretrained_model_name_or_path (str): Model name or local path
        - cache_dir (str, optional): Directory to cache downloaded files
        - force_download (bool, optional): Force re-download even if cached
        - resume_download (bool, optional): Resume incomplete downloads
        - proxies (dict, optional): HTTP proxy configuration
        - use_auth_token (str/bool, optional): Authentication token for private models
        - revision (str, optional): Git branch/tag/commit to use
          (NOTE: `use_auth_token` and `revision` were introduced in later `transformers`
          releases — confirm they are accepted by pytorch-transformers 1.x before relying on them)
        - kwargs: Additional arguments passed to tokenizer constructor
        
        Returns:
        PreTrainedTokenizer: Instance of the tokenizer class
        """
    
    def save_pretrained(self, save_directory):
        """
        Save tokenizer vocabulary and configuration to a directory.
        
        Parameters:
        - save_directory (str): Directory to save tokenizer files
        """
    
    def tokenize(self, text, **kwargs):
        """
        Tokenize a string into a list of tokens.
        
        Parameters:
        - text (str): Input text to tokenize
        - kwargs: Additional tokenization arguments
        
        Returns:
        List[str]: List of tokens
        """
    
    def encode(self, text, text_pair=None, add_special_tokens=True, max_length=None, **kwargs):
        """
        Encode text into token IDs.
        
        Parameters:
        - text (str): Primary input text
        - text_pair (str, optional): Secondary input text for sentence pairs
        - add_special_tokens (bool): Whether to add special tokens
        - max_length (int, optional): Maximum sequence length
        - kwargs: Additional encoding arguments
        
        Returns:
        List[int]: List of token IDs
        """
    
    def decode(self, token_ids, skip_special_tokens=False, clean_up_tokenization_spaces=True):
        """
        Decode token IDs back to text.
        
        Parameters:
        - token_ids (List[int]): Token IDs to decode
        - skip_special_tokens (bool): Whether to remove special tokens
        - clean_up_tokenization_spaces (bool): Whether to clean up spaces
        
        Returns:
        str: Decoded text
        """
    
    def convert_tokens_to_ids(self, tokens):
        """
        Convert tokens to token IDs.
        
        Parameters:
        - tokens (List[str] or str): Token(s) to convert
        
        Returns:
        List[int] or int: Token ID(s)
        """
    
    def convert_ids_to_tokens(self, ids):
        """
        Convert token IDs to tokens.
        
        Parameters:
        - ids (List[int] or int): Token ID(s) to convert
        
        Returns:
        List[str] or str: Token(s)
        """
    
    def __call__(self, text, text_pair=None, **kwargs):
        """
        Main tokenization method with tensor output support.
        (NOTE: calling the tokenizer directly with padding/truncation/return_tensors
        was added in later `transformers` releases — verify that pytorch-transformers 1.x
        tokenizers implement `__call__` before depending on this interface)
        
        Parameters:
        - text (str or List[str]): Input text(s)
        - text_pair (str or List[str], optional): Pair text(s)
        - return_tensors (str, optional): Type of tensors to return ('pt', 'tf', 'np')
        - padding (bool/str, optional): Padding strategy
        - truncation (bool/str, optional): Truncation strategy
        - max_length (int, optional): Maximum sequence length
        - kwargs: Additional arguments
        
        Returns:
        Dict: Dictionary containing input_ids, attention_mask, etc.
        """

Special Token Properties:

# Special tokens available on all tokenizers
bos_token: str        # Beginning of sequence token
eos_token: str        # End of sequence token
unk_token: str        # Unknown token
sep_token: str        # Separator token
pad_token: str        # Padding token
cls_token: str        # Classification token
mask_token: str       # Mask token for masked language modeling

# Special token IDs
bos_token_id: int
eos_token_id: int
unk_token_id: int
sep_token_id: int
pad_token_id: int
cls_token_id: int
mask_token_id: int

# Vocabulary size
vocab_size: int

Usage Examples:

from pytorch_transformers import BertTokenizer

# Load tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Basic tokenization
text = "Hello, how are you?"
tokens = tokenizer.tokenize(text)
print(f"Tokens: {tokens}")

# Encoding to IDs
token_ids = tokenizer.encode(text)
print(f"Token IDs: {token_ids}")

# Decoding back to text
decoded = tokenizer.decode(token_ids)
print(f"Decoded: {decoded}")

# Full preprocessing with tensors
inputs = tokenizer(
    text,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=512
)
print(f"Input shape: {inputs['input_ids'].shape}")

# Access special tokens
print(f"CLS token: {tokenizer.cls_token}")
print(f"SEP token: {tokenizer.sep_token}")
print(f"PAD token ID: {tokenizer.pad_token_id}")

# Save tokenizer
tokenizer.save_pretrained("./my-tokenizer")

PretrainedConfig

Base configuration class for all model configurations, containing model hyperparameters and architecture specifications.

class PretrainedConfig:
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
        """
        Instantiate a PretrainedConfig from a pre-trained model configuration.
        
        Parameters:
        - pretrained_model_name_or_path (str): Model name or local path
        - cache_dir (str, optional): Directory to cache downloaded files
        - force_download (bool, optional): Force re-download even if cached
        - resume_download (bool, optional): Resume incomplete downloads
        - proxies (dict, optional): HTTP proxy configuration
        - use_auth_token (str/bool, optional): Authentication token for private models
        - revision (str, optional): Git branch/tag/commit to use
          (NOTE: `use_auth_token` and `revision` were introduced in later `transformers`
          releases — confirm they are accepted by pytorch-transformers 1.x before relying on them)
        - kwargs: Additional configuration parameters
        
        Returns:
        PretrainedConfig: Instance of the configuration class
        """
    
    def save_pretrained(self, save_directory):
        """
        Save configuration to a directory.
        
        Parameters:
        - save_directory (str): Directory to save configuration file
        """
    
    def to_dict(self):
        """
        Serialize configuration to a Python dictionary.
        
        Returns:
        Dict: Configuration as dictionary
        """
    
    def to_json_string(self):
        """
        Serialize configuration to a JSON string.
        
        Returns:
        str: Configuration as JSON string
        """
    
    @classmethod
    def from_dict(cls, config_dict, **kwargs):
        """
        Construct configuration from a dictionary.
        
        Parameters:
        - config_dict (Dict): Configuration dictionary
        - kwargs: Additional parameters
        
        Returns:
        PretrainedConfig: Configuration instance
        """
    
    @classmethod
    def from_json_file(cls, json_file):
        """
        Construct configuration from a JSON file.
        
        Parameters:
        - json_file (str): Path to JSON configuration file
        
        Returns:
        PretrainedConfig: Configuration instance
        """

Usage Examples:

from pytorch_transformers import BertConfig

# Load configuration
config = BertConfig.from_pretrained("bert-base-uncased")

# Access configuration parameters
print(f"Hidden size: {config.hidden_size}")
print(f"Number of layers: {config.num_hidden_layers}")
print(f"Number of attention heads: {config.num_attention_heads}")

# Modify configuration
config.num_labels = 3  # For classification with 3 classes

# Save configuration
config.save_pretrained("./my-config")

# Convert to dictionary/JSON
config_dict = config.to_dict()
config_json = config.to_json_string()

# Create from dictionary
custom_config = BertConfig.from_dict({
    "hidden_size": 512,
    "num_hidden_layers": 6,
    "num_attention_heads": 8
})

Model Utilities

Core utility classes and functions for model parameter management and weight manipulation.

Conv1D

A 1D convolution layer implementation as used in GPT models, where weights are transposed compared to standard linear layers.

class Conv1D(nn.Module):
    def __init__(self, nf, nx):
        """
        Conv1D layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2).
        Basically works like a Linear layer but the weights are transposed.
        
        Parameters:
        - nf (int): Size of output features
        - nx (int): Size of input features
        """
    
    def forward(self, x):
        """
        Forward pass through the Conv1D layer.
        
        Parameters:
        - x (torch.Tensor): Input tensor
        
        Returns:
        torch.Tensor: Output tensor
        """

Layer Pruning

Utility functions for pruning model layers to remove attention heads or reduce model size.

def prune_layer(layer, index, dim=None):
    """
    Prune a Conv1D or nn.Linear layer to keep only entries in index.
    Return the pruned layer as a new layer with requires_grad=True.
    Used to remove heads.
    
    Parameters:
    - layer (nn.Module): Layer to prune (Conv1D or nn.Linear)
    - index (torch.LongTensor): Indices of entries to keep
    - dim (int, optional): Dimension along which to prune (default: 0 for Linear, 1 for Conv1D)
    
    Returns:
    nn.Module: New pruned layer
    """

Usage Examples:

from pytorch_transformers import Conv1D, prune_layer
import torch
import torch.nn as nn

# Create a Conv1D layer
conv1d = Conv1D(768, 512)  # 768 output features, 512 input features
input_tensor = torch.randn(32, 128, 512)  # batch_size, seq_len, input_features
output = conv1d(input_tensor)
print(output.shape)  # torch.Size([32, 128, 768])

# Prune a linear layer to keep only certain output features
linear = nn.Linear(768, 12)  # Original layer: weight shape is (12, 768)
indices_to_keep = torch.LongTensor([0, 2, 4, 6, 8, 10])  # Keep every other output feature
pruned_linear = prune_layer(linear, indices_to_keep, dim=0)  # dim=0 prunes nn.Linear output entries (the default)
print(f"Original: {linear.weight.shape}, Pruned: {pruned_linear.weight.shape}")  # (12, 768) -> (6, 768)

Constants

File naming constants used throughout the library for consistent model serialization.

# Model weight files
WEIGHTS_NAME: str = "pytorch_model.bin"
CONFIG_NAME: str = "config.json"
TF_WEIGHTS_NAME: str = "model.ckpt"

Usage Examples:

from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME, TF_WEIGHTS_NAME
import os

# Check for model files in a directory
model_dir = "./my-model"
weights_path = os.path.join(model_dir, WEIGHTS_NAME)
config_path = os.path.join(model_dir, CONFIG_NAME)

if os.path.exists(weights_path):
    print(f"PyTorch weights found: {weights_path}")
if os.path.exists(config_path):
    print(f"Config found: {config_path}")

# When loading TensorFlow weights
tf_weights_path = os.path.join(model_dir, TF_WEIGHTS_NAME + ".index")
if os.path.exists(tf_weights_path):
    print(f"TensorFlow weights found: {tf_weights_path}")

Install with Tessl CLI

npx tessl i tessl/pypi-pytorch-transformers

docs

auto-classes.md

base-classes.md

bert-models.md

file-utilities.md

gpt2-models.md

index.md

optimization.md

other-models.md

tile.json