Embeddings, Retrieval, and Reranking framework for computing dense, sparse, and cross-encoder embeddings using state-of-the-art transformer models
—
Cross-encoders jointly process pairs of sentences, making them ideal for tasks like reranking, textual entailment, and semantic textual similarity where direct comparison between texts is needed.
CrossEncoder(
model_name_or_path: str,
num_labels: int | None = None,
max_length: int | None = None,
activation_fn: Callable | None = None,
device: str | None = None,
cache_folder: str | None = None,
trust_remote_code: bool = False,
revision: str | None = None,
local_files_only: bool = False,
token: bool | str | None = None,
model_kwargs: dict | None = None,
tokenizer_kwargs: dict | None = None,
config_kwargs: dict | None = None,
model_card_data: CrossEncoderModelCardData | None = None,
backend: Literal["torch", "onnx", "openvino"] = "torch"
){ .api }
Initialize a CrossEncoder model for scoring sentence pairs.
Parameters:
model_name_or_path: A model name from Hugging Face Hub or path to a local model
num_labels: Number of labels of the classifier. If 1, regression model that outputs a continuous score 0...1. If > 1, outputs several scores for soft-maxed probability scores
max_length: Max length for input sequences. Longer sequences will be truncated
activation_fn: Callable (like nn.Sigmoid) for the default activation function on top of model.predict()
device: Device ("cuda", "cpu", "mps", "npu") that should be used for computation
cache_folder: Path to the folder where cached files are stored
trust_remote_code: Whether to allow custom models defined on the Hub in their own modeling files
revision: The specific model version to use. Can be a branch name, tag name, or commit id
local_files_only: Whether to only look at local files (do not try to download the model)
token: Hugging Face authentication token to download private models
model_kwargs: Additional model configuration parameters to be passed to the Hugging Face Transformers model
tokenizer_kwargs: Additional tokenizer configuration parameters to be passed to the Hugging Face Transformers tokenizer
config_kwargs: Additional model configuration parameters to be passed to the Hugging Face Transformers config
model_card_data: A model card data object that contains information about the model
backend: The backend to use for inference ("torch", "onnx", "openvino")
def predict(
sentences: list[tuple[str, str]] | list[list[str]] | tuple[str, str] | list[str],
batch_size: int = 32,
show_progress_bar: bool | None = None,
activation_fn: Callable | None = None,
apply_softmax: bool | None = False,
convert_to_numpy: bool = True,
convert_to_tensor: bool = False
) -> list[torch.Tensor] | np.ndarray | torch.Tensor{ .api }
Predict scores for sentence pairs.
Parameters:
sentences: List of sentence pairs [(Sent1, Sent2), (Sent3, Sent4)] or a single sentence pair (Sent1, Sent2)
batch_size: Batch size for encoding
show_progress_bar: Output progress bar
activation_fn: Activation function applied on the logits output of the CrossEncoder
apply_softmax: If set to True and model.num_labels > 1, applies softmax on the logits output
convert_to_numpy: Whether the output should be a list of numpy vectors
convert_to_tensor: Whether the output should be one large tensor
Returns: Prediction scores for each sentence pair
def rank(
query: str,
documents: list[str],
top_k: int | None = None,
return_documents: bool = False,
batch_size: int = 32,
show_progress_bar: bool | None = None,
activation_fn: Callable | None = None,
apply_softmax: bool = False,
convert_to_numpy: bool = True,
convert_to_tensor: bool = False
) -> list[dict[str, int | float | str]]{ .api }
Rank documents based on their relevance to a query.
Parameters:
query: A single query
documents: A list of documents
top_k: Return the top-k documents. If None, all documents are returned
return_documents: If True, also returns the documents. If False, only returns the indices and scores
batch_size: Batch size for encoding
show_progress_bar: Output progress bar
activation_fn: Activation function applied on the logits output of the CrossEncoder
apply_softmax: If there are more than 2 dimensions and apply_softmax=True, applies softmax on the logits output
convert_to_numpy: Convert the output to a numpy matrix
convert_to_tensor: Convert the output to a tensor
Returns: List of ranked results with scores and optionally documents
def save(
path: str,
*,
safe_serialization: bool = True,
**kwargs
) -> None{ .api }
Save the cross-encoder model to a directory.
def save_pretrained(
path: str,
*,
safe_serialization: bool = True,
**kwargs
) -> None{ .api }
Save model using HuggingFace format.
def push_to_hub(
repo_id: str,
*,
token: str | None = None,
private: bool | None = None,
safe_serialization: bool = True,
commit_message: str | None = None,
exist_ok: bool = False,
revision: str | None = None,
create_pr: bool = False,
tags: list[str] | None = None
) -> str{ .api }
Push model to HuggingFace Hub.
@property
def device() -> torch.device{ .api }
Current device of the model.
@property
def tokenizer() -> PreTrainedTokenizer{ .api }
Access to the model's tokenizer.
@property
def config() -> PretrainedConfig{ .api }
Model configuration object.
CrossEncoderTrainer(
model: CrossEncoder | None = None,
args: CrossEncoderTrainingArguments | None = None,
train_dataset: Dataset | None = None,
eval_dataset: Dataset | None = None,
tokenizer: PreTrainedTokenizer | None = None,
data_collator: DataCollator | None = None,
compute_metrics: callable | None = None,
callbacks: list[TrainerCallback] | None = None,
optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
preprocess_logits_for_metrics: callable | None = None
){ .api }
Trainer for cross-encoder models based on HuggingFace Trainer.
Parameters:
model: CrossEncoder model to train
args: Training arguments
train_dataset: Training dataset
eval_dataset: Evaluation dataset
tokenizer: Tokenizer (usually auto-detected from model)
data_collator: Data collator for batching
compute_metrics: Function to compute evaluation metrics
callbacks: Training callbacks
optimizers: Custom optimizer and scheduler
preprocess_logits_for_metrics: Logits preprocessing function
def train(
resume_from_checkpoint: str | bool | None = None,
trial: dict[str, Any] | None = None,
ignore_keys_for_eval: list[str] | None = None,
**kwargs
) -> TrainOutput{ .api }
Train the cross-encoder model.
def evaluate(
eval_dataset: Dataset | None = None,
ignore_keys: list[str] | None = None,
metric_key_prefix: str = "eval"
) -> dict[str, float]{ .api }
Evaluate the model on the evaluation dataset.
class CrossEncoderTrainingArguments(TrainingArguments):
def __init__(
self,
output_dir: str,
evaluation_strategy: str | IntervalStrategy = "no",
eval_steps: int | None = None,
eval_delay: float = 0,
logging_dir: str | None = None,
logging_strategy: str | IntervalStrategy = "steps",
logging_steps: int = 500,
save_strategy: str | IntervalStrategy = "steps",
save_steps: int = 500,
save_total_limit: int | None = None,
seed: int = 42,
data_seed: int | None = None,
jit_mode_eval: bool = False,
use_ipex: bool = False,
bf16: bool = False,
fp16: bool = False,
fp16_opt_level: str = "O1",
half_precision_backend: str = "auto",
bf16_full_eval: bool = False,
fp16_full_eval: bool = False,
tf32: bool | None = None,
local_rank: int = -1,
ddp_backend: str | None = None,
tpu_num_cores: int | None = None,
tpu_metrics_debug: bool = False,
debug: str | list[DebugOption] = "",
dataloader_drop_last: bool = False,
dataloader_num_workers: int = 0,
past_index: int = -1,
run_name: str | None = None,
disable_tqdm: bool | None = None,
remove_unused_columns: bool = True,
label_names: list[str] | None = None,
load_best_model_at_end: bool = False,
ignore_data_skip: bool = False,
fsdp: str | list[str] = "",
fsdp_min_num_params: int = 0,
fsdp_config: dict[str, Any] | None = None,
fsdp_transformer_layer_cls_to_wrap: str | None = None,
deepspeed: str | None = None,
label_smoothing_factor: float = 0.0,
optim: str | OptimizerNames = "adamw_torch",
optim_args: str | None = None,
adafactor: bool = False,
group_by_length: bool = False,
length_column_name: str | None = "length",
report_to: str | list[str] | None = None,
ddp_find_unused_parameters: bool | None = None,
ddp_bucket_cap_mb: int | None = None,
ddp_broadcast_buffers: bool | None = None,
dataloader_pin_memory: bool = True,
skip_memory_metrics: bool = True,
use_legacy_prediction_loop: bool = False,
push_to_hub: bool = False,
resume_from_checkpoint: str | None = None,
hub_model_id: str | None = None,
hub_strategy: str | HubStrategy = "every_save",
hub_token: str | None = None,
hub_private_repo: bool = False,
hub_always_push: bool = False,
gradient_checkpointing: bool = False,
include_inputs_for_metrics: bool = False,
auto_find_batch_size: bool = False,
full_determinism: bool = False,
torchdynamo: str | None = None,
ray_scope: str | None = "last",
ddp_timeout: int = 1800,
torch_compile: bool = False,
torch_compile_backend: str | None = None,
torch_compile_mode: str | None = None,
dispatch_batches: bool | None = None,
split_batches: bool | None = None,
include_tokens_per_second: bool = False,
**kwargs
){ .api }
Training arguments for cross-encoder training, extending HuggingFace TrainingArguments.
class CrossEncoderModelCardData:
def __init__(
self,
language: str | list[str] | None = None,
license: str | None = None,
tags: str | list[str] | None = None,
model_name: str | None = None,
model_id: str | None = None,
eval_results: list[EvalResult] | None = None,
train_datasets: str | list[str] | None = None,
eval_datasets: str | list[str] | None = None
){ .api }
Data class for generating model cards for cross-encoder models.
Parameters:
language: Language(s) supported by the model
license: Model license
tags: Tags for categorizing the model
model_name: Human-readable model name
model_id: Model identifier
eval_results: Evaluation results to include
train_datasets: Training datasets used
eval_datasets: Evaluation datasets used
from sentence_transformers import CrossEncoder
# Load a pre-trained cross-encoder reranking model
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
# Score sentence pairs: each pair is (query, candidate passage)
pairs = [
    ('How many people live in Berlin?', 'Berlin has a population of 3,520,031'),
    ('How many people live in Berlin?', 'The weather in Berlin is nice'),
    ('What is the capital of France?', 'Paris is the capital of France')
]
scores = cross_encoder.predict(pairs)
print("Relevance scores:", scores)
# Apply softmax for probabilities (only meaningful when model.num_labels > 1)
probs = cross_encoder.predict(pairs, apply_softmax=True)
print("Relevance probabilities:", probs)

query = "How to learn machine learning?"
documents = [
    "Machine learning is a subset of artificial intelligence",
    "Start with basic statistics and linear algebra",
    "Python is a popular programming language",
    "Practice with real datasets and projects",
    "Understanding algorithms is crucial for ML success"
]
# Rank documents by relevance to the query; top_k=3 keeps the best three
results = cross_encoder.rank(query, documents, top_k=3)
for result in results:
    print(f"Score: {result['score']:.4f}")
    print(f"Document: {result['corpus_id']}")
    # 'text' is only present when rank(..., return_documents=True)
    if 'text' in result:
        print(f"Text: {result['text']}")
    print()

# For classification tasks (e.g. 3-label NLI)
cross_encoder = CrossEncoder('cross-encoder/nli-deberta-v3-base')
# Natural Language Inference pairs: (premise, hypothesis)
nli_pairs = [
    ("A man is eating pizza", "A man is eating food"),    # Entailment
    ("A woman is reading a book", "A woman is cooking"),  # Contradiction
    ("It's raining outside", "The weather is bad")        # Neutral
]
# apply_softmax=True converts the 3 logits into class probabilities
scores = cross_encoder.predict(nli_pairs, apply_softmax=True)
# Returns probabilities for [contradiction, entailment, neutral]
for pair, score in zip(nli_pairs, scores):
    prediction = ["contradiction", "entailment", "neutral"][score.argmax()]
    confidence = score.max()
    print(f"Premise: {pair[0]}")
    print(f"Hypothesis: {pair[1]}")
    print(f"Prediction: {prediction} (confidence: {confidence:.4f})")
    print()

from sentence_transformers import CrossEncoder, CrossEncoderTrainer, CrossEncoderTrainingArguments
from datasets import Dataset
import torch

# Create toy training data: sentence pairs labeled 1 (similar) / 0 (dissimilar)
train_data = [
    {"sentence1": "The cat sits on the mat", "sentence2": "A feline rests on a rug", "label": 1},
    {"sentence1": "I love pizza", "sentence2": "Dogs are great pets", "label": 0},
    {"sentence1": "Machine learning is AI", "sentence2": "ML is a subset of artificial intelligence", "label": 1}
]
# Convert to a HuggingFace dataset
train_dataset = Dataset.from_list(train_data)
# Initialize a cross-encoder with a 2-label classification head
model = CrossEncoder('distilbert-base-uncased', num_labels=2)
# Training arguments
args = CrossEncoderTrainingArguments(
    output_dir='./cross-encoder-output',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    logging_steps=10,
    save_steps=100,
    eval_steps=100,
    evaluation_strategy="steps",
    save_total_limit=2,
    load_best_model_at_end=True,
)
# Create trainer.
# load_best_model_at_end=True with evaluation_strategy="steps" requires an
# eval_dataset; this demo reuses the train set for evaluation — use a real
# held-out split in practice.
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=train_dataset,
    compute_metrics=lambda eval_pred: {
        'accuracy': (eval_pred.predictions.argmax(-1) == eval_pred.label_ids).mean()
    }
)
# Train the model
trainer.train()
# Save the trained model
model.save('./my-cross-encoder')

import torch.nn as nn
# Load model with a custom default activation.
# The documented constructor parameter (see above) is activation_fn,
# not the legacy default_activation_function.
cross_encoder = CrossEncoder(
    'cross-encoder/ms-marco-MiniLM-L-6-v2',
    activation_fn=nn.Sigmoid()
)
# Override the default activation for a single predict() call
# (predict's documented parameter is activation_fn, not activation_fct)
scores = cross_encoder.predict(
    pairs,
    activation_fn=nn.Tanh()
)
# Batch prediction with a progress bar.
# predict() takes no num_workers parameter, so it is not passed here.
large_pairs = [("query " + str(i), "document " + str(i)) for i in range(1000)]
scores = cross_encoder.predict(
    large_pairs,
    batch_size=64,
    show_progress_bar=True
)

from sentence_transformers import CrossEncoderModelCardData
# Build metadata for the auto-generated model card
model_card_data = CrossEncoderModelCardData(
    language=['en'],
    license='apache-2.0',
    tags=['sentence-transformers', 'cross-encoder', 'reranking'],
    model_name='My Custom Cross-Encoder',
    train_datasets=['ms-marco'],
    eval_datasets=['trec-dl-2019'],
)
# Save model with model card
cross_encoder.save('./my-model', model_card_data=model_card_data)
Install with Tessl CLI
npx tessl i tessl/pypi-sentence-transformers