Embeddings, Retrieval, and Reranking framework for computing dense, sparse, and cross-encoder embeddings using state-of-the-art transformer models
—
The SentenceTransformer class is the main interface for loading, using, and customizing bi-encoder models that map sentences and text to dense vector embeddings.
SentenceTransformer(
model_name_or_path: str | None = None,
modules: Iterable[nn.Module] | None = None,
device: str | None = None,
prompts: dict[str, str] | None = None,
default_prompt_name: str | None = None,
similarity_fn_name: str | SimilarityFunction | None = None,
cache_folder: str | None = None,
trust_remote_code: bool = False,
revision: str | None = None,
local_files_only: bool = False,
token: bool | str | None = None,
use_auth_token: bool | str | None = None,
truncate_dim: int | None = None,
model_kwargs: dict[str, Any] | None = None,
tokenizer_kwargs: dict[str, Any] | None = None,
config_kwargs: dict[str, Any] | None = None,
model_card_data: SentenceTransformerModelCardData | None = None,
backend: Literal["torch", "onnx", "openvino"] = "torch"
){ .api }
Initialize a SentenceTransformer model.
Parameters:
model_name_or_path: Model identifier from HuggingFace Hub or local path
modules: Iterable of PyTorch modules to create custom model architecture
device: Device to run the model on ('cpu', 'cuda', 'mps', 'npu', etc.)
prompts: Dictionary of prompts for different tasks
default_prompt_name: Default prompt to use
similarity_fn_name: Similarity function for embeddings comparison
cache_folder: Custom cache directory for models
trust_remote_code: Allow custom code execution from remote models
revision: Specific model revision/branch to load
local_files_only: Only use locally cached files
token: HuggingFace authentication token
use_auth_token: Deprecated argument, use token instead
truncate_dim: Truncate embeddings to this dimension
model_kwargs: Additional keyword arguments passed when loading the model
tokenizer_kwargs: Additional tokenizer configuration parameters
config_kwargs: Additional keyword arguments passed to the model config
model_card_data: Model card data object for generating model cards
backend: Backend to use for inference ("torch", "onnx", "openvino")
def encode(
sentences: str | list[str] | np.ndarray,
prompt_name: str | None = None,
prompt: str | None = None,
batch_size: int = 32,
show_progress_bar: bool | None = None,
output_value: Literal["sentence_embedding", "token_embeddings"] | None = "sentence_embedding",
precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32",
convert_to_numpy: bool = True,
convert_to_tensor: bool = False,
device: str | list[str | torch.device] | None = None,
normalize_embeddings: bool = False,
truncate_dim: int | None = None,
pool: dict[Literal["input", "output", "processes"], Any] | None = None,
chunk_size: int | None = None,
**kwargs
) -> list[Tensor] | np.ndarray | Tensor | dict[str, Tensor] | list[dict[str, Tensor]]{ .api }
Encode sentences into embeddings.
Parameters:
sentences: Input text(s) to encode
prompt_name: Name of the prompt to use for encoding
prompt: The prompt to use for encoding
batch_size: Batch size for processing
show_progress_bar: Display progress bar during encoding
output_value: Type of embeddings to return ('sentence_embedding', 'token_embeddings', or None for all)
precision: Precision to use for embeddings ("float32", "int8", "uint8", "binary", "ubinary")
convert_to_numpy: Return numpy arrays instead of tensors
convert_to_tensor: Return PyTorch tensors
device: Device(s) for computation (single device or list for multi-process)
normalize_embeddings: L2 normalize the embeddings
truncate_dim: Dimension to truncate sentence embeddings to
pool: Multi-process pool for encoding
chunk_size: Size of chunks for multi-process encoding
**kwargs: Additional keyword arguments
Returns: Embeddings as numpy arrays, tensors, or lists
def encode_query(
sentences: str | list[str] | np.ndarray,
prompt_name: str | None = None,
prompt: str | None = None,
batch_size: int = 32,
show_progress_bar: bool | None = None,
output_value: Literal["sentence_embedding", "token_embeddings"] | None = "sentence_embedding",
precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32",
convert_to_numpy: bool = True,
convert_to_tensor: bool = False,
device: str | list[str | torch.device] | None = None,
normalize_embeddings: bool = False,
truncate_dim: int | None = None,
pool: dict[Literal["input", "output", "processes"], Any] | None = None,
chunk_size: int | None = None,
**kwargs
) -> list[Tensor] | np.ndarray | Tensor | dict[str, Tensor] | list[dict[str, Tensor]]{ .api }
Encode queries for retrieval tasks with query-specific prompt.
def encode_document(
sentences: str | list[str] | np.ndarray,
prompt_name: str | None = None,
prompt: str | None = None,
batch_size: int = 32,
show_progress_bar: bool | None = None,
output_value: Literal["sentence_embedding", "token_embeddings"] | None = "sentence_embedding",
precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32",
convert_to_numpy: bool = True,
convert_to_tensor: bool = False,
device: str | list[str | torch.device] | None = None,
normalize_embeddings: bool = False,
truncate_dim: int | None = None,
pool: dict[Literal["input", "output", "processes"], Any] | None = None,
chunk_size: int | None = None,
**kwargs
) -> list[Tensor] | np.ndarray | Tensor | dict[str, Tensor] | list[dict[str, Tensor]]{ .api }
Encode documents for retrieval tasks with document-specific prompt.
def similarity(
embeddings1: Tensor | npt.NDArray[np.float32],
embeddings2: Tensor | npt.NDArray[np.float32]
) -> Tensor{ .api }
Compute similarity between two sets of embeddings using the model's similarity function.
def similarity_pairwise(
embeddings1: Tensor | npt.NDArray[np.float32],
embeddings2: Tensor | npt.NDArray[np.float32]
) -> Tensor{ .api }
Compute pairwise similarities between all embeddings in two sets.
def get_sentence_embedding_dimension() -> int | None{ .api }
Get the dimension of sentence embeddings.
def get_max_seq_length() -> int | None{ .api }
Get the maximum sequence length the model can handle.
def tokenize(
texts: list[str] | list[dict] | list[tuple[str, str]],
**kwargs
) -> dict[str, Tensor]{ .api }
Tokenize input texts using the model's tokenizer.
def save(
path: str,
model_name: str | None = None,
create_model_card: bool = True,
train_datasets: list[str] | None = None,
safe_serialization: bool = True
) -> None{ .api }
Save the model to a local directory.
def save_pretrained(
save_directory: str,
**kwargs
) -> None{ .api }
Save model using HuggingFace format.
def save_to_hub(
repo_id: str,
organization: str | None = None,
token: str | None = None,
private: bool | None = None,
safe_serialization: bool = True,
commit_message: str = "Add new SentenceTransformer model.",
local_model_path: str | None = None,
exist_ok: bool = False,
replace_model_card: bool = False,
train_datasets: list[str] | None = None
) -> str{ .api }
Save and push model to HuggingFace Hub.
def push_to_hub(
repo_id: str,
token: str | None = None,
private: bool | None = None,
safe_serialization: bool = True,
commit_message: str | None = None,
local_model_path: str | None = None,
exist_ok: bool = False,
replace_model_card: bool = False,
train_datasets: list[str] | None = None,
revision: str | None = None,
create_pr: bool = False
) -> str{ .api }
Push existing model to HuggingFace Hub.
def evaluate(
evaluator: SentenceEvaluator,
output_path: str | None = None
) -> float | dict[str, float]{ .api }
Evaluate the model using a provided evaluator.
def forward(
input: dict[str, torch.Tensor],
**kwargs
) -> dict[str, torch.Tensor]{ .api }
Forward pass through the model.
def start_multi_process_pool(
target_devices: list[str] | None = None
) -> dict[Literal["input", "output", "processes"], Any]{ .api }
Start a multi-process pool for parallel encoding.
@staticmethod
def stop_multi_process_pool(pool: dict[Literal["input", "output", "processes"], Any]) -> None{ .api }
Stop a multi-process pool.
def encode_multi_process(
sentences: list[str],
pool: dict[Literal["input", "output", "processes"], Any],
prompt_name: str | None = None,
prompt: str | None = None,
batch_size: int = 32,
chunk_size: int | None = None,
show_progress_bar: bool | None = None,
precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32",
normalize_embeddings: bool = False,
truncate_dim: int | None = None
) -> np.ndarray{ .api }
Encode sentences using multi-processing for improved performance.
@property
def device() -> torch.device{ .api }
Current device of the model.
@property
def tokenizer() -> PreTrainedTokenizer{ .api }
Access to the model's tokenizer.
@property
def max_seq_length() -> int{ .api }
Maximum sequence length supported by the model.
@property
def similarity_fn_name() -> Literal["cosine", "dot", "euclidean", "manhattan"]{ .api }
Name of the similarity function used by the model.
@property
def transformers_model() -> PreTrainedModel | None{ .api }
Access to the underlying transformer model.
from sentence_transformers import SentenceTransformer
# Load pre-trained model
model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode single sentence
embedding = model.encode("Hello world")
print(f"Embedding shape: {embedding.shape}")
# Encode multiple sentences
sentences = [
"The cat sits on the mat",
"A feline rests on a rug",
"Dogs are great pets"
]
embeddings = model.encode(sentences)
print(f"Embeddings shape: {embeddings.shape}")

# Compute similarity between two sentences
sentence1 = "The weather is nice today"
sentence2 = "Today has beautiful weather"
emb1 = model.encode(sentence1)
emb2 = model.encode(sentence2)
similarity = model.similarity(emb1, emb2)
print(f"Similarity: {similarity.item():.4f}")
# Pairwise similarities
embeddings = model.encode([
"Python is a programming language",
"Java is used for software development",
"I love pizza",
"Pasta is delicious"
])
# Compute all pairwise similarities
similarities = model.similarity_pairwise(embeddings, embeddings)
print(f"Similarity matrix shape: {similarities.shape}")

# For retrieval tasks with different prompts
queries = ["What is machine learning?", "How do neural networks work?"]
documents = [
"Machine learning is a subset of artificial intelligence",
"Neural networks are computational models inspired by biological neurons",
"Pizza recipes vary by region and preference"
]
# Encode with task-specific methods
query_embeddings = model.encode_query(queries)
doc_embeddings = model.encode_document(documents)
# Compute retrieval similarities
similarities = model.similarity(query_embeddings, doc_embeddings)

from sentence_transformers import SentenceTransformer
from sentence_transformers.models import Transformer, Pooling, Dense
# Create custom model architecture
transformer = Transformer('distilbert-base-uncased', max_seq_length=256)
pooling = Pooling(transformer.get_word_embedding_dimension(), pooling_mode='mean')
dense = Dense(pooling.get_sentence_embedding_dimension(), 256, activation_function='tanh')
# Combine modules
model = SentenceTransformer(modules=[transformer, pooling, dense])
# Use the custom model
embeddings = model.encode(["Custom model example"])

# Multi-process encoding for large datasets
sentences = ["sentence " + str(i) for i in range(10000)]
# Start multi-process pool
pool = model.start_multi_process_pool(['cuda:0', 'cuda:1'])
# Encode using multiple GPUs
embeddings = model.encode_multi_process(sentences, pool, batch_size=64)
# Clean up
model.stop_multi_process_pool(pool)
# Normalized embeddings for cosine similarity
embeddings = model.encode(sentences, normalize_embeddings=True)

# Save model locally
model.save('./my-sentence-transformer')
# Save to HuggingFace Hub
model.save_to_hub('my-username/my-sentence-transformer')
# Load saved model
loaded_model = SentenceTransformer('./my-sentence-transformer')

from sentence_transformers import SimilarityFunction
class SimilarityFunction(Enum):
COSINE = "cosine"
DOT_PRODUCT = "dot"
DOT = "dot" # Alias for DOT_PRODUCT
EUCLIDEAN = "euclidean"
MANHATTAN = "manhattan"{ .api }
Enumeration of available similarity functions for comparing embeddings.
# Set similarity function during initialization
model = SentenceTransformer(
'all-MiniLM-L6-v2',
similarity_fn_name=SimilarityFunction.COSINE
)
# Or use string names
model = SentenceTransformer(
'all-MiniLM-L6-v2',
similarity_fn_name='euclidean'
)

Install with Tessl CLI
npx tessl i tessl/pypi-sentence-transformers