FlagEmbedding - BGE: One-Stop Retrieval Toolkit For Search and RAG
Embedders designed for encoder-only transformer models (BERT-like architectures). These models excel at understanding bidirectional context and are particularly effective for semantic similarity tasks and dense retrieval.
Standard embedder for encoder-only models using CLS token pooling by default. Supports all standard BERT-like architectures and provides a solid foundation for most embedding tasks.
from typing import Any, Dict, List, Optional, Union
import torch

class FlagModel(AbsEmbedder):
    def __init__(
        self,
        model_name_or_path: str,
        pooling_method: str = "cls",
        normalize_embeddings: bool = True,
        use_fp16: bool = True,
        query_instruction_for_retrieval: Optional[str] = None,
        query_instruction_format: str = "{}{}",
        devices: Optional[Union[str, List[str]]] = None,
        batch_size: int = 256,
        query_max_length: int = 512,
        passage_max_length: int = 512,
        convert_to_numpy: bool = True,
        trust_remote_code: bool = False,
        cache_dir: Optional[str] = None,
        **kwargs
    ):
        """
        Initialize encoder-only embedder.

        Args:
            model_name_or_path: Path to model or HuggingFace model name
            pooling_method: Pooling strategy ("cls", "mean")
            normalize_embeddings: Whether to normalize output embeddings
            use_fp16: Use half precision for inference
            query_instruction_for_retrieval: Instruction prepended to queries
            query_instruction_format: Format string for instructions
            devices: Device (or list of devices) for inference; a list enables multi-GPU
            batch_size: Default batch size for encoding
            query_max_length: Maximum query token length
            passage_max_length: Maximum passage token length
            convert_to_numpy: Convert outputs to numpy arrays
            trust_remote_code: Allow custom model code execution
            cache_dir: Directory for model cache
            **kwargs: Additional model parameters
        """

Advanced embedder designed specifically for BGE-M3 models, with support for dense, sparse, and ColBERT representations. It provides unified multi-vector embeddings for hybrid retrieval scenarios.
class BGEM3FlagModel(AbsEmbedder):
    def __init__(
        self,
        model_name_or_path: str,
        pooling_method: str = "cls",
        normalize_embeddings: bool = True,
        use_fp16: bool = True,
        query_instruction_for_retrieval: Optional[str] = None,
        query_instruction_format: str = "{}{}",
        devices: Optional[Union[str, List[str]]] = None,
        batch_size: int = 256,
        query_max_length: int = 512,
        passage_max_length: int = 512,
        convert_to_numpy: bool = True,
        colbert_dim: int = -1,
        return_dense: bool = True,
        return_sparse: bool = False,
        return_colbert_vecs: bool = False,
        **kwargs
    ):
        """
        Initialize BGE-M3 specialized embedder.

        Args:
            model_name_or_path: Path to BGE-M3 model
            pooling_method: Pooling strategy ("cls", "mean")
            normalize_embeddings: Whether to normalize output embeddings
            use_fp16: Use half precision for inference
            query_instruction_for_retrieval: Instruction prepended to queries
            query_instruction_format: Format string for instructions
            devices: Device (or list of devices) for inference; a list enables multi-GPU
            batch_size: Default batch size for encoding
            query_max_length: Maximum query token length
            passage_max_length: Maximum passage token length
            convert_to_numpy: Convert outputs to numpy arrays
            colbert_dim: ColBERT dimension (-1 for auto)
            return_dense: Include dense embeddings in output
            return_sparse: Include sparse embeddings in output
            return_colbert_vecs: Include ColBERT vectors in output
            **kwargs: Additional model parameters
        """
    def compute_score(
        self,
        q_reps: Dict[str, Any],
        p_reps: Dict[str, Any],
        weights: Optional[List[float]] = None
    ) -> float:
        """
        Compute similarity score between query and passage representations.

        Args:
            q_reps: Query representations (dense, sparse, colbert)
            p_reps: Passage representations (dense, sparse, colbert)
            weights: Weights for combining the different representation types

        Returns:
            Combined similarity score
        """
    def compute_lexical_matching_score(
        self,
        lexical_weights_1: Dict[int, float],
        lexical_weights_2: Dict[int, float]
    ) -> float:
        """
        Compute lexical matching score between sparse representations.

        Args:
            lexical_weights_1: First sparse representation weights
            lexical_weights_2: Second sparse representation weights

        Returns:
            Lexical matching score
        """
    def colbert_score(
        self,
        q_reps: torch.Tensor,
        p_reps: torch.Tensor
    ) -> float:
        """
        Compute ColBERT similarity score.

        Args:
            q_reps: Query ColBERT vectors
            p_reps: Passage ColBERT vectors

        Returns:
            ColBERT similarity score
        """
    def convert_id_to_token(
        self,
        lexical_weights: Dict[int, float]
    ) -> List[Dict[str, Any]]:
        """
        Convert token IDs in sparse weights to actual tokens.

        Args:
            lexical_weights: Sparse weights with token IDs

        Returns:
            List of token-weight mappings
        """
from FlagEmbedding import FlagModel

# Initialize with CLS pooling
embedder = FlagModel(
    'BAAI/bge-large-en-v1.5',
    pooling_method="cls",
    use_fp16=True
)
# Encode queries and documents
queries = ["What is deep learning?", "How do transformers work?"]
documents = ["Deep learning is a subset of ML", "Transformers use attention mechanisms"]
query_embeddings = embedder.encode_queries(queries)
doc_embeddings = embedder.encode_corpus(documents)
print(f"Query embeddings shape: {query_embeddings.shape}")
print(f"Document embeddings shape: {doc_embeddings.shape}")from FlagEmbedding import FlagModel
from FlagEmbedding import FlagModel

# Use mean pooling instead of CLS
embedder = FlagModel(
    'BAAI/bge-base-en-v1.5',
    pooling_method="mean",
    normalize_embeddings=True
)
texts = ["Example text for embedding"]
embeddings = embedder.encode(texts)

from FlagEmbedding import BGEM3FlagModel
# Initialize BGE-M3 with all representation types
embedder = BGEM3FlagModel(
    'BAAI/bge-m3',
    return_dense=True,
    return_sparse=True,
    return_colbert_vecs=True,
    use_fp16=True
)
# Encode with multiple representation types
query = ["machine learning applications"]
passage = ["ML is used in healthcare, finance, and technology"]
query_output = embedder.encode_queries(query)
passage_output = embedder.encode_corpus(passage)
# Access different representation types
if isinstance(query_output, dict):
    dense_query = query_output.get('dense_vecs')
    sparse_query = query_output.get('lexical_weights')
    colbert_query = query_output.get('colbert_vecs')

from FlagEmbedding import BGEM3FlagModel
embedder = BGEM3FlagModel(
    'BAAI/bge-m3',
    return_dense=True,
    return_sparse=True,
    return_colbert_vecs=True
)
# Get representations for scoring
query_reps = embedder.encode_queries(["machine learning"])
passage_reps = embedder.encode_corpus(["ML algorithms"])
# Compute combined similarity score
score = embedder.compute_score(query_reps, passage_reps)
print(f"Combined similarity: {score}")
# Compute individual scores if needed
if 'lexical_weights' in query_reps:
    lexical_score = embedder.compute_lexical_matching_score(
        query_reps['lexical_weights'][0],
        passage_reps['lexical_weights'][0]
    )
    print(f"Lexical similarity: {lexical_score}")
if 'colbert_vecs' in query_reps:
    colbert_score = embedder.colbert_score(
        query_reps['colbert_vecs'][0],
        passage_reps['colbert_vecs'][0]
    )
    print(f"ColBERT similarity: {colbert_score}")

from FlagEmbedding import FlagModel
# Add custom instruction for retrieval tasks
embedder = FlagModel(
    'BAAI/bge-large-en-v1.5',
    query_instruction_for_retrieval="Represent this query for retrieving relevant documents: ",
    query_instruction_format="{}{}"
)
# Queries will be prepended with instruction
queries = ["best practices for machine learning"]
embeddings = embedder.encode_queries(queries)
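For reference, the instruction is merged with each query through query_instruction_format; with the default "{}{}" template this is assumed to be plain concatenation, as the sketch shows:

# Assumed composition: instruction slotted before the query text.
instruction = "Represent this query for retrieving relevant documents: "
template = "{}{}"
print(template.format(instruction, "best practices for machine learning"))
# -> Represent this query for retrieving relevant documents: best practices for machine learning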
from FlagEmbedding import FlagModel

# Use multiple GPUs for large-scale processing
embedder = FlagModel(
    'BAAI/bge-large-en-v1.5',
    devices=['cuda:0', 'cuda:1', 'cuda:2'],
    batch_size=128
)
# Process large corpus efficiently
large_corpus = [f"Document {i}" for i in range(50000)]
embeddings = embedder.encode_corpus(large_corpus)

from typing import Any, Dict, List, Literal, Optional, Union
import torch
import numpy as np
# BGE-M3 specific types
M3Output = Dict[str, Union[torch.Tensor, np.ndarray, List[Dict[int, float]]]]
SparseWeights = Dict[int, float]
ColBERTVectors = torch.Tensor
DenseEmbedding = Union[torch.Tensor, np.ndarray]
# Pooling method types
PoolingMethod = Literal["cls", "mean"]

Install with Tessl CLI
npx tessl i tessl/pypi-flagembedding