FlagEmbedding - BGE: One-Stop Retrieval Toolkit For Search and RAG
FlagEmbedding (BGE - BAAI General Embedding) is a comprehensive Python library focused on retrieval-augmented language models and embedding technologies. The package provides state-of-the-art text embedding models, rerankers, and multimodal embedding capabilities for search and RAG applications. It includes tools for both inference and fine-tuning of embedding models, along with evaluation frameworks, and supports various retrieval tasks, including text-to-text, text-to-image, and image-to-text retrieval.
Install from PyPI:

pip install FlagEmbedding

For automatic model selection:

from FlagEmbedding import FlagAutoModel, FlagAutoReranker

For direct model access:
from FlagEmbedding import FlagModel, BGEM3FlagModel, FlagReranker
from FlagEmbedding import FlagLLMModel, FlagICLModel, FlagLLMReranker

For model class enumeration:
from FlagEmbedding import EmbedderModelClass, RerankerModelClass

Basic usage:

from FlagEmbedding import FlagAutoModel, FlagAutoReranker
# Initialize embedder with automatic model selection
embedder = FlagAutoModel.from_finetuned(
    'BAAI/bge-large-en-v1.5',
    query_instruction_for_retrieval="Represent this sentence for searching relevant passages: ",
    use_fp16=True,
)
# Encode queries and documents
queries = ["What is machine learning?", "How do neural networks work?"]
documents = [
"Machine learning is a subset of artificial intelligence...",
"Neural networks are computing systems inspired by biological neural networks..."
]
query_embeddings = embedder.encode_queries(queries)
doc_embeddings = embedder.encode_corpus(documents)
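# Embeddings are L2-normalized by default (normalize_embeddings=True),
# so the inner product equals cosine similarity
similarity = query_embeddings @ doc_embeddings.T  # shape: (num_queries, num_docs)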
# Initialize reranker for scoring
reranker = FlagAutoReranker.from_finetuned('BAAI/bge-reranker-base', use_fp16=True)
# Score query-document pairs
pairs = [("What is machine learning?", "Machine learning is a subset of artificial intelligence...")]
scores = reranker.compute_score(pairs)
print(f"Similarity score: {scores[0]}")FlagEmbedding is built around a hierarchical architecture that supports multiple model types and architectures:
- AbsEmbedder and AbsReranker define the interface contracts for all embedding and reranking models
- Auto classes (FlagAutoModel, FlagAutoReranker) automatically select the appropriate concrete implementation

The auto classes select and initialize the appropriate embedder or reranker class based on the model name, providing the simplest way to use FlagEmbedding with any supported model.
class FlagAutoModel:
@classmethod
def from_finetuned(
cls,
model_name_or_path: str,
model_class: Optional[str] = None,
normalize_embeddings: bool = True,
use_fp16: bool = True,
query_instruction_for_retrieval: Optional[str] = None,
devices: Optional[List[str]] = None,
pooling_method: Optional[str] = None,
trust_remote_code: Optional[bool] = None,
**kwargs
) -> AbsEmbedder: ...
class FlagAutoReranker:
@classmethod
def from_finetuned(
cls,
model_name_or_path: str,
model_class: Optional[str] = None,
use_fp16: bool = False,
trust_remote_code: Optional[bool] = None,
**kwargs
) -> AbsReranker: ...
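The factories normally infer the implementation class from the model name. A minimal sketch of pinning it explicitly and spreading encoding across devices (the model_class string and the device list are illustrative assumptions):

from FlagEmbedding import FlagAutoModel

embedder = FlagAutoModel.from_finetuned(
    'BAAI/bge-large-en-v1.5',
    model_class='encoder-only-base',  # assumption: mirrors EmbedderModelClass values; inferred when omitted
    devices=['cuda:0', 'cuda:1'],     # assumption: two CUDA devices; a single device string also works
    use_fp16=True,
)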
Embedders designed for encoder-only transformer models (BERT-like architectures), including specialized implementations for BGE-M3 models with dense, sparse, and ColBERT support.

class FlagModel(AbsEmbedder):
def __init__(
self,
model_name_or_path: str,
pooling_method: str = "cls",
normalize_embeddings: bool = True,
use_fp16: bool = True,
trust_remote_code: bool = False,
**kwargs
): ...
class BGEM3FlagModel(AbsEmbedder):
def __init__(
self,
model_name_or_path: str,
pooling_method: str = "cls",
normalize_embeddings: bool = True,
use_fp16: bool = True,
colbert_dim: int = -1,
return_dense: bool = True,
return_sparse: bool = False,
return_colbert_vecs: bool = False,
**kwargs
): ...
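A sketch of requesting all three BGE-M3 representations in one call; the output keys shown follow the BGE-M3 model card and are worth verifying against your installed version:

from FlagEmbedding import BGEM3FlagModel

model = BGEM3FlagModel(
    'BAAI/bge-m3',
    use_fp16=True,
    return_dense=True,
    return_sparse=True,         # per-token lexical weights (sparse retrieval)
    return_colbert_vecs=True,   # per-token multi-vector output (late interaction)
)

output = model.encode(["What is BGE M3?"])
dense_vecs = output['dense_vecs']            # (num_sentences, hidden_dim) array
lexical_weights = output['lexical_weights']  # list of {token: weight} dicts
colbert_vecs = output['colbert_vecs']        # list of (num_tokens, dim) arrays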
Embedders for decoder-only transformer models (LLM-like architectures), including support for in-context learning approaches.

class FlagLLMModel(AbsEmbedder):
def __init__(
self,
model_name_or_path: str,
pooling_method: str = "last_token",
normalize_embeddings: bool = True,
use_fp16: bool = True,
query_instruction_format: str = "Instruct: {}\nQuery: {}",
**kwargs
): ...
class FlagICLModel(AbsEmbedder):
def __init__(
self,
model_name_or_path: str,
pooling_method: str = "last_token",
normalize_embeddings: bool = True,
use_fp16: bool = True,
**kwargs
): ...
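A sketch of the in-context-learning variant; the examples_for_task structure follows the published BGE-EN-ICL usage, and the model name and example contents here are assumptions:

from FlagEmbedding import FlagICLModel

# Few-shot demonstrations that condition how queries are encoded.
task_examples = [{
    'instruct': 'Given a web search query, retrieve relevant passages that answer the query.',
    'query': 'what is a virtual interface',
    'response': 'A virtual interface is a software-defined abstraction of a network interface...',
}]

model = FlagICLModel(
    'BAAI/bge-en-icl',
    examples_for_task=task_examples,  # omit to encode without in-context examples
    use_fp16=True,
)
query_embeddings = model.encode_queries(["How does retrieval-augmented generation work?"])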
Reranking models for scoring query-document pairs, available in both encoder-only and decoder-only variants with specialized implementations for different use cases.

class FlagReranker(AbsReranker):
def __init__(
self,
model_name_or_path: str,
use_fp16: bool = False,
trust_remote_code: bool = False,
**kwargs
): ...
class FlagLLMReranker(AbsReranker):
def __init__(
self,
model_name_or_path: str,
use_fp16: bool = False,
**kwargs
): ...
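By default compute_score returns unbounded relevance logits; a sigmoid-mapped score in [0, 1] can be requested instead (a sketch, assuming the normalize keyword documented for the BGE rerankers):

from FlagEmbedding import FlagReranker

reranker = FlagReranker('BAAI/bge-reranker-large', use_fp16=True)

pairs = [
    ("What is machine learning?", "Machine learning is a subset of artificial intelligence..."),
    ("What is machine learning?", "The weather today is sunny and warm."),
]
raw_scores = reranker.compute_score(pairs)                   # unbounded logits
norm_scores = reranker.compute_score(pairs, normalize=True)  # mapped through sigmoid to [0, 1]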
Abstract base classes that define the core interface contracts for embedders and rerankers, providing multi-device support and consistent API patterns.

class AbsEmbedder:
def encode_queries(
self,
queries: List[str],
batch_size: Optional[int] = None,
max_length: Optional[int] = None,
convert_to_numpy: Optional[bool] = None,
**kwargs
) -> Union[torch.Tensor, np.ndarray]: ...
class AbsReranker:
def compute_score(
self,
sentence_pairs: List[Tuple[str, str]],
**kwargs
) -> np.ndarray: ...
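Because these controls live on the base class, they behave the same on every concrete embedder. A sketch of the per-call overrides:

from FlagEmbedding import FlagModel

embedder = FlagModel('BAAI/bge-base-en-v1.5', use_fp16=True)
embeddings = embedder.encode_queries(
    ["What is machine learning?"],
    batch_size=256,         # per-call override of the instance default
    max_length=512,         # truncate inputs to 512 tokens
    convert_to_numpy=True,  # return np.ndarray rather than torch.Tensor
)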
Enumerations for supported model classes and utility functions for discovering available models and their capabilities.

class EmbedderModelClass(Enum):
ENCODER_ONLY_BASE = "encoder-only-base"
ENCODER_ONLY_M3 = "encoder-only-m3"
DECODER_ONLY_BASE = "decoder-only-base"
DECODER_ONLY_ICL = "decoder-only-icl"
class RerankerModelClass(Enum):
ENCODER_ONLY_BASE = "encoder-only-base"
DECODER_ONLY_BASE = "decoder-only-base"
DECODER_ONLY_LAYERWISE = "decoder-only-layerwise"
DECODER_ONLY_LIGHTWEIGHT = "decoder-only-lightweight"
def support_model_list() -> List[str]: ...
def support_native_bge_model_list() -> List[str]: ...

FlagEmbedding supports a comprehensive range of pre-trained models.
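A sketch of enumerating the supported families and model names, assuming both helpers are importable from the package root as listed above:

from FlagEmbedding import EmbedderModelClass, RerankerModelClass, support_model_list

for model_class in EmbedderModelClass:
    print(model_class.value)   # e.g. "encoder-only-base", "encoder-only-m3", ...

for name in support_model_list():
    print(name)                # every model name the auto classes recognize

The following shared type aliases are used across the API: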
from typing import List, Union, Optional, Dict, Any, Tuple
import torch
import numpy as np
# Core types used across the API
QueryType = Union[str, List[str]]
CorpusType = Union[str, List[str]]
EmbeddingOutput = Union[torch.Tensor, np.ndarray]
SentencePair = Tuple[str, str]
DeviceSpec = Union[str, List[str]]
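These aliases make it straightforward to write typed helpers on top of the abstract interfaces. retrieve_top_k below is a hypothetical sketch, not part of the library, and builds on the aliases defined above:

from FlagEmbedding.abc.inference import AbsEmbedder  # assumption: import path may differ by version

def retrieve_top_k(
    embedder: AbsEmbedder,
    queries: QueryType,
    corpus: CorpusType,
    k: int = 5,
) -> np.ndarray:
    """Return indices of the top-k corpus entries per query by inner product."""
    q = embedder.encode_queries(queries, convert_to_numpy=True)
    d = embedder.encode_corpus(corpus, convert_to_numpy=True)
    scores = q @ d.T                           # (num_queries, num_docs) similarity matrix
    return np.argsort(-scores, axis=1)[:, :k]  # column indices of the k best documents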