FlagEmbedding - BGE: One-Stop Retrieval Toolkit For Search and RAG
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Reranking models for scoring query-document pairs to improve retrieval accuracy. Rerankers take a query and a set of candidate documents and assign relevance scores to help identify the most relevant matches.
Standard reranker for encoder-only models. Efficiently scores query-document pairs using cross-encoder architecture for high-accuracy relevance scoring.
from typing import List, Optional, Union
class FlagReranker(AbsReranker):
    # Cross-encoder reranker for encoder-only models: scores each
    # (query, passage) pair jointly for high-accuracy relevance scoring.
    def __init__(
        self,
        model_name_or_path: str,
        use_fp16: bool = False,
        query_instruction_for_rerank: Optional[str] = None,
        query_instruction_format: str = "{}{}",
        passage_instruction_for_rerank: Optional[str] = None,
        passage_instruction_format: str = "{}{}",
        devices: Optional[Union[str, List[str]]] = None,
        batch_size: int = 128,
        query_max_length: Optional[int] = None,
        max_length: int = 512,
        normalize: bool = False,
        trust_remote_code: bool = False,
        cache_dir: Optional[str] = None,
        **kwargs
    ):
        """
        Initialize encoder-only reranker.

        Args:
            model_name_or_path: Path to reranker model (local dir or model id)
            use_fp16: Use half precision for inference
            query_instruction_for_rerank: Instruction prepended to queries
            query_instruction_format: Format string for query instructions
            passage_instruction_for_rerank: Instruction prepended to passages
            passage_instruction_format: Format string for passage instructions
            devices: Device or list of devices for multi-GPU inference
            query_max_length: Maximum query token length
            batch_size: Default batch size for scoring
            max_length: Maximum total sequence length (query + passage)
            normalize: Whether to normalize output scores
            trust_remote_code: Allow custom model code execution
            cache_dir: Directory for model cache
            **kwargs: Additional model parameters
        """

Reranker using large language models for sophisticated relevance assessment. Leverages LLM reasoning capabilities for nuanced query-document relevance scoring.
class FlagLLMReranker(AbsReranker):
    # Reranker backed by a large language model; leverages LLM reasoning
    # for more nuanced query-document relevance scoring.
    def __init__(
        self,
        model_name_or_path: str,
        use_fp16: bool = False,
        query_instruction_for_rerank: Optional[str] = None,
        query_instruction_format: str = "{}{}",
        passage_instruction_for_rerank: Optional[str] = None,
        passage_instruction_format: str = "{}{}",
        devices: Optional[Union[str, List[str]]] = None,
        batch_size: int = 128,
        query_max_length: Optional[int] = None,
        max_length: int = 512,
        normalize: bool = False,
        **kwargs
    ):
        """
        Initialize LLM-based reranker.

        Args:
            model_name_or_path: Path to LLM reranker model
            use_fp16: Use half precision for inference
            query_instruction_for_rerank: Instruction prepended to queries
            query_instruction_format: Format string for query instructions
            passage_instruction_for_rerank: Instruction prepended to passages
            passage_instruction_format: Format string for passage instructions
            devices: Device or list of devices for multi-GPU inference
            batch_size: Default batch size for scoring
            query_max_length: Maximum query token length
            max_length: Maximum total sequence length (query + passage)
            normalize: Whether to normalize output scores
            **kwargs: Additional model parameters
        """

Specialized LLM reranker that uses layer-wise processing for enhanced efficiency and performance. Optimized for large-scale reranking tasks.
class LayerWiseFlagLLMReranker(AbsReranker):
    # LLM reranker variant with layer-wise processing, trading a little
    # quality for efficiency on large-scale reranking tasks.
    def __init__(
        self,
        model_name_or_path: str,
        use_fp16: bool = False,
        query_instruction_for_rerank: Optional[str] = None,
        query_instruction_format: str = "{}{}",
        passage_instruction_for_rerank: Optional[str] = None,
        passage_instruction_format: str = "{}{}",
        devices: Optional[Union[str, List[str]]] = None,
        batch_size: int = 128,
        query_max_length: Optional[int] = None,
        max_length: int = 512,
        normalize: bool = False,
        **kwargs
    ):
        """
        Initialize layer-wise LLM reranker for efficient processing.

        Args:
            model_name_or_path: Path to layer-wise reranker model
            use_fp16: Use half precision for inference
            query_instruction_for_rerank: Instruction prepended to queries
            query_instruction_format: Format string for query instructions
            passage_instruction_for_rerank: Instruction prepended to passages
            passage_instruction_format: Format string for passage instructions
            devices: Device or list of devices for multi-GPU inference
            batch_size: Default batch size for scoring
            query_max_length: Maximum query token length
            max_length: Maximum total sequence length (query + passage)
            normalize: Whether to normalize output scores
            **kwargs: Additional model parameters
        """

Optimized lightweight LLM reranker for resource-constrained environments. Provides good reranking performance with reduced computational requirements.
class LightWeightFlagLLMReranker(AbsReranker):
    # Lightweight LLM reranker for resource-constrained environments;
    # reduced computational requirements with good reranking performance.
    def __init__(
        self,
        model_name_or_path: str,
        use_fp16: bool = False,
        query_instruction_for_rerank: Optional[str] = None,
        query_instruction_format: str = "{}{}",
        passage_instruction_for_rerank: Optional[str] = None,
        passage_instruction_format: str = "{}{}",
        devices: Optional[Union[str, List[str]]] = None,
        batch_size: int = 128,
        query_max_length: Optional[int] = None,
        max_length: int = 512,
        normalize: bool = False,
        **kwargs
    ):
        """
        Initialize lightweight LLM reranker for efficient processing.

        Args:
            model_name_or_path: Path to lightweight reranker model
            use_fp16: Use half precision for inference
            query_instruction_for_rerank: Instruction prepended to queries
            query_instruction_format: Format string for query instructions
            passage_instruction_for_rerank: Instruction prepended to passages
            passage_instruction_format: Format string for passage instructions
            devices: Device or list of devices for multi-GPU inference
            batch_size: Default batch size for scoring
            query_max_length: Maximum query token length
            max_length: Maximum total sequence length (query + passage)
            normalize: Whether to normalize output scores
            **kwargs: Additional model parameters
        """

from FlagEmbedding import FlagReranker
# Initialize reranker
reranker = FlagReranker('bge-reranker-base', use_fp16=True)
# Score query-document pairs
query = "What is machine learning?"
documents = [
"Machine learning is a subset of artificial intelligence",
"Cooking recipes for Italian pasta dishes",
"ML algorithms learn patterns from data",
"Weather forecast for next week"
]
# Create query-document pairs
pairs = [(query, doc) for doc in documents]
# Get relevance scores
scores = reranker.compute_score(pairs)
# Sort documents by relevance
ranked_docs = sorted(zip(documents, scores), key=lambda x: x[1], reverse=True)
for doc, score in ranked_docs:
    print(f"Score: {score:.4f} - {doc[:50]}...")

from FlagEmbedding import FlagReranker
# Initialize with custom instructions
reranker = FlagReranker(
'bge-reranker-base',
query_instruction_for_rerank="Query: ",
passage_instruction_for_rerank="Passage: ",
query_instruction_format="{}{}",
passage_instruction_format="{}{}",
use_fp16=True,
batch_size=64
)
# Multiple queries
queries = [
"Python programming tutorials",
"Machine learning algorithms",
"Data science techniques"
]
documents = [
"Learn Python programming from scratch",
"Advanced ML algorithms explained",
"Data analysis with pandas and numpy",
"Web development with Django",
"Deep learning neural networks"
]
# Score all query-document combinations
all_pairs = [(q, d) for q in queries for d in documents]
scores = reranker.compute_score(all_pairs)
# Reshape scores for analysis
import numpy as np
score_matrix = np.array(scores).reshape(len(queries), len(documents))
for i, query in enumerate(queries):
print(f"\\nQuery: {query}")
query_scores = score_matrix[i]
ranked_indices = np.argsort(query_scores)[::-1]
for j in ranked_indices[:3]: # Top 3 documents
        print(f" {query_scores[j]:.4f}: {documents[j]}")

from FlagEmbedding import FlagLLMReranker
# Initialize LLM reranker for nuanced scoring
reranker = FlagLLMReranker(
'bge-reranker-v2-gemma',
use_fp16=True,
batch_size=32, # Smaller batch for LLM
max_length=1024 # Longer context for LLM
)
# Complex query requiring reasoning
query = "How can renewable energy help reduce climate change impacts?"
documents = [
"Solar panels convert sunlight to electricity with zero emissions",
"Climate change causes rising sea levels and extreme weather",
"Wind turbines generate clean energy without carbon footprint",
"Fossil fuels are the primary cause of greenhouse gas emissions",
"Electric vehicles reduce transportation emissions significantly"
]
pairs = [(query, doc) for doc in documents]
scores = reranker.compute_score(pairs)
# LLM rerankers often provide more nuanced scoring
for doc, score in zip(documents, scores):
    print(f"{score:.4f}: {doc}")

from FlagEmbedding import FlagReranker
import numpy as np  # Fixed: argsort below needs numpy, which this example never imported

# Use multiple GPUs for large-scale reranking
reranker = FlagReranker(
    'bge-reranker-large',
    devices=['cuda:0', 'cuda:1', 'cuda:2'],
    batch_size=256,
    use_fp16=True
)

# Large-scale reranking scenario
query = "artificial intelligence applications"
large_document_set = [f"Document {i} about AI applications" for i in range(10000)]

# NOTE: materializing every pair can be memory intensive for very large sets
pairs = [(query, doc) for doc in large_document_set]

# Efficient batch processing across GPUs
scores = reranker.compute_score(pairs)

# Keep only the k best-scoring documents, most relevant first
k = 100
top_indices = np.argsort(scores)[-k:][::-1]
top_documents = [large_document_set[i] for i in top_indices]
top_scores = [scores[i] for i in top_indices]

from FlagEmbedding import LightWeightFlagLLMReranker
# Use lightweight reranker for efficiency
reranker = LightWeightFlagLLMReranker(
'bge-reranker-v2.5-gemma2-lightweight',
use_fp16=True,
batch_size=128,
normalize=True # Normalize scores for consistency
)
# Efficient processing with good performance
query = "best practices for software development"
candidates = [
"Code review processes improve software quality",
"Unit testing prevents bugs in production",
"Agile methodology enhances team collaboration",
"Version control systems track code changes"
]
pairs = [(query, candidate) for candidate in candidates]
scores = reranker.compute_score(pairs)
# Normalized scores for easy interpretation
for candidate, score in zip(candidates, scores):
    print(f"Relevance: {score:.3f} - {candidate}")

from FlagEmbedding import LayerWiseFlagLLMReranker
# Layer-wise reranker for balanced performance-efficiency
reranker = LayerWiseFlagLLMReranker(
'bge-reranker-v2-minicpm-layerwise',
use_fp16=True,
batch_size=64
)
# Particularly effective for medium-scale tasks
query = "quantum computing applications"
documents = [
"Quantum computers solve complex optimization problems",
"Classical computers use binary logic gates",
"Quantum algorithms leverage superposition and entanglement",
"Cryptography applications of quantum computing",
"Machine learning acceleration with quantum processors"
]
pairs = [(query, doc) for doc in documents]
scores = reranker.compute_score(pairs)
# Layer-wise processing often provides good relevance ranking
sorted_results = sorted(zip(documents, scores), key=lambda x: x[1], reverse=True)
for doc, score in sorted_results:
    print(f"{score:.4f}: {doc}")

from typing import List, Tuple, Optional, Union
import numpy as np

# Type aliases used throughout the reranking API
QueryDocumentPair = Tuple[str, str]   # (query, passage)
RelevanceScore = float                # single relevance score for one pair
BatchPairs = List[QueryDocumentPair]  # batch of pairs passed to compute_score
BatchScores = np.ndarray              # scores aligned with the input batch
# Instruction formatting
InstructionFormat = str  # Format string with {} placeholders

Install with Tessl CLI
npx tessl i tessl/pypi-flagembedding