Python SDK for Feast — an open source feature store for machine learning that manages features for both training and serving environments.
The FeastVectorStore class provides vector store functionality for RAG (Retrieval-Augmented Generation) applications and semantic search using Feast's feature store infrastructure. It enables efficient vector similarity search and document retrieval for AI applications.
Initialize a vector store instance with a Feast repository and RAG-enabled feature view.
class FeastVectorStore:
    """Vector store over a Feast feature store for RAG and semantic search."""

    def __init__(self, repo_path: str, rag_view: FeatureView, features: List[str]):
        """
        Initialize the Feast vector store.

        Parameters:
        - repo_path: Path to the Feast repository
        - rag_view: Feature view configured for RAG operations
        - features: List of feature names to retrieve in queries
        """

    # Query the vector store using vector embeddings or text queries for
    # semantic similarity search.
    def query(
        self,
        query_vector: Optional[np.ndarray] = None,
        query_string: Optional[str] = None,
        top_k: int = 10,
    ) -> OnlineResponse:
        """
        Query the Feast vector store for similar documents.

        Parameters:
        - query_vector: Vector embedding for similarity search
        - query_string: Text query for semantic search
        - top_k: Number of most similar results to return

        Returns:
        OnlineResponse containing the retrieved documents and features

        Note: Either query_vector or query_string must be provided
        """

    # Access the underlying Feast store and configuration.
    @property
    def store(self) -> FeatureStore:
        """Access the underlying FeatureStore instance."""


import numpy as np
from feast import FeatureStore, FeatureView, Field, FileSource, ValueType, FeastVectorStore
from datetime import timedelta
# Define a RAG-enabled feature view with vector fields.
# Source file containing precomputed document embeddings.
documents_source = FileSource(
    path="data/document_embeddings.parquet",
    timestamp_field="created_timestamp",
)

# Schema for the document feature view; "embedding" carries the vector payload.
_document_schema = [
    Field(name="title", dtype=ValueType.STRING),
    Field(name="content", dtype=ValueType.STRING),
    Field(name="embedding", dtype=ValueType.FLOAT_LIST),  # Vector field
    Field(name="category", dtype=ValueType.STRING),
]

# Feature view over the embedded documents, keyed by document id.
document_embeddings_fv = FeatureView(
    name="document_embeddings",
    entities=["document_id"],
    ttl=timedelta(days=365),
    schema=_document_schema,
    source=documents_source,
)
# Initialize the vector store over the RAG feature view, listing the
# feature references to fetch with every query.
_feature_refs = [
    "document_embeddings:title",
    "document_embeddings:content",
    "document_embeddings:embedding",
    "document_embeddings:category",
]
vector_store = FeastVectorStore(
    repo_path="./feast_repo",
    rag_view=document_embeddings_fv,
    features=_feature_refs,
)
# Create query vector (e.g., from text embedding model)
# Example 5-dimensional query vector (in practice, produced by an embedding model).
query_embedding = np.array([0.1, 0.2, 0.3, 0.4, 0.5])

# Perform vector similarity search for the five nearest documents.
results = vector_store.query(query_vector=query_embedding, top_k=5)
# Access results
result_dict = results.to_dict()
print("Top 5 similar documents:")
# Iterate the parallel feature lists together instead of indexing by position
# with range(len(...)).
for title, category, content in zip(
    result_dict["title"], result_dict["category"], result_dict["content"]
):
    print(f"Document: {title}")
    print(f"Category: {category}")
    print(f"Content: {content[:100]}...")
    print("---")
# Perform text-based semantic search (if supported by the vector store backend)
# Text-based semantic search (support depends on the vector store backend).
results = vector_store.query(query_string="machine learning algorithms", top_k=10)

# Convert to DataFrame for analysis
df = results.to_df()
print(df[["title", "category", "content"]])


def rag_query(question: str, vector_store: FeastVectorStore, embedding_model, llm_model):
    """
    Complete RAG pipeline using FeastVectorStore.

    Args:
        question: User question
        vector_store: Configured FeastVectorStore instance
        embedding_model: Model exposing encode(text) -> vector embedding
        llm_model: Language model exposing generate(prompt) -> answer text

    Returns:
        Tuple of (generated answer, retrieval results used as context).
    """
    # Generate embedding for the question
    question_embedding = embedding_model.encode(question)

    # Retrieve the five most relevant documents as grounding context.
    context_results = vector_store.query(query_vector=question_embedding, top_k=5)

    # Format context from retrieved documents (title/content are parallel lists).
    context_dict = context_results.to_dict()
    context_text = "\n\n".join(
        f"Title: {title}\nContent: {content}"
        for title, content in zip(context_dict["title"], context_dict["content"])
    )

    # Generate answer using retrieved context
    prompt = f"""
Context:
{context_text}
Question: {question}
Answer based on the provided context:
"""
    answer = llm_model.generate(prompt)
    return answer, context_results
# Usage
question = "What are the benefits of feature stores?"
answer, sources = rag_query(question, vector_store, embedding_model, llm_model)
print(f"Answer: {answer}")
print(f"Sources: {len(sources.to_dict()['document_id'])} documents")

from feast import Entity
# Define document entity
document_entity = Entity(
    name="document_id",
    value_type=ValueType.STRING,
    description="Unique document identifier",
)

# Vector store configured with the full feature list, metadata included.
_all_feature_refs = [
    "document_embeddings:" + name
    for name in (
        "title",
        "content",
        "embedding",
        "category",
        "author",
        "published_date",
        "tags",
    )
]
vector_store = FeastVectorStore(
    repo_path="./feast_repo",
    rag_view=document_embeddings_fv,
    features=_all_feature_refs,
)
# Batch vector search for multiple queries
query_vectors = [
np.random.rand(384), # Example embedding dimension
np.random.rand(384),
np.random.rand(384)
]
batch_results = []
for i, query_vec in enumerate(query_vectors):
result = vector_store.query(
query_vector=query_vec,
top_k=3
)
batch_results.append(result)
print(f"Query {i+1}: Found {len(result.to_dict()['document_id'])} results")The FeastVectorStore works with various vector database backends supported by Feast's online stores:
The specific vector search capabilities depend on the configured online store backend and its vector index configuration.
Install with Tessl CLI
npx tessl i tessl/pypi-feast