LLM framework to build customizable, production-ready LLM applications with pipelines connecting models, vector DBs, and data processors.
—
Backend storage systems supporting vector and keyword search across multiple databases including Elasticsearch, FAISS, Pinecone, Weaviate, and others.
from haystack.document_stores import (
InMemoryDocumentStore,
ElasticsearchDocumentStore,
FAISSDocumentStore,
PineconeDocumentStore,
WeaviateDocumentStore
)
from haystack.document_stores.base import BaseDocumentStore
from haystack.schema import Document, FilterType
from typing import List, Optional, Dict, Any
class BaseDocumentStore:
    """Document store interface: write, list, keyword-query and embedding-query methods."""

    def write_documents(
        self,
        documents: List[Document],
        index: Optional[str] = None,
        batch_size: int = 10_000,
        duplicate_documents: str = "overwrite",
    ) -> None:
        """
        Write documents to the document store.

        Args:
            documents: List of Document objects to store
            index: Optional index name
            batch_size: Number of documents to write in each batch
            duplicate_documents: How to handle duplicates ("overwrite", "skip", "fail")
        """

    def get_all_documents(
        self,
        index: Optional[str] = None,
        filters: Optional[FilterType] = None,
    ) -> List[Document]:
        """Retrieve all documents from the store."""

    def query(
        self,
        query: str,
        filters: Optional[FilterType] = None,
        top_k: int = 10,
        index: Optional[str] = None,
    ) -> List[Document]:
        """Query documents using keyword search."""

    def query_by_embedding(
        self,
        query_emb,
        filters: Optional[FilterType] = None,
        top_k: int = 10,
        index: Optional[str] = None,
    ) -> List[Document]:
        """Query documents using vector similarity search."""


from haystack.document_stores import InMemoryDocumentStore
class InMemoryDocumentStore(BaseDocumentStore):
    def __init__(
        self,
        embedding_dim: int = 768,
        return_embedding: bool = False,
        similarity: str = "cosine",
        duplicate_documents: str = "overwrite",
    ):
        """
        Initialize in-memory document store.

        Args:
            embedding_dim: Dimension of document embeddings
            return_embedding: Whether to return embeddings by default
            similarity: Similarity metric ("cosine", "dot_product", "l2")
            duplicate_documents: How to handle duplicate documents
        """


from haystack.document_stores import ElasticsearchDocumentStore
class ElasticsearchDocumentStore(BaseDocumentStore):
    def __init__(
        self,
        host: str = "localhost",
        port: int = 9200,
        username: str = "",
        password: str = "",
        index: str = "document",
        embedding_dim: int = 768,
    ):
        """
        Initialize Elasticsearch document store.

        Args:
            host: Elasticsearch host address
            port: Elasticsearch port
            username: Username for authentication
            password: Password for authentication
            index: Index name for documents
            embedding_dim: Dimension of embeddings
        """


from haystack.document_stores import FAISSDocumentStore
class FAISSDocumentStore(BaseDocumentStore):
    def __init__(
        self,
        sql_url: str = "sqlite:///faiss_document_store.db",
        vector_dim: int = 768,
        faiss_index_factory_str: str = "Flat",
    ):
        """
        Initialize FAISS document store.

        Args:
            sql_url: SQLAlchemy URL for metadata storage
            vector_dim: Dimension of embeddings
            faiss_index_factory_str: FAISS index type
        """


# Install with Tessl CLI
npx tessl i tessl/pypi-farm-haystack