CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-farm-haystack

LLM framework to build customizable, production-ready LLM applications with pipelines connecting models, vector DBs, and data processors.

Pending
Overview
Eval results
Files

docs/document-stores.md

Document Stores

Backend storage systems supporting vector and keyword search across multiple databases including Elasticsearch, FAISS, Pinecone, Weaviate, and others.

Core Imports

from haystack.document_stores import (
    InMemoryDocumentStore,
    ElasticsearchDocumentStore, 
    FAISSDocumentStore,
    PineconeDocumentStore,
    WeaviateDocumentStore
)
from haystack.document_stores.base import BaseDocumentStore

Base Document Store

from haystack.document_stores.base import BaseDocumentStore
from haystack.schema import Document, FilterType
from typing import List, Optional, Dict, Any

class BaseDocumentStore:
    """Common interface that the concrete document store backends extend."""

    def write_documents(
        self,
        documents: List[Document],
        index: Optional[str] = None,
        batch_size: int = 10_000,
        duplicate_documents: str = "overwrite",
    ) -> None:
        """
        Store the given documents in the document store.

        Args:
            documents: The Document objects to write.
            index: Name of the index to use; optional.
            batch_size: How many documents are written per batch.
            duplicate_documents: Policy for duplicates, one of
                "overwrite", "skip" or "fail".
        """

    def get_all_documents(
        self,
        index: Optional[str] = None,
        filters: Optional[FilterType] = None,
    ) -> List[Document]:
        """
        Return every document held in the store.

        Args:
            index: Name of the index to use; optional.
            filters: Optional filters, given as a FilterType.
        """

    def query(
        self,
        query: str,
        filters: Optional[FilterType] = None,
        top_k: int = 10,
        index: Optional[str] = None,
    ) -> List[Document]:
        """
        Search the documents by keyword.

        Args:
            query: The query string.
            filters: Optional filters, given as a FilterType.
            top_k: How many top-ranked documents to return; defaults to 10.
            index: Name of the index to use; optional.
        """

    def query_by_embedding(
        self,
        query_emb,
        filters: Optional[FilterType] = None,
        top_k: int = 10,
        index: Optional[str] = None,
    ) -> List[Document]:
        """
        Search the documents by vector similarity.

        Args:
            query_emb: The query embedding to compare against.
            filters: Optional filters, given as a FilterType.
            top_k: How many top-ranked documents to return; defaults to 10.
            index: Name of the index to use; optional.
        """

In-Memory Document Store

from haystack.document_stores import InMemoryDocumentStore

class InMemoryDocumentStore(BaseDocumentStore):
    """Document store that keeps its documents in memory."""

    def __init__(
        self,
        embedding_dim: int = 768,
        return_embedding: bool = False,
        similarity: str = "cosine",
        duplicate_documents: str = "overwrite",
    ):
        """
        Set up the in-memory document store.

        Args:
            embedding_dim: Dimensionality of the document embeddings.
            return_embedding: Whether embeddings are returned by default.
            similarity: Similarity metric to use: "cosine", "dot_product"
                or "l2".
            duplicate_documents: Policy for handling duplicate documents.
        """

Elasticsearch Document Store

from haystack.document_stores import ElasticsearchDocumentStore

class ElasticsearchDocumentStore(BaseDocumentStore):
    """Document store backed by Elasticsearch."""

    def __init__(
        self,
        host: str = "localhost",
        port: int = 9200,
        username: str = "",
        password: str = "",
        index: str = "document",
        embedding_dim: int = 768,
    ):
        """
        Set up the Elasticsearch document store.

        Args:
            host: Address of the Elasticsearch host.
            port: Port Elasticsearch is reachable on.
            username: Username used for authentication.
            password: Password used for authentication.
            index: Name of the index that holds the documents.
            embedding_dim: Dimensionality of the embeddings.
        """

FAISS Document Store

from haystack.document_stores import FAISSDocumentStore

class FAISSDocumentStore(BaseDocumentStore):
    """Document store backed by FAISS."""

    def __init__(
        self,
        sql_url: str = "sqlite:///faiss_document_store.db",
        vector_dim: int = 768,
        faiss_index_factory_str: str = "Flat",
    ):
        """
        Set up the FAISS document store.

        Args:
            sql_url: SQLAlchemy URL of the database used for metadata storage.
            vector_dim: Dimensionality of the embeddings.
            faiss_index_factory_str: Which type of FAISS index to use.
        """

Install with Tessl CLI

npx tessl i tessl/pypi-farm-haystack

docs

agents.md

core-schema.md

document-stores.md

evaluation-utilities.md

file-processing.md

generators.md

index.md

pipelines.md

readers.md

retrievers.md

tile.json