# Document Stores

Document stores are the backend storage systems that provide vector and keyword search across multiple databases, including Elasticsearch, FAISS, Pinecone, Weaviate, and others.

## Core Imports

```python
# Concrete document store backends.
from haystack.document_stores import (
    InMemoryDocumentStore,
    ElasticsearchDocumentStore,
    FAISSDocumentStore,
    PineconeDocumentStore,
    WeaviateDocumentStore,
)
# Base class that the backends above subclass in the listings that follow.
from haystack.document_stores.base import BaseDocumentStore
```

## Base Document Store

```python { .api }
from haystack.document_stores.base import BaseDocumentStore
from haystack.schema import Document, FilterType
from typing import List, Optional, Dict, Any


class BaseDocumentStore:
    """
    Common interface of the document stores listed in this document.

    This is an API signature listing: method bodies are omitted and only
    the signatures and their documentation are shown.
    """

    def write_documents(self, documents: List[Document], index: Optional[str] = None,
                        batch_size: int = 10_000, duplicate_documents: str = "overwrite") -> None:
        """
        Write documents to the document store.

        Args:
            documents: List of Document objects to store
            index: Optional index name
            batch_size: Number of documents to write in each batch (default: 10,000)
            duplicate_documents: How to handle duplicates ("overwrite", "skip", "fail");
                defaults to "overwrite"
        """

    def get_all_documents(self, index: Optional[str] = None,
                          filters: Optional[FilterType] = None) -> List[Document]:
        """
        Retrieve all documents from the store.

        Args:
            index: Optional index name
            filters: Optional filters (FilterType)

        Returns:
            List of Document objects
        """

    def query(self, query: str, filters: Optional[FilterType] = None,
              top_k: int = 10, index: Optional[str] = None) -> List[Document]:
        """
        Query documents using keyword search.

        Args:
            query: Query string
            filters: Optional filters (FilterType)
            top_k: Defaults to 10
            index: Optional index name

        Returns:
            List of Document objects
        """

    def query_by_embedding(self, query_emb, filters: Optional[FilterType] = None,
                           top_k: int = 10, index: Optional[str] = None) -> List[Document]:
        """
        Query documents using vector similarity search.

        Args:
            query_emb: Query embedding (its type is not annotated in this listing)
            filters: Optional filters (FilterType)
            top_k: Defaults to 10
            index: Optional index name

        Returns:
            List of Document objects
        """
```

## In-Memory Document Store

```python { .api }
from haystack.document_stores import InMemoryDocumentStore


class InMemoryDocumentStore(BaseDocumentStore):
    def __init__(self, embedding_dim: int = 768, return_embedding: bool = False,
                 similarity: str = "cosine", duplicate_documents: str = "overwrite"):
        """
        Initialize in-memory document store.

        Args:
            embedding_dim: Dimension of document embeddings (default: 768)
            return_embedding: Whether to return embeddings by default (default: False)
            similarity: Similarity metric ("cosine", "dot_product", "l2");
                defaults to "cosine"
            duplicate_documents: How to handle duplicate documents
                (default: "overwrite")
        """
```

## Elasticsearch Document Store

```python { .api }
from haystack.document_stores import ElasticsearchDocumentStore


class ElasticsearchDocumentStore(BaseDocumentStore):
    def __init__(self, host: str = "localhost", port: int = 9200,
                 username: str = "", password: str = "",
                 index: str = "document", embedding_dim: int = 768):
        """
        Initialize Elasticsearch document store.

        Args:
            host: Elasticsearch host address (default: "localhost")
            port: Elasticsearch port (default: 9200)
            username: Username for authentication (default: empty string)
            password: Password for authentication (default: empty string)
            index: Index name for documents (default: "document")
            embedding_dim: Dimension of embeddings (default: 768)
        """
```

## FAISS Document Store

```python { .api }
from haystack.document_stores import FAISSDocumentStore


class FAISSDocumentStore(BaseDocumentStore):
    def __init__(self, sql_url: str = "sqlite:///faiss_document_store.db",
                 vector_dim: int = 768, faiss_index_factory_str: str = "Flat"):
        """
        Initialize FAISS document store.

        Args:
            sql_url: SQLAlchemy URL for metadata storage
                (default: "sqlite:///faiss_document_store.db")
            vector_dim: Dimension of embeddings (default: 768)
            faiss_index_factory_str: FAISS index type (default: "Flat")
        """
```