or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

agents.md core-schema.md document-stores.md evaluation-utilities.md file-processing.md generators.md index.md pipelines.md readers.md retrievers.md

document-stores.md — docs/

0

# Document Stores

1

2

Backend storage systems supporting vector and keyword search across multiple databases including Elasticsearch, FAISS, Pinecone, Weaviate, and others.

3

4

## Core Imports

5

6

```python

7

from haystack.document_stores import (

8

InMemoryDocumentStore,

9

ElasticsearchDocumentStore,

10

FAISSDocumentStore,

11

PineconeDocumentStore,

12

WeaviateDocumentStore

13

)

14

from haystack.document_stores.base import BaseDocumentStore

15

```

16

17

## Base Document Store

18

19

```python { .api }

20

from haystack.document_stores.base import BaseDocumentStore

21

from haystack.schema import Document, FilterType

22

from typing import List, Optional, Dict, Any

23

24

class BaseDocumentStore:
    """Base class that the document stores documented on this page subclass."""

    def write_documents(self, documents: List[Document], index: Optional[str] = None,
                        batch_size: int = 10_000, duplicate_documents: str = "overwrite") -> None:
        """
        Write documents to the document store.

        Args:
            documents: List of Document objects to store
            index: Optional index name
            batch_size: Number of documents to write in each batch
            duplicate_documents: How to handle duplicates ("overwrite", "skip", "fail")
        """

    def get_all_documents(self, index: Optional[str] = None,
                          filters: Optional[FilterType] = None) -> List[Document]:
        """
        Retrieve all documents from the store.

        Args:
            index: Optional index name
            filters: Optional filters applied to the retrieved documents

        Returns:
            List of Document objects
        """

    def query(self, query: str, filters: Optional[FilterType] = None,
              top_k: int = 10, index: Optional[str] = None) -> List[Document]:
        """
        Query documents using keyword search.

        Args:
            query: Query string
            filters: Optional filters applied to the search
            top_k: Result limit (defaults to 10)
            index: Optional index name

        Returns:
            List of Document objects
        """

    def query_by_embedding(self, query_emb, filters: Optional[FilterType] = None,
                           top_k: int = 10, index: Optional[str] = None) -> List[Document]:
        """
        Query documents using vector similarity search.

        Args:
            query_emb: Embedding of the query (type not annotated in this signature)
            filters: Optional filters applied to the search
            top_k: Result limit (defaults to 10)
            index: Optional index name

        Returns:
            List of Document objects
        """

48

```

49

50

## In-Memory Document Store

51

52

```python { .api }

53

from haystack.document_stores import InMemoryDocumentStore

54

55

class InMemoryDocumentStore(BaseDocumentStore):
    # NOTE(review): Haystack 1.x appears to default `similarity` to "dot_product"
    # rather than "cosine" — confirm against the release this page targets.
    def __init__(self, embedding_dim: int = 768, return_embedding: bool = False,
                 similarity: str = "cosine", duplicate_documents: str = "overwrite"):
        """
        Initialize in-memory document store.

        Args:
            embedding_dim: Dimension of document embeddings
            return_embedding: Whether to return embeddings by default
            similarity: Similarity metric ("cosine", "dot_product", "l2")
            duplicate_documents: How to handle duplicate documents
        """

67

```

68

69

## Elasticsearch Document Store

70

71

```python { .api }

72

from haystack.document_stores import ElasticsearchDocumentStore

73

74

class ElasticsearchDocumentStore(BaseDocumentStore):
    def __init__(self, host: str = "localhost", port: int = 9200,
                 username: str = "", password: str = "",
                 index: str = "document", embedding_dim: int = 768):
        """
        Initialize Elasticsearch document store.

        Args:
            host: Elasticsearch host address
            port: Elasticsearch port
            username: Username for authentication
            password: Password for authentication
            index: Index name for documents
            embedding_dim: Dimension of embeddings
        """

89

```

90

91

## FAISS Document Store

92

93

```python { .api }

94

from haystack.document_stores import FAISSDocumentStore

95

96

class FAISSDocumentStore(BaseDocumentStore):
    # NOTE(review): Haystack 1.x appears to deprecate `vector_dim` in favour of
    # `embedding_dim` — confirm against the release this page targets.
    def __init__(self, sql_url: str = "sqlite:///faiss_document_store.db",
                 vector_dim: int = 768, faiss_index_factory_str: str = "Flat"):
        """
        Initialize FAISS document store.

        Args:
            sql_url: SQLAlchemy URL for metadata storage
            vector_dim: Dimension of embeddings
            faiss_index_factory_str: FAISS index type
        """

107

```

108