Tessl Tile for pypi/chromadb@1.0.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

clients.md collections.md configuration.md documents.md embedding-functions.md index.md queries.md

documents.mddocs/

0
# Document Operations
1

2
Document operations form the core of ChromaDB's functionality, enabling storage, retrieval, updating, and deletion of documents with embeddings, metadata, and associated data. All operations support batching for efficient processing.
3

4
## Capabilities
5

6
### Adding Documents
7

8
Add documents to a collection with automatic or manual embedding generation, supporting text, images, URIs, and metadata.
9

10
```python { .api }
11
def add(
12
    ids: IDs,
13
    documents: Optional[Documents] = None,
14
    embeddings: Optional[Embeddings] = None,
15
    metadatas: Optional[Metadatas] = None,
16
    images: Optional[Images] = None,
17
    uris: Optional[URIs] = None
18
) -> None:
19
    """
20
    Add documents to the collection.
21
    
22
    Args:
23
        ids: List of unique document identifiers
24
        documents: List of document text content
25
        embeddings: List of embedding vectors (generated if not provided)
26
        metadatas: List of metadata dictionaries for each document
27
        images: List of image arrays
28
        uris: List of URIs pointing to external resources
29
        
30
    Raises:
31
        ValueError: If document IDs already exist or invalid data provided
32
    """
33
```
34

35
**Usage Example:**
36

37
```python
38
import chromadb
39

40
client = chromadb.EphemeralClient()
41
collection = client.create_collection("documents")
42

43
# Add documents with automatic embedding generation
44
collection.add(
45
    documents=["This is the first document", "This is the second document"],
46
    metadatas=[{"source": "web", "type": "article"}, {"source": "book", "type": "chapter"}],
47
    ids=["doc1", "doc2"]
48
)
49

50
# Add with custom embeddings
51
collection.add(
52
    documents=["Custom embedding document"],
53
    embeddings=[[0.1, 0.2, 0.3, 0.4]],  # Your pre-computed embedding
54
    metadatas=[{"custom": True}],
55
    ids=["doc3"]
56
)
57
```
58

59
### Querying Documents
60

61
Perform vector similarity search to find documents similar to query text, embeddings, or images with filtering and ranking.
62

63
```python { .api }
64
def query(
65
    query_texts: Optional[Documents] = None,
66
    query_embeddings: Optional[Embeddings] = None,
67
    query_images: Optional[Images] = None,
68
    query_uris: Optional[URIs] = None,
69
    ids: Optional[IDs] = None,
70
    n_results: int = 10,
71
    where: Optional[Where] = None,
72
    where_document: Optional[WhereDocument] = None,
73
    include: Include = ["metadatas", "documents", "distances"]
74
) -> QueryResult:
75
    """
76
    Query the collection for similar documents.
77
    
78
    Args:
79
        query_texts: List of text queries to find similar documents
80
        query_embeddings: List of embedding vectors to search with
81
        query_images: List of image arrays to search with
82
        query_uris: List of URIs to load and search with
83
        ids: Specific document IDs to search within
84
        n_results: Number of results to return per query
85
        where: Metadata filter conditions
86
        where_document: Document text filter conditions
87
        include: Fields to include in results
88
        
89
    Returns:
90
        QueryResult: Search results with documents, distances, and metadata
91
    """
92
```
93

94
**Usage Example:**
95

96
```python
97
# Query with text
98
results = collection.query(
99
    query_texts=["Find documents about machine learning"],
100
    n_results=5,
101
    where={"source": "web"},
102
    include=["documents", "metadatas", "distances"]
103
)
104

105
# Query with embeddings
106
results = collection.query(
107
    query_embeddings=[[0.1, 0.2, 0.3, 0.4]],
108
    n_results=3,
109
    where={"type": {"$in": ["article", "paper"]}}
110
)
111

112
print(f"Found {len(results['ids'][0])} similar documents")
113
for i, doc in enumerate(results['documents'][0]):
114
    print(f"Distance: {results['distances'][0][i]:.3f}, Doc: {doc[:100]}...")
115
```
116

117
### Getting Documents
118

119
Retrieve specific documents by ID or filter criteria without similarity ranking.
120

121
```python { .api }
122
def get(
123
    ids: Optional[IDs] = None,
124
    where: Optional[Where] = None,
125
    limit: Optional[int] = None,
126
    offset: Optional[int] = None,
127
    where_document: Optional[WhereDocument] = None,
128
    include: Include = ["metadatas", "documents"]
129
) -> GetResult:
130
    """
131
    Get documents from the collection.
132
    
133
    Args:
134
        ids: Specific document IDs to retrieve
135
        where: Metadata filter conditions
136
        limit: Maximum number of documents to return
137
        offset: Number of documents to skip
138
        where_document: Document text filter conditions
139
        include: Fields to include in results
140
        
141
    Returns:
142
        GetResult: Retrieved documents with requested fields
143
    """
144
```
145

146
**Usage Example:**
147

148
```python
149
# Get specific documents by ID
150
docs = collection.get(
151
    ids=["doc1", "doc2"],
152
    include=["documents", "metadatas"]
153
)
154

155
# Get documents with metadata filtering
156
docs = collection.get(
157
    where={"source": "web"},
158
    limit=10,
159
    include=["documents", "metadatas", "embeddings"]
160
)
161

162
# Get all documents (paginated)
163
all_docs = collection.get(limit=100, offset=0)
164
```
165

166
### Peeking at Documents
167

168
Quickly preview the first few documents in a collection for inspection.
169

170
```python { .api }
171
def peek(self, limit: int = 10) -> GetResult:
172
    """
173
    Peek at the first few documents in the collection.
174
    
175
    Args:
176
        limit: Number of documents to return
177
        
178
    Returns:
179
        GetResult: First documents in the collection
180
    """
181
```
182

183
**Usage Example:**
184

185
```python
186
# Preview first 5 documents
187
preview = collection.peek(limit=5)
188
print(f"Collection contains {len(preview['ids'])} documents (showing first 5)")
189
```
190

191
### Updating Documents
192

193
Modify existing documents, embeddings, or metadata while preserving document IDs.
194

195
```python { .api }
196
def update(
197
    ids: IDs,
198
    documents: Optional[Documents] = None,
199
    embeddings: Optional[Embeddings] = None,
200
    metadatas: Optional[Metadatas] = None,
201
    images: Optional[Images] = None,
202
    uris: Optional[URIs] = None
203
) -> None:
204
    """
205
    Update existing documents in the collection.
206
    
207
    Args:
208
        ids: List of document IDs to update
209
        documents: New document text content
210
        embeddings: New embedding vectors (regenerated from the new documents or images if not provided)
211
        metadatas: New metadata dictionaries
212
        images: New image arrays
213
        uris: New URIs
214
        
215
    Raises:
216
        ValueError: If document IDs do not exist
217
    """
218
```
219

220
**Usage Example:**
221

222
```python
223
# Update document text (embeddings will be regenerated)
224
collection.update(
225
    ids=["doc1"],
226
    documents=["This is the updated first document"],
227
    metadatas=[{"source": "web", "type": "article", "updated": True}]
228
)
229

230
# Update only metadata
231
collection.update(
232
    ids=["doc2"],
233
    metadatas=[{"source": "book", "type": "chapter", "reviewed": True}]
234
)
235
```
236

237
### Upserting Documents
238

239
Insert new documents or update existing ones in a single operation, providing convenience for data synchronization.
240

241
```python { .api }
242
def upsert(
243
    ids: IDs,
244
    documents: Optional[Documents] = None,
245
    embeddings: Optional[Embeddings] = None,
246
    metadatas: Optional[Metadatas] = None,
247
    images: Optional[Images] = None,
248
    uris: Optional[URIs] = None
249
) -> None:
250
    """
251
    Insert new documents or update existing ones.
252
    
253
    Args:
254
        ids: List of document IDs to upsert
255
        documents: Document text content
256
        embeddings: Embedding vectors (generated if not provided)
257
        metadatas: Metadata dictionaries
258
        images: Image arrays
259
        uris: URIs
260
    """
261
```
262

263
**Usage Example:**
264

265
```python
266
# Upsert documents (creates new or updates existing)
267
collection.upsert(
268
    documents=["New document", "Updated existing document"],
269
    metadatas=[{"source": "api"}, {"source": "user", "updated": True}],
270
    ids=["new_doc", "existing_doc"]
271
)
272
```
273

274
### Deleting Documents
275

276
Remove documents from the collection by ID or filter criteria.
277

278
```python { .api }
279
def delete(
280
    ids: Optional[IDs] = None,
281
    where: Optional[Where] = None,
282
    where_document: Optional[WhereDocument] = None
283
) -> None:
284
    """
285
    Delete documents from the collection.
286
    
287
    Args:
288
        ids: Specific document IDs to delete
289
        where: Metadata filter conditions for deletion
290
        where_document: Document text filter conditions for deletion
291
        
292
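    Note: At least one of ids, where, or where_document must be provided; calling delete() with no arguments raises ValueError rather than clearing the collection. To remove all documents, delete the collection itself.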
293
    """
294
```
295

296
**Usage Example:**
297

298
```python
299
# Delete specific documents
300
collection.delete(ids=["doc1", "doc2"])
301

302
# Delete documents matching metadata criteria
303
collection.delete(where={"source": "temporary"})
304

305
# Delete documents matching text criteria  
306
collection.delete(where_document={"$contains": "delete_me"})
307

308
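# Calling delete() with no ids/where/where_document does NOT clear the collection; it raises ValueError (delete the collection itself to remove everything)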
309
collection.delete()
310
```
311

312
## Types
313

314
```python { .api }
315
from typing import List, Dict, Optional, Union, Any, Literal
316
from numpy.typing import NDArray
317

318
# Basic document types
319
ID = str
320
IDs = List[ID]
321
Document = str
322
Documents = List[Document]
323
URI = str
324
URIs = List[URI]
325
Image = NDArray[Any]  # Image array
326
Images = List[Image]
327

328
# Embedding types
329
Embedding = List[float]
330
Embeddings = List[Embedding]
331

332
# Metadata types
333
Metadata = Dict[str, Union[str, int, float, bool, None]]
334
Metadatas = List[Metadata]
335

336
# Query filter types
337
Where = Dict[Union[str, Literal["$and", "$or"]], Any]
338
WhereDocument = Dict[Literal["$contains", "$not_contains"], Union[str, List[Any]]]
339

340
# Include fields specification
341
Include = List[Literal["documents", "embeddings", "metadatas", "distances", "uris", "data"]]
342

343
# Result types
344
GetResult = Dict[str, List[Any]]  # Contains ids, documents, metadatas, embeddings, etc.
345
QueryResult = Dict[str, List[Any]]  # Contains ids, documents, metadatas, embeddings, distances, etc.
346
```

Version

Tile

Files

documents.md docs/

documents.mddocs/