or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

collection-management.md, construction.md, document-management.md, index.md, mmr.md, search-operations.md

docs/collection-management.md

0

# Collection Management

1

2

Collection-level operations for managing ChromaDB collections, including document retrieval, collection maintenance, and low-level access to the underlying ChromaDB functionality.

3

4

## Capabilities

5

6

### Document Retrieval

7

8

Retrieve documents from the collection using various criteria without performing similarity search.

9

10

```python { .api }

11

def get(

12

ids: Optional[Union[str, list[str]]] = None,

13

where: Optional[Where] = None,

14

limit: Optional[int] = None,

15

offset: Optional[int] = None,

16

where_document: Optional[WhereDocument] = None,

17

include: Optional[list[str]] = None

18

) -> dict[str, Any]:

19

"""

20

Retrieve documents from the collection.

21

22

Parameters:

23

- ids: Document IDs to retrieve (string or list of strings)

24

- where: Metadata filter conditions

25

- limit: Maximum number of documents to return

26

- offset: Number of documents to skip (for pagination)

27

- where_document: Document content filter conditions

28

- include: Fields to include in results ["embeddings", "metadatas", "documents"]

29

(IDs are always included, defaults to ["metadatas", "documents"])

30

31

Returns:

32

Dictionary with keys "ids", "embeddings", "metadatas", "documents"

33

containing the requested data

34

"""

35

36

def get_by_ids(ids: Sequence[str], /) -> list[Document]:

37

"""

38

Get documents by their specific IDs.

39

40

Parameters:

41

- ids: Sequence of document IDs to retrieve

42

43

Returns:

44

List of Document objects (may be fewer than requested if IDs not found)

45

The order may not match the input ID order - rely on document.id field

46

"""

47

```

48

49

**Usage Example:**

50

```python

51

# Get specific documents by ID

52

result = vector_store.get(ids=["doc_1", "doc_2", "doc_3"])

53

print(f"Found {len(result['ids'])} documents")

54

for i, doc_id in enumerate(result["ids"]):

55

print(f"ID: {doc_id}")

56

print(f"Content: {result['documents'][i]}")

57

print(f"Metadata: {result['metadatas'][i]}")

58

59

# Get documents with pagination

60

page_1 = vector_store.get(limit=10, offset=0)

61

page_2 = vector_store.get(limit=10, offset=10)

62

63

# Get documents matching metadata criteria

64

filtered_docs = vector_store.get(

65

where={"$and": [{"category": "science"}, {"year": "2023"}]},

66

limit=50

67

)

68

69

# Get documents with embeddings included

70

docs_with_embeddings = vector_store.get(

71

where={"status": "active"},

72

include=["documents", "metadatas", "embeddings"]

73

)

74

75

# Get documents by IDs using convenience method

76

documents = vector_store.get_by_ids(["doc_1", "doc_2"])

77

for doc in documents:

78

print(f"ID: {doc.id}, Content: {doc.page_content}")

79

```

80

81

### Collection Maintenance

82

83

Operations for maintaining and managing the underlying ChromaDB collection.

84

85

```python { .api }

86

def reset_collection() -> None:

87

"""

88

Reset the collection by deleting it and recreating an empty one.

89

90

This operation removes all documents and starts with a fresh collection

91

using the same configuration settings.

92

"""

93

94

def delete_collection() -> None:

95

"""

96

Delete the entire collection and all its documents.

97

98

After this operation, the collection no longer exists and the

99

Chroma instance becomes unusable.

100

"""

101

```

102

103

**Usage Example:**

104

```python

105

# Reset collection (clear all data but keep configuration)

106

vector_store.reset_collection()

107

print("Collection reset - all documents removed")

108

109

# Delete collection entirely

110

vector_store.delete_collection()

111

print("Collection deleted")

112

113

# Note: After delete_collection(), the vector_store instance cannot be used

114

# Create a new instance if needed:

115

# vector_store = Chroma(collection_name="new_collection", ...)

116

```

117

118

### Collection Properties

119

120

Access to collection-level information and settings.

121

122

```python { .api }

123

@property

124

def embeddings(self) -> Optional[Embeddings]:

125

"""

126

Access the configured embedding function.

127

128

Returns:

129

The embedding function used by this vector store, or None if not configured

130

"""

131

```

132

133

**Usage Example:**

134

```python

135

# Check if embeddings are configured

136

if vector_store.embeddings:

137

print("Embedding function is configured")

138

# Use the embedding function directly if needed

139

query_embedding = vector_store.embeddings.embed_query("test query")

140

else:

141

print("No embedding function configured")

142

```

143

144

## Advanced Filtering

145

146

### Metadata Filtering (Where Clauses)

147

148

Use ChromaDB's filtering syntax to query documents based on metadata.

149

150

**Simple Equality:**

151

```python

152

where = {"category": "science"}

153

where = {"$and": [{"author": "Smith"}, {"year": 2023}]}  # multiple conditions must be combined with $and

154

```

155

156

**Comparison Operators:**

157

```python

158

where = {"year": {"$gte": 2020}} # Greater than or equal

159

where = {"score": {"$lt": 0.5}} # Less than

160

where = {"count": {"$ne": 0}} # Not equal

161

```

162

163

**Logical Operators:**

164

```python

165

where = {

166

"$and": [

167

{"category": "science"},

168

{"year": {"$gte": 2020}}

169

]

170

}

171

172

where = {

173

"$or": [

174

{"category": "science"},

175

{"category": "technology"}

176

]

177

}

178

```

179

180

**Inclusion/Exclusion:**

181

```python

182

where = {"category": {"$in": ["science", "tech", "ai"]}}

183

where = {"status": {"$nin": ["draft", "archived"]}}

184

```

185

186

### Document Content Filtering

187

188

Filter based on the actual document text content.

189

190

**Text Contains:**

191

```python

192

where_document = {"$contains": "machine learning"}

193

```

194

195

**Text Does Not Contain:**

196

```python

197

where_document = {"$not_contains": "deprecated"}

198

```

199

200

**Complex Document Filtering:**

201

```python

202

where_document = {

203

"$and": [

204

{"$contains": "python"},

205

{"$not_contains": "javascript"}

206

]

207

}

208

```

209

210

### Pagination and Limiting

211

212

Efficiently handle large result sets with pagination.

213

214

**Basic Pagination:**

215

```python

216

# Get first 20 documents

217

batch_1 = vector_store.get(limit=20, offset=0)

218

219

# Get next 20 documents

220

batch_2 = vector_store.get(limit=20, offset=20)

221

222

# Get documents 101-120 (skip the first 100)

223

batch_n = vector_store.get(limit=20, offset=100)

224

```

225

226

**Filtered Pagination:**

227

```python

228

def get_documents_by_category(category: str, page_size: int = 50):

229

offset = 0

230

while True:

231

batch = vector_store.get(

232

where={"category": category},

233

limit=page_size,

234

offset=offset

235

)

236

237

if not batch["ids"]:

238

break

239

240

yield batch

241

offset += page_size

242

243

# Use pagination generator

244

for batch in get_documents_by_category("science"):

245

print(f"Processing {len(batch['ids'])} documents")

246

```

247

248

## Error Handling

249

250

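Collection management operations can raise exceptions, for example when they are called after `delete_collection()` has made the instance unusable. Looking up IDs that do not exist is not an error: `get_by_ids` simply returns fewer documents than requested.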

251

252

**Common Error Scenarios:**

253

```python

254

try:

255

# Collection operations

256

documents = vector_store.get_by_ids(["invalid_id"])

257

258

except Exception as e:

259

print(f"Error retrieving documents: {e}")

260

261

try:

262

# Collection deletion

263

vector_store.delete_collection()

264

265

except Exception as e:

266

print(f"Error deleting collection: {e}")

267

268

# Check if collection exists before operations

269

if hasattr(vector_store, '_chroma_collection') and vector_store._chroma_collection:

270

# Safe to perform operations

271

result = vector_store.get(limit=10)

272

else:

273

print("Collection not initialized")

274

```