0
# Document & Collection Operations
1
2
Complete document lifecycle management and collection operations for ArangoDB. Supports standard document collections, vertex collections for graphs, and edge collections with comprehensive CRUD operations, batch processing, and indexing.
3
4
## Capabilities
5
6
### Collection Management
7
8
Create, delete, and manage collections with various configuration options and collection types.
9
10
```python { .api }
11
class StandardDatabase:
12
def collection(self, name: str) -> StandardCollection:
13
"""
14
Get collection interface.
15
16
Parameters:
17
- name: str, collection name
18
19
Returns:
20
StandardCollection: Collection interface object
21
"""
22
23
def collections(self) -> Result[List[Json]]:
24
"""
25
List all collections.
26
27
Returns:
28
Result[List[Json]]: List of collection information dicts
29
"""
30
31
def create_collection(self, name: str, sync=None, system: bool = False,
32
**kwargs) -> Result[StandardCollection]:
33
"""
34
Create a new collection.
35
36
Parameters:
37
- name: str, collection name
38
- sync: bool, wait for sync to disk
39
- system: bool, create as system collection
40
- **kwargs: additional collection options (type, sharding, etc.)
41
42
Returns:
43
Result[StandardCollection]: Created collection object
44
"""
45
46
def delete_collection(self, name: str, ignore_missing: bool = False,
47
system=None) -> Result[bool]:
48
"""
49
Delete a collection.
50
51
Parameters:
52
- name: str, collection name
53
- ignore_missing: bool, ignore if collection doesn't exist
54
- system: bool, allow deletion of system collections
55
56
Returns:
57
Result[bool]: True on success
58
"""
59
60
def has_collection(self, name: str) -> Result[bool]:
61
"""
62
Check if collection exists.
63
64
Parameters:
65
- name: str, collection name
66
67
Returns:
68
Result[bool]: True if collection exists
69
"""
70
```
71
72
### Document Operations
73
74
Core CRUD operations for individual documents with revision control, return options, and synchronization settings.
75
76
```python { .api }
77
class StandardCollection:
78
@property
79
def name(self) -> str:
80
"""Collection name."""
81
82
@property
83
def db_name(self) -> str:
84
"""Database name."""
85
86
def insert(self, document: Json, return_new: bool = False, sync=None,
87
silent: bool = False, overwrite: bool = False, **kwargs) -> Result:
88
"""
89
Insert a document.
90
91
Parameters:
92
- document: dict, document data
93
- return_new: bool, return inserted document
94
- sync: bool, wait for sync to disk
95
- silent: bool, if True, return True instead of document metadata
96
- overwrite: bool, overwrite existing document
97
- **kwargs: additional insert options
98
99
Returns:
100
Result[Json|bool]: Document metadata or boolean on success
101
"""
102
103
def get(self, document, rev=None, check_rev: bool = True) -> Result:
104
"""
105
Get a document.
106
107
Parameters:
108
- document: str or dict, document key or dict with _key
109
- rev: str, expected revision
110
- check_rev: bool, check revision matches
111
112
Returns:
113
Result[Json]: Document data or None if not found
114
"""
115
116
def update(self, document: Json, check_rev: bool = True, merge: bool = True,
117
keep_none: bool = True, return_new: bool = False,
118
return_old: bool = False, sync=None, silent: bool = False) -> Result:
119
"""
120
Update a document.
121
122
Parameters:
123
- document: dict, document with _key and fields to update
124
- check_rev: bool, check current revision
125
- merge: bool, merge sub-dictionaries with the existing ones instead of overwriting them
126
- keep_none: bool, keep fields whose value is None (stored as null); if False, such fields are removed
127
- return_new: bool, return updated document
128
- return_old: bool, return original document
129
- sync: bool, wait for sync to disk
130
- silent: bool, if True, return True instead of document metadata
131
132
Returns:
133
Result[Json|bool]: Document metadata or boolean
134
"""
135
136
def replace(self, document: Json, check_rev: bool = True,
137
return_new: bool = False, return_old: bool = False,
138
sync=None, silent: bool = False) -> Result:
139
"""
140
Replace a document completely.
141
142
Parameters:
143
- document: dict, complete replacement document with _key
144
- check_rev: bool, check current revision
145
- return_new: bool, return new document
146
- return_old: bool, return original document
147
- sync: bool, wait for sync to disk
148
- silent: bool, if True, return True instead of document metadata
149
150
Returns:
151
Result[Json|bool]: Document metadata or boolean
152
"""
153
154
def delete(self, document, rev=None, check_rev: bool = True,
155
ignore_missing: bool = False, return_old: bool = False,
156
sync=None, silent: bool = False) -> Result:
157
"""
158
Delete a document.
159
160
Parameters:
161
- document: str or dict, document key or dict with _key
162
- rev: str, expected revision
163
- check_rev: bool, check revision matches
164
- ignore_missing: bool, ignore if document doesn't exist
165
- return_old: bool, return deleted document
166
- sync: bool, wait for sync to disk
167
- silent: bool, if True, return True instead of document metadata
168
169
Returns:
170
Result[Json|bool]: Document metadata or boolean
171
"""
172
173
def has(self, document, rev=None, check_rev: bool = True) -> Result[bool]:
174
"""
175
Check if document exists.
176
177
Parameters:
178
- document: str or dict, document key or dict with _key
179
- rev: str, expected revision
180
- check_rev: bool, check revision matches
181
182
Returns:
183
Result[bool]: True if document exists
184
"""
185
```
186
187
### Batch Operations
188
189
Efficient bulk operations on `StandardCollection` that process multiple documents in a single request, with batch-processing optimizations.
190
191
```python { .api }
192
def insert_many(self, documents: Sequence[Json], return_new: bool = False,
193
sync=None, silent: bool = False, overwrite: bool = False,
194
**kwargs) -> Result:
195
"""
196
Insert multiple documents.
197
198
Parameters:
199
- documents: list, list of document dicts
200
- return_new: bool, return inserted documents
201
- sync: bool, wait for sync to disk
202
- silent: bool, if True, return True instead of the list of document metadata
203
- overwrite: bool, overwrite existing documents
204
- **kwargs: additional options
205
206
Returns:
207
Result[List[Json]|bool]: List of document metadata or boolean
208
"""
209
210
def update_many(self, documents: Sequence[Json], check_rev: bool = True,
211
merge: bool = True, keep_none: bool = True,
212
return_new: bool = False, return_old: bool = False,
213
sync=None, silent: bool = False) -> Result:
214
"""
215
Update multiple documents.
216
217
Parameters:
218
- documents: list, list of document dicts with _key
219
- check_rev: bool, check revisions
220
- merge: bool, merge sub-dictionaries with the existing ones instead of overwriting them
221
- keep_none: bool, keep fields whose value is None (stored as null); if False, such fields are removed
222
- return_new: bool, return updated documents
223
- return_old: bool, return original documents
224
- sync: bool, wait for sync to disk
225
- silent: bool, if True, return True instead of the list of document metadata
226
227
Returns:
228
Result[List[Json]|bool]: List of document metadata or boolean
229
"""
230
231
def delete_many(self, documents: Sequence, return_old: bool = False,
232
check_rev: bool = True, sync=None, silent: bool = False) -> Result:
233
"""
234
Delete multiple documents.
235
236
Parameters:
237
- documents: list, list of keys or document dicts
238
- return_old: bool, return deleted documents
239
- check_rev: bool, check revisions
240
- sync: bool, wait for sync to disk
241
- silent: bool, if True, return True instead of the list of document metadata
242
243
Returns:
244
Result[List[Json]|bool]: List of document metadata or boolean
245
"""
246
```
247
248
### Index Management
249
250
Create and manage various index types on a `StandardCollection` to optimize query performance and enforce uniqueness constraints.
251
252
```python { .api }
253
def indexes(self) -> Result[List[Json]]:
254
"""
255
List all indexes on collection.
256
257
Returns:
258
Result[List[Json]]: List of index information dicts
259
"""
260
261
def add_hash_index(self, fields: Sequence[str], unique=None, sparse=None,
262
deduplicate=None, name=None, in_background=None) -> Result[Json]:
263
"""
264
Create hash index.
265
266
Parameters:
267
- fields: list, field names to index
268
- unique: bool, enforce uniqueness
269
- sparse: bool, exclude documents in which an indexed field is missing or null
270
- deduplicate: bool, remove duplicate values
271
- name: str, index name
272
- in_background: bool, create in background
273
274
Returns:
275
Result[Json]: Index information dict
276
"""
277
278
def add_skiplist_index(self, fields: Sequence[str], unique=None, sparse=None,
279
deduplicate=None, name=None, in_background=None) -> Result[Json]:
280
"""
281
Create skiplist index for range queries.
282
283
Parameters:
284
- fields: list, field names to index
285
- unique: bool, enforce uniqueness
286
- sparse: bool, exclude documents in which an indexed field is missing or null
287
- deduplicate: bool, remove duplicate values
288
- name: str, index name
289
- in_background: bool, create in background
290
291
Returns:
292
Result[Json]: Index information dict
293
"""
294
295
def add_geo_index(self, fields: Sequence[str], ordered=None, name=None,
296
in_background=None) -> Result[Json]:
297
"""
298
Create geo-spatial index.
299
300
Parameters:
301
- fields: list, field names for coordinates
302
- ordered: bool, whether coordinates are ordered longitude, then latitude
303
- name: str, index name
304
- in_background: bool, create in background
305
306
Returns:
307
Result[Json]: Index information dict
308
"""
309
310
def add_fulltext_index(self, fields: Sequence[str], min_length=None,
311
name=None, in_background=None) -> Result[Json]:
312
"""
313
Create fulltext search index.
314
315
Parameters:
316
- fields: list, text field names
317
- min_length: int, minimum word length
318
- name: str, index name
319
- in_background: bool, create in background
320
321
Returns:
322
Result[Json]: Index information dict
323
"""
324
325
def delete_index(self, index_id: str, ignore_missing: bool = False) -> Result[bool]:
326
"""
327
Delete an index.
328
329
Parameters:
330
- index_id: str, index identifier
331
- ignore_missing: bool, ignore if index doesn't exist
332
333
Returns:
334
Result[bool]: True on success
335
"""
336
```
337
338
### Graph Collections
339
340
Specialized collection types for graph operations, with vertex- and edge-specific functionality.
341
342
```python { .api }
343
class VertexCollection(Collection):
344
"""Vertex collection for graph operations."""
345
346
def link(self, from_vertex, to_vertex, data=None, sync=None) -> Result[Json]:
347
"""
348
Create edge between vertices.
349
350
Parameters:
351
- from_vertex: str or dict, source vertex
352
- to_vertex: str or dict, target vertex
353
- data: dict, edge data
354
- sync: bool, wait for sync to disk
355
356
Returns:
357
Result[Json]: Edge document metadata
358
"""
359
360
class EdgeCollection(Collection):
361
"""Edge collection for graph operations."""
362
363
def edges(self, vertex, direction=None) -> Result[Json]:
364
"""
365
Get edges connected to vertex.
366
367
Parameters:
368
- vertex: str or dict, vertex identifier
369
- direction: str, edge direction ('in', 'out', 'any')
370
371
Returns:
372
Result[Json]: Edges information dict
373
"""
374
375
def link(self, from_vertex, to_vertex, data=None, sync=None) -> Result[Json]:
376
"""
377
Create edge between vertices.
378
379
Parameters:
380
- from_vertex: str or dict, source vertex
381
- to_vertex: str or dict, target vertex
382
- data: dict, edge data
383
- sync: bool, wait for sync to disk
384
385
Returns:
386
Result[Json]: Edge document metadata
387
"""
388
```
389
390
## Usage Examples
391
392
### Basic Collection Operations
393
394
```python
395
from arango import ArangoClient
396
397
client = ArangoClient()
398
db = client.db('example', username='root', password='password')
399
400
# Create collection
401
students = db.create_collection('students')
402
403
# Insert documents
404
student1 = students.insert({'name': 'Alice', 'age': 22, 'major': 'CS'})
405
student2 = students.insert({'name': 'Bob', 'age': 21, 'major': 'Math'})
406
407
print(f"Inserted: {student1['_key']}")
408
409
# Get document
410
alice = students.get(student1['_key'])
411
print(f"Retrieved: {alice['name']}")
412
413
# Update document
414
students.update({'_key': student1['_key'], 'age': 23})
415
416
# Delete document
417
students.delete(student2['_key'])
418
```
419
420
### Batch Operations
421
422
```python
423
# Batch insert
424
documents = [
425
{'name': 'Charlie', 'age': 20, 'major': 'Physics'},
426
{'name': 'Diana', 'age': 22, 'major': 'Biology'},
427
{'name': 'Eve', 'age': 21, 'major': 'Chemistry'}
428
]
429
430
results = students.insert_many(documents, return_new=True)
431
for result in results:
432
print(f"Inserted: {result['new']['name']}")
433
434
# Batch update
435
updates = [
436
{'_key': 'charlie_key', 'gpa': 3.8},
437
{'_key': 'diana_key', 'gpa': 3.9}
438
]
439
students.update_many(updates)
440
441
# Batch delete
442
keys_to_delete = ['eve_key', 'old_record_key']
443
students.delete_many(keys_to_delete)
444
```
445
446
### Index Management
447
448
```python
449
# Create indexes for performance
450
students.add_hash_index(['name'], unique=True)
451
students.add_skiplist_index(['age', 'gpa'])
452
students.add_fulltext_index(['major', 'description'])
453
454
# Geo index for location data
455
locations = db.create_collection('locations')
456
locations.add_geo_index(['coordinates'])
457
458
# List all indexes
459
indexes = students.indexes()
460
for index in indexes:
461
print(f"Index: {index['type']} on {index['fields']}")
462
```
463
464
### Collection Configuration
465
466
```python
467
# Create collection with options
468
courses = db.create_collection(
469
'courses',
470
sync=True, # Synchronous writes
471
schema={ # Document validation
472
'rule': {
473
'type': 'object',
474
'properties': {
475
'name': {'type': 'string'},
476
'credits': {'type': 'number', 'minimum': 1}
477
},
478
'required': ['name', 'credits']
479
},
480
'level': 'moderate'
481
}
482
)
483
484
# Check collection existence
485
if db.has_collection('students'):
486
print("Students collection exists")
487
488
# List all collections with their info
489
collections = db.collections()
490
for col in collections:
491
print(f"Collection: {col['name']} (type: {col['type']})")
492
```
493
494
### Error Handling
495
496
```python
497
from arango import DocumentNotFoundError, UniqueConstraintViolatedError
498
499
try:
500
# Try to get non-existent document
501
doc = students.get('nonexistent_key')
502
if doc is None:
503
print("Document not found")
504
505
# Try to insert duplicate
506
students.insert({'name': 'Alice', 'age': 22}) # Duplicate name violates the unique index on 'name'
507
508
except DocumentNotFoundError:
509
print("Document does not exist")
510
except UniqueConstraintViolatedError:
511
print("Unique constraint violated")
512
```