0
# Document Management
1
2
## Overview
3
4
Document management in pycrdt centers around the `Doc` class, which serves as the container for all collaborative data types. Documents provide transaction management, state tracking, and synchronization capabilities. The library supports both basic document operations and type-safe document variants for structured applications.
5
6
## Core Types
7
8
### Doc
9
10
The main document container that holds collaborative data types and manages their synchronization.
11
12
```python { .api }
13
class Doc:
14
def __init__(
15
self,
16
init: dict[str, T] = {},
17
*,
18
client_id: int | None = None,
19
doc: _Doc | None = None,
20
Model=None,
21
allow_multithreading: bool = False
22
) -> None:
23
"""
24
Create a new collaborative document.
25
26
Args:
27
init (dict): Initial data for the document
28
client_id (int, optional): Unique client identifier
29
doc (_Doc, optional): Existing native document instance
30
Model: Model class for typed documents
31
allow_multithreading (bool): Enable multithreading support
32
"""
33
34
@property
35
def guid(self) -> int:
36
"""Get the globally unique document identifier."""
37
38
@property
39
def client_id(self) -> int:
40
"""Get the client identifier for this document."""
41
42
def transaction(self, origin: Any = None) -> Transaction:
43
"""
44
Create a new read-write transaction context.
45
46
Args:
47
origin: Optional origin identifier for the transaction
48
49
Returns:
50
Transaction: Context manager for batched operations
51
"""
52
53
def new_transaction(self, origin: Any = None, timeout: float | None = None) -> NewTransaction:
54
"""
55
Create a new transaction with async support.
56
57
Args:
58
origin: Optional origin identifier
59
timeout (float, optional): Transaction timeout in seconds
60
61
Returns:
62
NewTransaction: Async-compatible transaction context
63
"""
64
65
def get_state(self) -> bytes:
66
"""
67
Get the document's current state vector as binary data.
68
69
Returns:
70
bytes: Document state vector
71
"""
72
73
def get_update(self, state: bytes | None = None) -> bytes:
74
"""
75
Generate an update containing changes since the given state.
76
77
Args:
78
state (bytes, optional): State vector (as returned by get_state()) to diff against; if omitted, the update contains the whole document
79
80
Returns:
81
bytes: Binary update data
82
"""
83
84
def apply_update(self, update: bytes) -> None:
85
"""
86
Apply an update to this document.
87
88
Args:
89
update (bytes): Binary update data to apply
90
"""
91
92
def get(self, key: str, *, type: type[T]) -> T:
93
"""
94
Get or create a shared data type at the given key.
95
96
Args:
97
key (str): Key to access the shared type
98
type: Class of the shared type to create/retrieve
99
100
Returns:
101
T: The shared data type instance
102
"""
103
104
def keys(self) -> Iterable[str]:
105
"""
106
Get all keys of shared data types in this document.
107
108
Returns:
109
Iterable[str]: Iterator over document keys
110
"""
111
112
def values(self) -> Iterable[T]:
113
"""
114
Get all shared data type values in this document.
115
116
Returns:
117
Iterable[T]: Iterator over document values
118
"""
119
120
def items(self) -> Iterable[tuple[str, T]]:
121
"""
122
Get all key-value pairs of shared data types in this document.
123
124
Returns:
125
Iterable[tuple[str, T]]: Iterator over (key, value) pairs
126
"""
127
128
def observe(self, callback: Callable[[TransactionEvent], None]) -> Subscription:
129
"""
130
Observe document-level changes.
131
132
Args:
133
callback: Function called when document changes occur
134
135
Returns:
136
Subscription: Handle for unsubscribing
137
"""
138
139
def observe_subdocs(self, callback: Callable[[SubdocsEvent], None]) -> Subscription:
140
"""
141
Observe subdocument changes.
142
143
Args:
144
callback: Function called when subdocuments change
145
146
Returns:
147
Subscription: Handle for unsubscribing
148
"""
149
150
def unobserve(self, subscription: Subscription) -> None:
151
"""
152
Remove an event observer.
153
154
Args:
155
subscription: Subscription handle to remove
156
"""
157
158
def events(
159
self,
160
subdocs: bool = False,
161
max_buffer_size: float = float("inf")
162
) -> MemoryObjectReceiveStream:
163
"""
164
Get an async stream of document events.
165
166
Args:
167
subdocs (bool): Include subdocument events
168
max_buffer_size (float): Maximum event buffer size
169
170
Returns:
171
MemoryObjectReceiveStream: Async event stream
172
"""
173
174
# Dictionary-like interface
175
def __getitem__(self, key: str) -> T:
176
"""Get shared type by key."""
177
178
def __setitem__(self, key: str, value: T) -> None:
179
"""Set shared type at key."""
180
181
def __iter__(self) -> Iterable[str]:
182
"""Iterate over document keys."""
183
184
def keys(self) -> Iterable[str]:
185
"""Get all document keys."""
186
187
def values(self) -> Iterable[T]:
188
"""Get all shared type values."""
189
190
def items(self) -> Iterable[tuple[str, T]]:
191
"""Get key-value pairs."""
192
```
193
194
### TypedDoc
195
196
A type-safe wrapper around Doc that provides typed access to root shared values.
197
198
```python { .api }
199
class TypedDoc:
200
"""
201
Base class for type-safe document containers.
202
203
Usage:
204
class MyDoc(TypedDoc):
205
map0: Map[int]
206
array0: Array[bool]
207
text0: Text
208
209
doc = MyDoc()
210
doc.map0["foo"] = 3
211
doc.array0.append(True)
212
doc.text0 += "Hello"
213
"""
214
```
215
216
## Transaction Types
217
218
### Transaction
219
220
Context manager for read-write operations on documents.
221
222
```python { .api }
223
class Transaction:
224
def __init__(self, doc: Doc, origin: Any = None) -> None:
225
"""
226
Create a transaction context.
227
228
Args:
229
doc (Doc): Document to create transaction for
230
origin: Optional transaction origin identifier
231
"""
232
233
@property
234
def origin(self) -> Any:
235
"""Get the transaction origin identifier."""
236
237
def __enter__(self) -> Transaction:
238
"""Enter transaction context."""
239
240
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
241
"""Exit transaction context and commit changes."""
242
```
243
244
### NewTransaction
245
246
A transaction that can be used as either a synchronous (`with`) or an asynchronous (`async with`) context manager.
247
248
```python { .api }
249
class NewTransaction:
250
def __init__(self, doc: Doc, origin: Any = None, timeout: float | None = None) -> None:
251
"""
252
Create a new transaction with async support.
253
254
Args:
255
doc (Doc): Document to create transaction for
256
origin: Optional transaction origin identifier
257
timeout (float, optional): Transaction timeout
258
"""
259
260
def __enter__(self) -> NewTransaction:
261
"""Enter sync context."""
262
263
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
264
"""Exit sync context."""
265
266
async def __aenter__(self) -> NewTransaction:
267
"""Enter async context."""
268
269
async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
270
"""Exit async context."""
271
```
272
273
### ReadTransaction
274
275
Read-only transaction that cannot modify document state.
276
277
```python { .api }
278
class ReadTransaction:
279
"""
280
Read-only transaction context for safe read operations.
281
Cannot be used to modify document state.
282
"""
283
```
284
285
## Event Types
286
287
### TransactionEvent
288
289
Event emitted when document changes occur.
290
291
```python { .api }
292
class TransactionEvent:
293
@property
294
def update(self) -> bytes:
295
"""Get the binary update data for this transaction."""
296
```
297
298
### SubdocsEvent
299
300
Event emitted when subdocument changes occur.
301
302
```python { .api }
303
class SubdocsEvent:
304
"""Event containing subdocument change information."""
305
```
306
307
### Subscription
308
309
Handle for managing event subscriptions.
310
311
```python { .api }
312
class Subscription:
313
"""
314
Subscription handle for event observers.
315
Used with unobserve() to clean up event listeners.
316
"""
317
```
318
319
## Base Classes
320
321
### BaseDoc
322
323
Base class for document containers with multithreading support.
324
325
```python { .api }
326
class BaseDoc:
327
"""
328
Base class for document containers with multithreading support.
329
Provides common functionality for Doc and typed document variants.
330
"""
331
```
332
333
### BaseType
334
335
Abstract base class for all shared collaborative data types.
336
337
```python { .api }
338
class BaseType:
339
"""
340
Abstract base for all shared collaborative data types.
341
Provides common functionality for Text, Array, Map, and XML types.
342
"""
343
344
@property
345
def doc(self) -> Doc:
346
"""Get the document this type belongs to."""
347
348
@property
349
def is_prelim(self) -> bool:
350
"""Check if type is preliminary (not yet integrated)."""
351
352
@property
353
def is_integrated(self) -> bool:
354
"""Check if type is integrated into document."""
355
356
@property
357
def type_name(self) -> str:
358
"""Get the name of the type."""
359
360
def observe(self, callback: Callable[[BaseEvent], None]) -> Subscription:
361
"""Observe changes to this type."""
362
363
def observe_deep(self, callback: Callable[[list[BaseEvent]], None]) -> Subscription:
364
"""Observe deep changes including nested structures."""
365
366
def unobserve(self, subscription: Subscription) -> None:
367
"""Remove an event observer."""
368
369
def to_py(self) -> Any:
370
"""Convert to Python native type (abstract method)."""
371
```
372
373
### Sequence
374
375
Base class for sequential data types (Text, Array).
376
377
```python { .api }
378
class Sequence(BaseType):
379
"""
380
Base class for sequential data types like Text and Array.
381
Provides position tracking functionality.
382
"""
383
384
def sticky_index(self, index: int, assoc: Assoc = Assoc.AFTER) -> StickyIndex:
385
"""Create a sticky index for position tracking."""
386
```
387
388
### BaseEvent
389
390
Base class for all change events.
391
392
```python { .api }
393
class BaseEvent:
394
"""
395
Base class for all change events with automatic attribute processing.
396
Used by TextEvent, ArrayEvent, MapEvent, and XmlEvent.
397
"""
398
```
399
400
### Typed
401
402
Base class for type-safe containers.
403
404
```python { .api }
405
class Typed:
406
"""
407
Base class for type-safe containers with runtime type checking.
408
Used by TypedDoc, TypedArray, and TypedMap.
409
"""
410
```
411
412
## Usage Examples
413
414
### Basic Document Operations
415
416
```python
417
import pycrdt
418
from pycrdt import Doc, Text, Array, Map
419
420
# Create a new document
421
doc = Doc()
422
423
# Access shared types
424
text = doc.get("content", type=Text)
425
users = doc.get("users", type=Array)
426
settings = doc.get("settings", type=Map)
427
428
# Use dictionary-like interface
429
text_alt = doc["content"] # Get existing shared type
430
doc["metadata"] = Map() # Create new shared type
431
```
432
433
### Transaction Usage
434
435
```python
436
from pycrdt import Doc, Text, Array
437
438
doc = Doc()
439
text = doc.get("content", type=Text)
440
items = doc.get("items", type=Array)
441
442
# Batch multiple operations in a transaction
443
with doc.transaction() as txn:
444
text.insert(0, "Hello, world!")
445
items.append("item1")
446
items.append("item2")
447
# All changes committed atomically
448
449
# Async transaction
450
async def update_document():
451
async with doc.new_transaction() as txn:
452
text.insert(len(text), " More content")  # not `text +=`: that would make `text` local here and raise UnboundLocalError
453
items.extend(["item3", "item4"])
454
```
455
456
### Type-Safe Documents
457
458
```python
459
from typing import Any

from pycrdt import TypedDoc, Text, Array, Map
460
461
class ProjectDoc(TypedDoc):
462
title: Text
463
tasks: Array[str]
464
metadata: Map[Any]
465
466
# Create typed document
467
project = ProjectDoc()
468
469
# Type-safe access
470
project.title.insert(0, "My Project")
471
project.tasks.append("Task 1")
472
project.metadata["created"] = "2024-01-01"
473
474
# Access underlying document
475
raw_doc = project._  # the wrapped Doc is exposed through the special `_` attribute
476
```
477
478
### Event Observation
479
480
```python
481
from pycrdt import Doc, Text, TransactionEvent
482
483
doc = Doc()
484
485
def on_document_change(event: TransactionEvent):
486
print(f"Document updated: {len(event.update)} bytes")
487
488
# Subscribe to document changes
489
subscription = doc.observe(on_document_change)
490
491
# Make changes to trigger events
492
with doc.transaction():
493
text = doc.get("content", type=Text)
494
text.insert(0, "Hello")
495
496
# Clean up subscription
497
doc.unobserve(subscription)
498
```
499
500
### Async Event Streaming
501
502
```python
503
import anyio
504
from pycrdt import Doc
505
506
async def monitor_document(doc: Doc):
507
async with doc.events() as event_stream:
508
async for event in event_stream:
509
print(f"Event received: {event}")
510
511
# Run event monitoring
512
doc = Doc()
513
anyio.run(monitor_document, doc)
514
```
515
516
### Document State Management
517
518
```python
519
from pycrdt import Doc, Text
520
521
# Create documents
522
doc1 = Doc()
523
doc2 = Doc()
524
525
text1 = doc1.get("content", type=Text)
526
text1.insert(0, "Hello from doc1")
527
528
# Get state and update
529
state1 = doc1.get_state()
530
update = doc1.get_update()
531
532
# Apply to second document
533
doc2.apply_update(update)
534
text2 = doc2.get("content", type=Text)
535
print(str(text2)) # "Hello from doc1"
536
537
# Incremental updates
538
text1.insert(5, " there")
539
incremental_update = doc1.get_update(state1)
540
doc2.apply_update(incremental_update)
541
```
542
543
## Error Handling
544
545
```python
546
from pycrdt import Array, Doc, Text, TypedDoc
547
548
doc = Doc()
549
550
try:
551
# Invalid transaction usage
552
txn = doc.transaction()
553
# Without `with doc.transaction(): ...` the transaction is never entered, so nothing is committed on exit
554
555
# Type mismatches in typed documents
556
class StrictDoc(TypedDoc):
557
numbers: Array[int]
558
559
strict_doc = StrictDoc()
560
strict_doc.numbers.append("string") # May raise TypeError
561
562
except (ValueError, TypeError, RuntimeError) as e:
563
print(f"Document operation failed: {e}")
564
```