0
# BSON Handling
1
2
BSON encoding/decoding functions and MongoDB-specific data types including ObjectId, Decimal128, Binary data, and timestamp handling.
3
4
## Capabilities
5
6
### BSON Encoding and Decoding
7
8
Core functions for converting between Python documents and BSON (Binary JSON) format.
9
10
```python { .api }
11
def encode(document, check_keys=False, codec_options=DEFAULT_CODEC_OPTIONS):
12
"""
13
Encode a Python document to BSON.
14
15
Parameters:
16
- document: Python dict or other mapping
17
- check_keys: if True, check for invalid key names
18
- codec_options: BSON codec options
19
20
Returns:
21
bytes: BSON-encoded document
22
"""
23
24
def decode(data, codec_options=DEFAULT_CODEC_OPTIONS):
25
"""
26
Decode BSON data to Python document.
27
28
Parameters:
29
- data: BSON bytes
30
- codec_options: BSON codec options
31
32
Returns:
33
dict: Decoded Python document
34
"""
35
36
def decode_all(data, codec_options=DEFAULT_CODEC_OPTIONS):
37
"""
38
Decode multiple BSON documents.
39
40
Parameters:
41
- data: BSON bytes containing multiple documents
42
- codec_options: BSON codec options
43
44
Returns:
45
list: List of decoded Python documents
46
"""
47
48
def decode_iter(data, codec_options=DEFAULT_CODEC_OPTIONS):
49
"""
50
Iterate over BSON documents in data.
51
52
Parameters:
53
- data: BSON bytes containing multiple documents
54
- codec_options: BSON codec options
55
56
Yields:
57
dict: Each decoded Python document
58
"""
59
60
def decode_file_iter(file_obj, codec_options=DEFAULT_CODEC_OPTIONS):
61
"""
62
Iterate over BSON documents from file.
63
64
Parameters:
65
- file_obj: file-like object containing BSON data
66
- codec_options: BSON codec options
67
68
Yields:
69
dict: Each decoded Python document
70
"""
71
72
def is_valid(bson):
73
"""
74
Check if data is valid BSON.
75
76
Parameters:
77
- bson: bytes to validate
78
79
Returns:
80
bool: True if valid BSON
81
"""
82
83
def has_c():
84
"""
85
Check if C extensions are available.
86
87
Returns:
88
bool: True if C extensions loaded
89
"""
90
```
91
92
### BSON Class
93
94
Container class for BSON data with encoding/decoding methods.
95
96
```python { .api }
97
class BSON(bytes):
98
@classmethod
99
def encode(cls, document, check_keys=False, codec_options=DEFAULT_CODEC_OPTIONS):
100
"""
101
Encode document to BSON instance.
102
103
Parameters:
104
- document: Python dict or mapping
105
- check_keys: check for invalid key names
106
- codec_options: BSON codec options
107
108
Returns:
109
BSON: BSON instance containing encoded data
110
"""
111
112
def decode(self, codec_options=DEFAULT_CODEC_OPTIONS):
113
"""
114
Decode BSON data to Python document.
115
116
Parameters:
117
- codec_options: BSON codec options
118
119
Returns:
120
dict: Decoded Python document
121
"""
122
```
123
124
### ObjectId
125
126
MongoDB's 12-byte unique identifier type, composed of a 4-byte creation timestamp, a 5-byte per-process random value, and a 3-byte incrementing counter.
127
128
```python { .api }
129
class ObjectId:
130
def __init__(self, oid=None):
131
"""
132
Create ObjectId from hex string, bytes, or generate new one.
133
134
Parameters:
135
- oid: 24-character hex string, 12 bytes, or None for new ObjectId
136
"""
137
138
@classmethod
139
def from_datetime(cls, generation_time):
140
"""
141
Create ObjectId from datetime.
142
143
Parameters:
144
- generation_time: datetime for ObjectId timestamp
145
146
Returns:
147
ObjectId: ObjectId with specified generation time
148
"""
149
150
@classmethod
151
def is_valid(cls, oid):
152
"""
153
Check if ObjectId is valid.
154
155
Parameters:
156
- oid: ObjectId candidate
157
158
Returns:
159
bool: True if valid ObjectId
160
"""
161
162
@property
163
def binary(self):
164
"""
165
ObjectId as 12 bytes.
166
167
Returns:
168
bytes: 12-byte ObjectId
169
"""
170
171
@property
172
def generation_time(self):
173
"""
174
Datetime when ObjectId was generated.
175
176
Returns:
177
datetime: Generation time (timezone-aware)
178
"""
179
180
def __str__(self):
181
"""
182
ObjectId as 24-character hex string.
183
184
Returns:
185
str: Hex representation
186
"""
187
```
188
189
### Decimal128
190
191
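128-bit IEEE 754-2008 decimal floating-point type for storing high-precision values. Decimal128 does not implement arithmetic itself; convert with `to_decimal()` to compute on the value.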
192
193
```python { .api }
194
class Decimal128:
195
def __init__(self, value):
196
"""
197
Create Decimal128 from a string, a decimal.Decimal, or a (high, low) pair of 64-bit integers.
198
199
Parameters:
200
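- value: str or decimal.Decimal to convert, or a (high, low) tuple/list of ints holding the raw BID bits; int and float are not accepted and raise TypeError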
201
"""
202
203
@classmethod
204
def from_bid(cls, value):
205
"""
206
Create from Binary Integer Decimal representation.
207
208
Parameters:
209
- value: 16-byte BID representation
210
211
Returns:
212
Decimal128: Decimal128 instance
213
"""
214
215
def to_decimal(self):
216
"""
217
Convert to Python Decimal.
218
219
Returns:
220
Decimal: Python decimal.Decimal instance
221
"""
222
223
@property
224
def bid(self):
225
"""
226
Binary Integer Decimal representation.
227
228
Returns:
229
bytes: 16-byte BID data
230
"""
231
```
232
233
### Binary Data Types
234
235
Support for binary data with different subtypes.
236
237
```python { .api }
238
class Binary(bytes):
239
BINARY_SUBTYPE = 0
240
FUNCTION_SUBTYPE = 1
241
OLD_BINARY_SUBTYPE = 2
241
OLD_UUID_SUBTYPE = 3
242
UUID_SUBTYPE = 4
244
MD5_SUBTYPE = 5
245
ENCRYPTED_SUBTYPE = 6
246
COLUMN_SUBTYPE = 7
247
USER_DEFINED_SUBTYPE = 128
248
249
def __init__(self, data, subtype=BINARY_SUBTYPE):
250
"""
251
Create Binary data with subtype.
252
253
Parameters:
254
- data: binary data
255
- subtype: BSON binary subtype
256
"""
257
258
@property
259
def subtype(self):
260
"""
261
Binary subtype.
262
263
Returns:
264
int: BSON binary subtype
265
"""
266
267
class UUIDLegacy(Binary):
268
def __init__(self, uuid):
269
"""
270
Legacy UUID representation.
271
272
Parameters:
273
- uuid: UUID instance or bytes
274
"""
275
276
def as_uuid(self, uuid_representation=UuidRepresentation.UNSPECIFIED):
277
"""
278
Convert to UUID.
279
280
Parameters:
281
- uuid_representation: UUID representation format
282
283
Returns:
284
UUID: Python UUID instance
285
"""
286
```
287
288
### Code and Regular Expressions
289
290
JavaScript code and regular expression types.
291
292
```python { .api }
293
class Code(str):
294
def __init__(self, code, scope=None):
295
"""
296
JavaScript code with optional scope.
297
298
Parameters:
299
- code: JavaScript code string
300
- scope: optional scope dictionary
301
"""
302
303
@property
304
def scope(self):
305
"""
306
JavaScript scope variables.
307
308
Returns:
309
dict: Scope dictionary or None
310
"""
311
312
class Regex:
313
def __init__(self, pattern, flags=0):
314
"""
315
BSON regular expression.
316
317
Parameters:
318
- pattern: regex pattern string
319
- flags: regex flags
320
"""
321
322
@property
323
def pattern(self):
324
"""
325
Regular expression pattern.
326
327
Returns:
328
str: Regex pattern
329
"""
330
331
@property
332
def flags(self):
333
"""
334
Regular expression flags.
335
336
Returns:
337
int: Regex flags
338
"""
339
340
def try_compile(self):
341
"""
342
Compile to Python regex.
343
344
Returns:
345
Pattern: Compiled Python regex (raises re.error if the pattern or flags are not valid for Python's re module)
346
"""
347
```
348
349
### Special Values and Types
350
351
Min/Max keys, timestamps, database references, and 64-bit integers.
352
353
```python { .api }
354
class MinKey:
355
"""BSON MinKey - compares less than all other values."""
356
357
class MaxKey:
358
"""BSON MaxKey - compares greater than all other values."""
359
360
class Timestamp:
361
def __init__(self, time, inc):
362
"""
363
MongoDB timestamp.
364
365
Parameters:
366
- time: seconds since the Unix epoch as an unsigned 32-bit int, or a datetime instance (converted to UTC seconds)
367
- inc: increment value (32-bit)
368
"""
369
370
@property
371
def time(self):
372
"""
373
Timestamp time component.
374
375
Returns:
376
int: Time value
377
"""
378
379
@property
380
def inc(self):
381
"""
382
Timestamp increment component.
383
384
Returns:
385
int: Increment value
386
"""
387
388
class DBRef:
389
def __init__(self, collection, id, database=None, **kwargs):
390
"""
391
Database reference.
392
393
Parameters:
394
- collection: collection name
395
- id: document identifier
396
- database: optional database name
397
"""
398
399
@property
400
def collection(self):
401
"""
402
Referenced collection name.
403
404
Returns:
405
str: Collection name
406
"""
407
408
@property
409
def id(self):
410
"""
411
Referenced document ID.
412
413
Returns:
414
Any: Document identifier
415
"""
416
417
@property
418
def database(self):
419
"""
420
Referenced database name.
421
422
Returns:
423
str: Database name or None
424
"""
425
426
class Int64(int):
427
"""BSON int64 type. Plain Python ints that fit in 32 bits are encoded as BSON int32; wrap a value in Int64 to force BSON int64 encoding."""
428
```
429
430
### Codec Options
431
432
Configuration for BSON encoding/decoding behavior.
433
434
```python { .api }
435
class CodecOptions:
436
def __init__(
437
self,
438
document_class=dict,
439
tz_aware=False,
440
uuid_representation=UuidRepresentation.UNSPECIFIED,
441
unicode_decode_error_handler='strict',
442
tzinfo=None,
443
type_registry=None,
444
datetime_conversion=DatetimeConversion.DATETIME
445
):
446
"""
447
BSON codec configuration.
448
449
Parameters:
450
- document_class: class for decoded documents
451
- tz_aware: timezone-aware datetime instances
452
- uuid_representation: UUID representation format
453
- unicode_decode_error_handler: Unicode error handling
454
- tzinfo: timezone for datetime instances
455
- type_registry: custom type registry
456
- datetime_conversion: datetime conversion mode
457
"""
458
459
DEFAULT_CODEC_OPTIONS: CodecOptions
460
```
461
462
## Usage Examples
463
464
### Basic BSON Operations
465
466
```python
467
import bson
468
from bson import encode, decode, ObjectId
469
470
# Encode Python document to BSON
471
document = {"name": "Alice", "age": 30, "_id": ObjectId()}
472
bson_data = encode(document)
473
print(f"BSON size: {len(bson_data)} bytes")
474
475
# Decode BSON back to Python
476
decoded = decode(bson_data)
477
print(f"Decoded: {decoded}")
478
479
# Work with multiple documents
480
docs = [{"x": i} for i in range(3)]
481
bson_data = b''.join(encode(doc) for doc in docs)
482
483
# Decode all at once
484
all_docs = bson.decode_all(bson_data)
485
print(f"All docs: {all_docs}")
486
487
# Or iterate over them
488
for doc in bson.decode_iter(bson_data):
489
print(f"Doc: {doc}")
490
```
491
492
### Working with ObjectIds
493
494
```python
495
from bson import ObjectId
496
from datetime import datetime
497
498
# Generate new ObjectId
499
oid = ObjectId()
500
print(f"ObjectId: {oid}")
501
print(f"Generated at: {oid.generation_time}")
502
503
# Create ObjectId from string
504
oid_str = str(oid)
505
oid2 = ObjectId(oid_str)
506
print(f"From string: {oid2}")
507
508
# Create ObjectId with specific timestamp
509
past_time = datetime(2023, 1, 1)
510
oid_past = ObjectId.from_datetime(past_time)
511
print(f"From datetime: {oid_past}")
512
```
513
514
### High-Precision Decimals
515
516
```python
517
from bson import Decimal128
518
from decimal import Decimal
519
520
# Create from string for precision
521
dec128 = Decimal128("123.456789012345678901234567890")
522
print(f"Decimal128: {dec128}")
523
524
# Convert back to Python Decimal
525
py_decimal = dec128.to_decimal()
526
print(f"Python Decimal: {py_decimal}")
527
528
# Use in documents
529
document = {
530
"price": Decimal128("999.99"),
531
"tax_rate": Decimal128("0.0825")
532
}
533
```
534
535
### Binary Data
536
537
```python
538
from bson import Binary
539
import uuid
540
541
# Store binary data
542
image_data = b'\x89PNG\r\n\x1a\n...' # PNG header
543
binary_field = Binary(image_data, Binary.BINARY_SUBTYPE)
544
545
# Store UUID
546
user_id = uuid.uuid4()
547
uuid_binary = Binary(user_id.bytes, Binary.UUID_SUBTYPE)
548
549
document = {
550
"image": binary_field,
551
"user_id": uuid_binary
552
}
553
```