CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-bson

Independent BSON codec for Python that doesn't depend on MongoDB

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/custom-objects.md

Custom Object Serialization

Framework for creating custom BSON-serializable objects through the BSONCoding abstract base class. This system enables automatic serialization and deserialization of custom Python objects with class registration and metadata preservation.

Capabilities

BSONCoding Abstract Base Class

Abstract base class that defines the interface for custom BSON-serializable objects, requiring implementation of encoding and initialization methods.

from abc import ABCMeta, abstractmethod

class BSONCoding(metaclass=ABCMeta):
    """
    Interface for custom objects that can be serialized to and from BSON.

    Subclasses must implement both bson_encode() and bson_init(); a subclass
    that leaves either one out cannot be instantiated (TypeError).

    The metaclass is declared with the Python 3 ``metaclass=`` keyword.
    A ``__metaclass__`` class attribute is silently ignored by Python 3,
    which would leave the abstract methods unenforced.
    """

    @abstractmethod
    def bson_encode(self):
        """
        Serialize object state to dictionary for BSON encoding.

        Returns:
        dict: Dictionary representation of object state
        """

    @abstractmethod
    def bson_init(self, raw_values):
        """
        Initialize object from deserialized BSON data.

        Parameters:
        - raw_values: dict, deserialized data including class metadata

        Returns:
        object: Initialized object instance (usually self) or alternative object
        """

Usage example:

from bson.codec import BSONCoding
import bson

class Person(BSONCoding):
    """Example BSON-serializable record: a name, an age and an optional email."""

    def __init__(self, name, age, email=None):
        self.name = name
        self.age = age
        self.email = email

    def bson_encode(self):
        """
        Convert to dict for BSON serialization.

        The "email" key is left out only when no email is set (None), so any
        other value, including an empty string, survives a round trip.

        Returns:
        dict: "name" and "age", plus "email" when one is set
        """
        data = {
            "name": self.name,
            "age": self.age,
        }
        if self.email is not None:
            data["email"] = self.email
        return data

    def bson_init(self, raw_values):
        """
        Initialize from BSON data.

        Parameters:
        - raw_values: dict with "name" and "age"; "email" is optional and
          defaults to None when absent

        Returns:
        Person: this instance
        """
        self.name = raw_values["name"]
        self.age = raw_values["age"]
        self.email = raw_values.get("email")
        return self

    def __repr__(self):
        # !r quotes real strings but leaves None unquoted, so a missing email
        # shows as email=None rather than the misleading email='None'.
        return f"Person(name={self.name!r}, age={self.age}, email={self.email!r})"

# Register Person so that deserialization can find the class by name
bson.import_class(Person)

# Create an instance and serialize it to BSON
person = Person("Alice", 30, "alice@example.com")
bson_data = bson.dumps(person)

# Deserialize the BSON data back into a Person object
restored_person = bson.loads(bson_data)
print(type(restored_person))  # <class '__main__.Person'>
print(restored_person)        # Person(name='Alice', age=30, email='alice@example.com')

Class Registration Functions

Functions for registering BSONCoding subclasses to enable automatic deserialization by class name.

def import_class(cls):
    """
    Make one BSONCoding subclass available for deserialization by class name.

    Parameters:
    - cls: the BSONCoding subclass to register

    Note: classes that do not inherit from BSONCoding are not registered
    """

def import_classes(*args):
    """
    Make several BSONCoding subclasses available for deserialization at once.

    Parameters:
    - args: any number of BSONCoding subclasses, passed positionally
    """

def import_classes_from_modules(*args):
    """
    Register every BSONCoding subclass found in the given modules.

    Parameters:
    - args: any number of module objects to scan for BSONCoding classes
    """

Usage example:

from bson.codec import BSONCoding, import_class, import_classes, import_classes_from_modules
import bson

class User(BSONCoding):
    """Example account record made of a username and a role."""

    def __init__(self, username, role="user"):
        self.username, self.role = username, role

    def bson_encode(self):
        """Return the username and role as a plain dictionary."""
        return dict(username=self.username, role=self.role)

    def bson_init(self, raw_values):
        """Fill this instance from decoded values; a missing role becomes "user"."""
        # Both values are looked up before either attribute is assigned.
        self.username, self.role = (
            raw_values["username"],
            raw_values.get("role", "user"),
        )
        return self

class Product(BSONCoding):
    """Example catalogue item made of a name and a price."""

    def __init__(self, name, price):
        self.name, self.price = name, price

    def bson_encode(self):
        """Return the name and price as a plain dictionary."""
        return dict(name=self.name, price=self.price)

    def bson_init(self, raw_values):
        """Fill this instance from decoded values; both keys are required."""
        # Assign in declaration order: name first, then price.
        for field in ("name", "price"):
            setattr(self, field, raw_values[field])
        return self

# Register individual classes
import_class(User)
bson.import_class(Product)  # Alternative syntax

# Alternative: register multiple classes at once
# import_classes(User, Product)

# Alternative: register all BSONCoding classes from a module
# bson.import_classes_from_modules(my_models_module)

# Test serialization of custom objects nested inside a plain dictionary
user = User("alice", "admin")
product = Product("Laptop", 999.99)

data = {
    "user": user,
    "product": product,
    "timestamp": "2023-01-01"
}

bson_data = bson.dumps(data)
restored = bson.loads(bson_data)

print(type(restored["user"]))     # <class '__main__.User'>
print(type(restored["product"]))  # <class '__main__.Product'>

Advanced BSONCoding Patterns

Alternative Object Return

The bson_init method can return a different object instance:

from bson.codec import BSONCoding
import bson

class Singleton(BSONCoding):
    """Example class that keeps a single shared instance in ``_instance``."""

    # The one shared instance; created on first construction.
    _instance = None

    def __new__(cls, *args, **kwargs):
        """Return the stored instance, creating it on the first call."""
        if cls._instance is not None:
            return cls._instance
        cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, name="default"):
        """Set the name once; later constructions leave it untouched."""
        if hasattr(self, 'initialized'):
            return
        self.name = name
        self.initialized = True

    def bson_encode(self):
        """Return the name as a plain dictionary."""
        return dict(name=self.name)

    def bson_init(self, raw_values):
        """Hand back the singleton instance rather than the object being initialized."""
        return Singleton(raw_values["name"])

# Register Singleton so it can be looked up by name during deserialization
bson.import_class(Singleton)

# Test singleton behavior through BSON
obj1 = Singleton("test")
bson_data = bson.dumps(obj1)
obj2 = bson.loads(bson_data)

# bson_init() returned the existing singleton, so both names refer to one object
print(obj1 is obj2)  # True (same instance)

Nested Custom Objects

Custom objects can contain other custom objects:

from bson.codec import BSONCoding
import bson

class Address(BSONCoding):
    """Example postal address made of a street, a city and a country."""

    def __init__(self, street, city, country):
        self.street, self.city, self.country = street, city, country

    def bson_encode(self):
        """Return street, city and country as a plain dictionary."""
        return dict(street=self.street, city=self.city, country=self.country)

    def bson_init(self, raw_values):
        """Fill this instance from decoded values; all three keys are required."""
        # Assign in declaration order: street, city, country.
        for field in ("street", "city", "country"):
            setattr(self, field, raw_values[field])
        return self

class Customer(BSONCoding):
    """Example customer holding a name, an Address object and a list of orders."""

    def __init__(self, name, address, orders=None):
        self.name = name
        self.address = address  # Address object
        # Fall back to a fresh list when no (or an empty) orders value is given.
        self.orders = orders if orders else []

    def bson_encode(self):
        """Return name, address and orders; the address is encoded recursively."""
        return dict(
            name=self.name,
            address=self.address,  # Will be recursively encoded
            orders=self.orders,
        )

    def bson_init(self, raw_values):
        """Fill this instance from decoded values; "orders" defaults to an empty list."""
        # "address" has already been deserialized as an Address at this point.
        for field in ("name", "address"):
            setattr(self, field, raw_values[field])
        self.orders = raw_values.get("orders", [])
        return self

# Register both classes so each can be looked up by name when decoding
bson.import_classes(Address, Customer)

# Create nested objects: a Customer that holds an Address
address = Address("123 Main St", "Anytown", "USA")
customer = Customer("John Doe", address, ["order1", "order2"])

# Serialize and deserialize the outer object; the nested Address travels with it
bson_data = bson.dumps(customer)
restored = bson.loads(bson_data)

print(type(restored))          # <class '__main__.Customer'>
print(type(restored.address))  # <class '__main__.Address'>
print(restored.address.city)   # "Anytown"

Object Encoding and Decoding Process

Encoding Process

When a BSONCoding object is serialized:

  1. dumps() detects BSONCoding instance
  2. Calls obj.bson_encode() to get dictionary representation
  3. Adds special "$$__CLASS_NAME__$$" field with class name
  4. Encodes resulting dictionary as BSON document
# Internal encoding process example
class MyClass(BSONCoding):
    """Minimal example class wrapping a single value."""

    def __init__(self, value):
        self.value = value

    def bson_encode(self):
        """Return the wrapped value as a one-key dictionary."""
        return dict(value=self.value)

    def bson_init(self, raw_values):
        """Restore the wrapped value from decoded data and return this instance."""
        restored_value = raw_values["value"]
        self.value = restored_value
        return self

obj = MyClass(42)

# When dumps(obj) is called, the steps are:
# 1. obj.bson_encode() returns {"value": 42}
# 2. The class name is added: {"value": 42, "$$__CLASS_NAME__$$": "MyClass"}
# 3. The resulting dictionary is encoded as a BSON document

Decoding Process

When a BSON document with class metadata is deserialized:

  1. loads() detects "$$__CLASS_NAME__$$" field in dictionary
  2. Looks up registered class by name
  3. Creates an empty instance using the internal _EmptyClass technique (an empty object whose class is then changed to the registered class)
  4. Calls instance.bson_init(raw_values) to initialize
  5. Returns initialized object
# Internal decoding process
# 1. BSON decoded to: {"value": 42, "$$__CLASS_NAME__$$": "MyClass"}
# 2. Class lookup finds MyClass in registry
# 3. Empty instance created and class changed to MyClass
# 4. instance.bson_init({"value": 42, "$$__CLASS_NAME__$$": "MyClass"}) called
# 5. Initialized MyClass instance returned

Error Handling

Missing Class Registration

class MissingClassDefinition(ValueError):
    """Raised when trying to deserialize unknown class"""

    def __init__(self, class_name):
        """Signature stub; class_name is the name that could not be resolved."""

Raised when the BSON data contains a class name that has not been registered with import_class():

from bson.codec import BSONCoding, MissingClassDefinition
import bson

class UnregisteredClass(BSONCoding):
    """Example class that is serialized before it has been registered."""

    def bson_encode(self):
        """Return a fixed one-key payload."""
        return dict(data="test")

    def bson_init(self, raw_values):
        """Ignore the decoded values and return this instance unchanged."""
        return self

# Serialize without registering the class first (encoding itself still works)
obj = UnregisteredClass()
bson_data = bson.dumps(obj)

# Attempting to deserialize fails because the class name is not registered
try:
    restored = bson.loads(bson_data)
except MissingClassDefinition as e:
    print(f"Class not registered: {e}")
    
# Solution: register the class, then decode the same data again
bson.import_class(UnregisteredClass)
restored = bson.loads(bson_data)  # Now works

BSONCoding Interface Violations

Classes must properly implement the abstract methods:

from bson.codec import BSONCoding

# Incorrect - missing abstract method implementations
class BadClass(BSONCoding):
    """Deliberately incomplete: implements neither bson_encode() nor bson_init()."""

# Instantiating a class that leaves the abstract methods unimplemented fails
try:
    obj = BadClass()  # Raises TypeError
except TypeError as e:
    print(f"Abstract method error: {e}")

# Correct - implement both abstract methods
class GoodClass(BSONCoding):
    """Smallest valid implementation: both abstract methods are provided."""

    def bson_encode(self):
        """Return an empty dictionary (there is no state to serialize)."""
        return dict()

    def bson_init(self, raw_values):
        """Ignore the decoded values and return this instance unchanged."""
        return self

Best Practices

Serialization Design

  • Include only essential data in bson_encode() return value
  • Handle optional fields gracefully in bson_init()
  • Consider version compatibility when changing object structure
  • Use meaningful field names that won't conflict with BSON metadata

Class Registration

  • Register classes immediately after definition or in module initialization
  • Use import_classes_from_modules() for automatic registration
  • Be careful with class name conflicts in different modules

Performance Considerations

  • BSONCoding objects add overhead compared to plain dictionaries
  • Nested custom objects multiply serialization cost
  • Consider using regular dictionaries for simple data structures

Install with Tessl CLI

npx tessl i tessl/pypi-bson

docs

custom-objects.md

index.md

network.md

objectid.md

serialization.md

types.md

tile.json