Python Data Structures for Humans - a library for data validation and transformation using structured models
—
Additional field types and utilities provided as contrib modules in Schematics. These modules extend core functionality with specialized field types for specific use cases and external dependencies.
MongoDB-specific field types for working with BSON data types.
from schematics.contrib.mongo import ObjectIdType
class ObjectIdType(BaseType):
    """
    Field type for MongoDB ObjectId values with BSON integration.

    Converts automatically between string representations and BSON
    ObjectId instances. The pymongo/bson package is required.
    """

    def __init__(self, auto_fill=False, **kwargs):
        """
        Create an ObjectId field.

        Args:
            auto_fill (bool): Whether ObjectIds should be auto-generated
                (default: False)
            **kwargs: Options forwarded to the base field
        """

    def to_native(self, value, context=None):
        """
        Turn a string or an ObjectId into a BSON ObjectId.

        Args:
            value: ObjectId instance or its string representation
            context: Conversion context

        Returns:
            bson.ObjectId: The resulting BSON ObjectId instance

        Raises:
            ConversionError: When value cannot be converted to an ObjectId
        """

    def to_primitive(self, value, context=None):
        """
        Render an ObjectId as its string representation.

        Args:
            value: BSON ObjectId instance
            context: Conversion context

        Returns:
            str: The ObjectId in string form
        """


# Native Python enum support for field values.
from schematics.contrib.enum_type import EnumType
class EnumType(BaseType):
    """
    Field type whose values are members of a Python enum.

    Only members of the given enum class are allowed; optionally the
    underlying enum values can be accepted as well as the members.
    """

    def __init__(self, enum, use_values=False, **kwargs):
        """
        Create an enum field.

        Args:
            enum: The enum class that values are restricted to
            use_values (bool): Accept enum values in addition to members
            **kwargs: Options forwarded to the base field
        """

    def to_native(self, value, context=None):
        """
        Resolve the input to an enum member.

        Args:
            value: An enum member, the name of a member as a string,
                or an enum value
            context: Conversion context

        Returns:
            The enum member that corresponds to the input

        Raises:
            ConversionError: When value is not a valid enum member/value
        """

    def to_primitive(self, value, context=None):
        """
        Export an enum member as a primitive.

        Args:
            value: Enum member
            context: Conversion context

        Returns:
            str or primitive: The member's name by default, or its value
            when use_values=True
        """


# Simple state machine for data processing workflows.
from schematics.contrib.machine import Machine
class Machine:
    """
    Minimal state machine for data transformation workflows.

    Moves data through a fixed sequence of states (raw, converted,
    validated, serialized), running a callback automatically on each
    state transition.
    """

    states = ('raw', 'converted', 'validated', 'serialized')

    def __init__(self, data, *args):
        """
        Create a machine from data and a schema.

        Args:
            data: Initial data
            *args: Schema and any further arguments used by the
                transformations
        """

    def convert(self):
        """
        Move from the raw state to the converted state.

        Runs the conversion transformation on the data.
        """

    def validate(self):
        """
        Move from the converted state to the validated state.

        Runs validation on the converted data.
        """

    def serialize(self):
        """
        Move from the validated state to the serialized state.

        Turns the validated data into its primitive representation.
        """

    def can(self, state):
        """
        Tell whether a transition to the given state is possible.

        Args:
            state (str): Name of the target state

        Returns:
            bool: True if the transition is valid
        """

    def cannot(self, state):
        """
        Tell whether a transition to the given state is not possible.

        Args:
            state (str): Name of the target state

        Returns:
            bool: True if the transition is invalid
        """


from schematics.models import Model
from schematics.types import StringType
from schematics.contrib.mongo import ObjectIdType
import bson
class Document(Model):
    _id = ObjectIdType(auto_fill=True)
    title = StringType(required=True)
    content = StringType()


# Build a document whose _id is supplied as a string
doc_fields = {
    '_id': '507f1f77bcf86cd799439011',
    'title': 'My Document',
    'content': 'Document content here',
}
doc = Document(doc_fields)
doc.validate()  # Converts string to ObjectId

# Natively the field is a BSON ObjectId; exported, it is a plain string
object_id = doc._id  # bson.ObjectId('507f1f77bcf86cd799439011')
exported = doc.to_primitive()
id_string = exported['_id']  # '507f1f77bcf86cd799439011'

# With auto_fill, a document created without _id gets a new ObjectId
# NOTE(review): confirm that auto_fill=True really populates a missing
# _id in the installed Schematics version.
new_doc = Document({'title': 'Auto ID Document'})
new_doc.validate()  # _id is automatically generated

import enum
from schematics.models import Model
from schematics.types import StringType
from schematics.contrib.enum_type import EnumType
class Status(enum.Enum):
    DRAFT = 'draft'
    PUBLISHED = 'published'
    ARCHIVED = 'archived'


class Priority(enum.Enum):
    LOW = 1
    MEDIUM = 2
    HIGH = 3


class Article(Model):
    title = StringType(required=True)
    # use_values is not enabled here, so pass a Status member or its
    # name ('DRAFT'); enum values such as 'draft' are not accepted.
    status = EnumType(Status, required=True)
    priority = EnumType(Priority, use_values=True)  # Accept numeric values


# Using enum members
article = Article({
    'title': 'My Article',
    'status': Status.DRAFT,  # Direct enum member
    'priority': Priority.HIGH,  # Direct enum member
})

# Using enum names/values
article2 = Article({
    'title': 'Another Article',
    'status': 'PUBLISHED',  # Member name -> Status.PUBLISHED
    'priority': 2,  # Numeric value -> Priority.MEDIUM (use_values=True)
})

article.validate()  # Success
article2.validate()  # Success

# Export behavior: member name by default, member value with use_values=True
primitive_data = article.to_primitive()
# {'title': 'My Article', 'status': 'DRAFT', 'priority': 3}
primitive_data2 = article2.to_primitive()
# {'title': 'Another Article', 'status': 'PUBLISHED', 'priority': 2}

from schematics.models import Model
from schematics.types import StringType, IntType
from schematics.contrib.machine import Machine
class User(Model):
    name = StringType(required=True)
    age = IntType(min_value=0, required=True)


# Input for the workflow; note that age arrives as a string
raw_data = {'name': 'John Doe', 'age': '30'}
user_schema = User

# Create the state machine from the raw data and the schema
machine = Machine(raw_data, user_schema)
print(machine.state)  # 'raw'

# Step 1: convert data types
machine.convert()
print(machine.state)  # 'converted'
print(machine.data)  # {'name': 'John Doe', 'age': 30} - age now integer

# Step 2: validate the converted data
machine.validate()
print(machine.state)  # 'validated'

# Step 3: serialize to primitives
machine.serialize()
print(machine.state)  # 'serialized'
print(machine.data)  # Final primitive representation

# Query which transitions are allowed from the current state
print(machine.can('raw'))  # False - can't go back to raw
print(machine.cannot('raw'))  # True - cannot transition to raw

import enum
from schematics.models import Model
from schematics.types import StringType, DateTimeType
from schematics.contrib.mongo import ObjectIdType
from schematics.contrib.enum_type import EnumType
from datetime import datetime
class DocumentStatus(enum.Enum):
    DRAFT = 'draft'
    REVIEW = 'review'
    PUBLISHED = 'published'


class BlogPost(Model):
    _id = ObjectIdType()
    title = StringType(required=True, max_length=200)
    content = StringType(required=True)
    # use_values is not enabled, so string input must be a member name
    # ('REVIEW'), not a member value ('review').
    status = EnumType(DocumentStatus, default=DocumentStatus.DRAFT)
    created_at = DateTimeType(default=datetime.utcnow)
    updated_at = DateTimeType()


# Create blog post with mixed field types
post_data = {
    'title': 'Understanding Schematics',
    'content': 'Schematics is a powerful data validation library...',
    'status': 'REVIEW',  # Member name converted to DocumentStatus.REVIEW
    'created_at': '2024-01-15T10:30:00Z',
}
post = BlogPost(post_data)
post.validate()

# Export for API
api_response = post.to_primitive()
# {
#     '_id': None,  # Not set
#     'title': 'Understanding Schematics',
#     'content': 'Schematics is a powerful data validation library...',
#     'status': 'REVIEW',
#     'created_at': '2024-01-15T10:30:00+00:00',
#     'updated_at': None
# }

# Save to MongoDB (conceptual)
post._id = ObjectIdType().to_native('507f1f77bcf86cd799439011')
post.status = DocumentStatus.PUBLISHED
post.updated_at = datetime.utcnow()
final_data = post.to_primitive()
# Now includes ObjectId string and updated status/timestamp

Install with Tessl CLI
npx tessl i tessl/pypi-schematics