Lightweight, extensible schema and data validation tool for Python dictionaries.
Cerberus provides advanced validation capabilities including normalization, custom validators, field dependencies, coercion, and complex validation scenarios. These features enable sophisticated validation logic for complex data processing requirements.
Document transformation and value coercion during validation.
# API reference stub: signatures and docstrings only, no implementations.
class Validator:
    @property
    def coercers(self):
        """Available coercion methods for transforming values"""
    @property
    def default_setters(self):
        """Available default value setter methods"""
    @property
    def normalization_rules(self):
        """Rules applied during document normalization"""

Access to validation rule methods and capabilities.
# API reference stub: signatures and docstrings only, no implementations.
class Validator:
    @property
    def validators(self):
        """Available validator methods for custom validation logic"""
    @property
    def rules(self):
        """All available validation rules"""
    @property
    def validation_rules(self):
        """Rules applied during validation phase"""

Pre-defined rule processing categories.
# API reference stub: class-level attributes, declared by annotation only.
class Validator:
    mandatory_validations: tuple
    """Rules that are evaluated on every field regardless of schema"""
    priority_validations: tuple
    """Rules that are processed first during validation"""

Complex schema validation capabilities.
# API reference stub: each rule method is listed by signature only (`...` body).
# Every method takes the rule's constraint, the field name and the field value.
class Validator:
    def _validate_dependencies(self, dependencies, field, value): ...
    def _validate_excludes(self, excludes, field, value): ...
    def _validate_contains(self, contains, field, value): ...
    # NOTE(review): the list examples on this page use the `schema` rule for
    # per-item rules, not `itemsrules` - confirm which Cerberus version this
    # signature belongs to.
    def _validate_itemsrules(self, itemsrules, field, value): ...
    def _validate_keysrules(self, keysrules, field, value): ...
    def _validate_valuesrules(self, valuesrules, field, value): ...
    def _validate_oneof(self, oneof, field, value): ...
    def _validate_anyof(self, anyof, field, value): ...
    def _validate_allof(self, allof, field, value): ...
    def _validate_noneof(self, noneof, field, value): ...

from cerberus import Validator
# Schema combining coercion with default values
schema = {
    'name': {
        'type': 'string',
        'coerce': str.title,  # Title-case the incoming value
        'default': 'Anonymous',
    },
    'age': {
        'type': 'integer',
        'coerce': int,  # Cast the incoming value to int
        'min': 0,
    },
    'tags': {
        'type': 'list',
        'default': [],
        'schema': {'type': 'string', 'coerce': str.lower},
    },
}
v = Validator(schema)

# Input with lower-case text, a numeric string and mixed-case tags
document = {
    'name': 'john doe',
    'age': '25',
    'tags': ['Python', 'VALIDATION'],
}

# Apply the coercers and defaults declared in the schema
normalized = v.normalized(document)
print(normalized)
# Output: {
# 'name': 'John Doe',
# 'age': 25,
# 'tags': ['python', 'validation']
# }

# Fields absent from the input are filled in from their defaults
partial_doc = {'age': '30'}
normalized_partial = v.normalized(partial_doc)
print(normalized_partial)
# Output: {'name': 'Anonymous', 'age': 30, 'tags': []}

from cerberus import Validator
import datetime
def to_datetime(value):
    """Parse an ISO-8601 string into a datetime; pass other values through.

    Non-string input is returned unchanged. A string that is not valid ISO
    format makes ``datetime.datetime.fromisoformat`` raise ``ValueError``.
    """
    if not isinstance(value, str):
        return value
    return datetime.datetime.fromisoformat(value)
def normalize_email(value):
    """Lower-case an email string and trim surrounding whitespace.

    Non-string input is returned unchanged.
    """
    if not isinstance(value, str):
        return value
    lowered = value.lower()
    return lowered.strip()
# Each field is run through its custom coercion function
schema = {
    'created_at': {'type': 'datetime', 'coerce': to_datetime},
    'email': {
        'type': 'string',
        'coerce': normalize_email,
        'regex': r'^[^@]+@[^@]+\.[^@]+$',
    },
}
v = Validator(schema)

document = {
    'created_at': '2023-01-01T12:00:00',
    'email': ' USER@EXAMPLE.COM ',
}

normalized = v.normalized(document)
print(normalized['created_at'])  # datetime object
print(normalized['email'])  # 'user@example.com'

from cerberus import Validator
# Schema with field dependencies
schema = {
    'name': {'type': 'string'},
    'age': {'type': 'integer'},
    'email': {
        'type': 'string',
        'dependencies': ['name', 'age'],  # email requires name and age
    },
    'phone': {
        'type': 'string',
        # The mapping form of `dependencies` maps a field name to the values
        # that field must hold. It compares values for equality and does not
        # accept nested rules such as 'regex', so the allowed email values
        # are listed explicitly.
        'dependencies': {'email': ['john@company.com']},
    },
}
v = Validator(schema)

# Valid with all dependencies satisfied
valid_doc = {
    'name': 'John',
    'age': 30,
    'email': 'john@company.com',
    'phone': '555-1234',
}
print(v.validate(valid_doc))  # True

# Invalid - email present without required dependencies
invalid_doc1 = {'email': 'john@example.com'}
print(v.validate(invalid_doc1))  # False - missing name and age

# Invalid - phone present but email is not one of the allowed values
invalid_doc2 = {
    'name': 'John',
    'age': 30,
    'email': 'john@example.com',  # Not an allowed value for the dependency
    'phone': '555-1234',
}
print(v.validate(invalid_doc2))  # False

from cerberus import Validator
# Each login field excludes the other two, so only one may be present
schema = {
    'login_method': {
        'type': 'string',
        'allowed': ['email', 'username', 'phone'],
    },
    # email may not appear together with username or phone
    'email': {'type': 'string', 'excludes': ['username', 'phone']},
    # username may not appear together with email or phone
    'username': {'type': 'string', 'excludes': ['email', 'phone']},
    # phone may not appear together with email or username
    'phone': {'type': 'string', 'excludes': ['email', 'username']},
}
v = Validator(schema)

# Valid - a single login field is supplied
valid_doc = {
    'login_method': 'email',
    'email': 'john@example.com',
}
print(v.validate(valid_doc))  # True

# Invalid - two mutually exclusive login fields are supplied
invalid_doc = {
    'login_method': 'email',
    'email': 'john@example.com',
    'username': 'john_doe',  # Conflicts with email
}
print(v.validate(invalid_doc))  # False

from cerberus import Validator
# Schema with logical constraints
schema = {
    'user_type': {'type': 'string'},
    'permissions': {
        'type': 'dict',
        'oneof': [  # Must match exactly one of these schemas
            {
                'schema': {
                    'read': {'type': 'boolean'},
                    'write': {'type': 'boolean'},
                },
            },
            {
                'schema': {
                    'admin': {'type': 'boolean', 'allowed': [True]},
                },
            },
        ],
    },
    'contact_info': {
        'type': 'dict',
        'anyof': [  # Must match at least one of these schemas
            {'schema': {'email': {'type': 'string', 'required': True}}},
            {'schema': {'phone': {'type': 'string', 'required': True}}},
            {'schema': {'address': {'type': 'string', 'required': True}}},
        ],
    },
}
v = Validator(schema)

# Valid - matches admin permissions schema
valid_doc1 = {
    'user_type': 'admin',
    'permissions': {'admin': True},
    'contact_info': {'email': 'admin@example.com'},
}
print(v.validate(valid_doc1))  # True

# Valid - matches read/write permissions schema and has phone.
# Each sub-schema rejects keys it does not declare (unknown keys are not
# allowed by default), so contact_info carries only the one key that its
# matching sub-schema defines.
valid_doc2 = {
    'user_type': 'user',
    'permissions': {'read': True, 'write': False},
    'contact_info': {'phone': '555-1234'},
}
print(v.validate(valid_doc2))  # True

# Invalid - permissions match neither schema (mixing both formats)
invalid_doc = {
    'user_type': 'user',
    'permissions': {'read': True, 'admin': True},  # Violates oneof
    'contact_info': {'name': 'John'},  # Doesn't match any anyof schemas
}
print(v.validate(invalid_doc))  # False

from cerberus import Validator
class BusinessValidator(Validator):
    """Validator extended with the custom ``business_hours`` and ``tax_id`` rules."""

    @staticmethod
    def _parse_clock_time(text):
        """Split an ``'HH:MM'`` string into an ``(hour, minute)`` tuple of ints.

        Raises ``ValueError`` when the text is not exactly two integer parts.
        """
        hour_text, minute_text = text.split(':')
        return int(hour_text), int(minute_text)

    def _validate_business_hours(self, business_hours, field, value):
        """Check that ``value`` is an ``'HH:MM-HH:MM'`` range with start before end.

        Problems are reported through ``self._error``; nothing is returned.
        The check is skipped when the rule's constraint is falsy.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if not business_hours:
            return
        try:
            # Unpacking fails with ValueError unless there are exactly two times.
            opening, closing = (
                self._parse_clock_time(part) for part in value.split('-')
            )
        except (ValueError, AttributeError):
            self._error(field, "business hours must be in format 'HH:MM-HH:MM'")
            return

        within_clock = all(
            0 <= hour <= 23 and 0 <= minute <= 59
            for hour, minute in (opening, closing)
        )
        if not within_clock:
            self._error(field, "business hours must use 24-hour format")
        elif opening >= closing:
            # Compare (hour, minute) pairs so ranges inside one hour,
            # such as '09:00-09:30', are accepted.
            self._error(field, "start time must be before end time")

    def _validate_tax_id(self, tax_id, field, value):
        """Check that ``value`` is a string of exactly nine ASCII digits.

        Problems are reported through ``self._error``; nothing is returned.
        The check is skipped when the rule's constraint is falsy.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if not tax_id:
            return
        is_nine_digits = (
            isinstance(value, str)
            and value.isascii()  # exclude non-ASCII digits that isdigit() accepts
            and value.isdigit()
            and len(value) == 9
        )
        if not is_nine_digits:
            self._error(field, "tax ID must be 9 digits")
# Enable the custom rules by naming them in the schema
schema = {
    'business_name': {'type': 'string'},
    'hours': {'type': 'string', 'business_hours': True},
    'tax_id': {'type': 'string', 'tax_id': True},
}
v = BusinessValidator(schema)

valid_doc = {
    'business_name': 'ABC Corp',
    'hours': '09:00-17:00',
    'tax_id': '123456789',
}
print(v.validate(valid_doc))  # True

invalid_doc = {
    'business_name': 'ABC Corp',
    'hours': '17:00-09:00',  # End before start
    'tax_id': '12345',  # Too short
}
print(v.validate(invalid_doc))  # False
print(v.errors)

from cerberus import Validator
# Nested schema: a list of product dicts plus a free-form metadata dict
schema = {
    'products': {
        'type': 'list',
        'schema': {
            'type': 'dict',
            'schema': {
                'name': {'type': 'string', 'required': True},
                'price': {'type': 'float', 'min': 0},
                'category': {'type': 'string', 'required': True},
                'tags': {
                    'type': 'list',
                    'schema': {'type': 'string'},
                    'contains': ['available'],  # 'available' must be among the tags
                },
            },
        },
    },
    'metadata': {
        'type': 'dict',
        # Keys: lower-case letters and underscores only
        'keysrules': {'type': 'string', 'regex': r'^[a-z_]+$'},
        # Values: strings only
        'valuesrules': {'type': 'string'},
    },
}
v = Validator(schema)

valid_doc = {
    'products': [
        {
            'name': 'Widget',
            'price': 19.99,
            'category': 'gadgets',
            'tags': ['new', 'available', 'popular'],
        },
    ],
    'metadata': {
        'store_id': '12345',
        'created_by': 'system',
    },
}
print(v.validate(valid_doc))  # True

invalid_doc = {
    'products': [
        {
            'name': 'Widget',
            'price': -5.0,  # Below the minimum of 0
            'category': 'gadgets',
            'tags': ['new', 'popular'],  # Lacks the required 'available' tag
        },
    ],
    'metadata': {
        'storeID': '12345',  # Key has upper-case letters, fails keysrules
        'created_by': 123,  # Value is a number, fails valuesrules
    },
}
print(v.validate(invalid_doc))  # False

from cerberus import Validator
schema = {
    'id': {'type': 'integer', 'readonly': True},
    'name': {'type': 'string'},
    'created_at': {'type': 'datetime', 'readonly': True},
    'internal_field': {'type': 'string'},  # Declared in the schema, so it is kept
}

# Validator that drops readonly fields and fields missing from the schema
v = Validator(schema, purge_readonly=True, purge_unknown=True)

document = {
    'id': 123,
    'name': 'Test',
    'created_at': '2023-01-01T00:00:00',
    'extra_field': 'should be removed',  # Not in the schema: purged as unknown
    'internal_field': 'keep this',
}

# Normalization strips the readonly and unknown fields
normalized = v.normalized(document)
print(normalized)
# Output: {'name': 'Test', 'internal_field': 'keep this'}

# Without purging, a document that sets readonly fields fails validation
v_strict = Validator(schema, purge_readonly=False)
print(v_strict.validate(document))  # False - readonly fields present

from cerberus import Validator
import uuid
import datetime
def generate_id(document=None):
    """Return a new random UUID (version 4) as a string.

    ``document`` is accepted and ignored so the function can be used as a
    Cerberus ``default_setter``, which is called with the document being
    normalized. It stays optional so direct zero-argument calls still work.
    """
    return str(uuid.uuid4())
def current_timestamp(document=None):
    """Return the current local time as a naive ``datetime``.

    ``document`` is accepted and ignored so the function can be used as a
    Cerberus ``default_setter``, which is called with the document being
    normalized. It stays optional so direct zero-argument calls still work.
    """
    return datetime.datetime.now()
# Cerberus calls each default_setter with the document as its only argument
schema = {
    'id': {'type': 'string', 'default_setter': generate_id},
    'name': {'type': 'string'},
    'created_at': {'type': 'datetime', 'default_setter': current_timestamp},
    'status': {
        'type': 'string',
        'default': 'active',  # Fixed default value
    },
}
v = Validator(schema)

# Only 'name' is supplied; the other fields come from defaults
document = {'name': 'Test Item'}
normalized = v.normalized(document)
print(normalized)
# Output includes generated ID, current timestamp, and default status
# {
# 'id': '550e8400-e29b-41d4-a716-446655440000',
# 'name': 'Test Item',
# 'created_at': datetime.datetime(2023, 1, 1, 12, 0, 0),
# 'status': 'active'
# }

from cerberus import Validator
schema = {
'id': {'type': 'integer', 'required': True, 'readonly': True},
'name': {'type': 'string', 'required': True},
'email': {'type': 'string', 'required': True},
'age': {'type': 'integer', 'min': 0}
}
v = Validator(schema)
# Full document validation
full_doc = {'id': 1, 'name': 'John', 'email': 'john@example.com', 'age': 30}
print(v.validate(full_doc)) # True
# Update validation - only validates provided fields
update_doc = {'name': 'Johnny', 'age': 31} # Missing required 'email' and 'id'
print(v.validate(update_doc, update=True)) # True - required fields not enforced in update mode
# Invalid update
invalid_update = {'age': -5} # Violates min constraint
print(v.validate(invalid_update, update=True)) # False - constraint violations still applyInstall with Tessl CLI
npx tessl i tessl/pypi-cerberus