tessl/pypi-cerberus

Lightweight, extensible schema and data validation tool for Python dictionaries.

Overview

Eval results

Files

Advanced Features

Name: tessl/pypi-cerberus
Author: tessl

Cerberus provides advanced validation capabilities including normalization, custom validators, field dependencies, coercion, and complex validation scenarios. These features enable sophisticated validation logic for complex data processing requirements.

Capabilities

Normalization and Coercion

Document transformation and value coercion during validation.

class Validator:
    # Signature-only summary of the normalization-related members; the bodies
    # here contain nothing but their docstrings.
    # NOTE(review): these are listed as properties - confirm against the
    # installed Cerberus release whether they are properties or plain class
    # attributes.
    @property
    def coercers(self):
        """Available coercion methods for transforming values"""

    @property
    def default_setters(self):
        """Available default value setter methods"""

    @property
    def normalization_rules(self):
        """Rules applied during document normalization"""

Custom Validation Rules

Access to validation rule methods and capabilities.

class Validator:
    # Signature-only summary of the members that expose validation rules; the
    # bodies here contain nothing but their docstrings.
    @property
    def validators(self):
        """Available validator methods for custom validation logic"""

    @property
    def rules(self):
        """All available validation rules"""

    @property
    def validation_rules(self):
        """Rules applied during validation phase"""

Validation Rule Categories

Pre-defined rule processing categories.

class Validator:
    """Signature-only summary of the rule-processing category attributes."""

    # Rules that are evaluated on every field regardless of schema
    mandatory_validations: tuple

    # Rules that are processed first during validation
    priority_validations: tuple

Advanced Schema Features

Complex schema validation capabilities.

class Validator:
    # Signature-only listing of rule handler methods. Each one receives the
    # rule's constraint from the schema, the field name, and the field's value.
    def _validate_dependencies(self, dependencies, field, value): ...
    def _validate_excludes(self, excludes, field, value): ...
    def _validate_contains(self, contains, field, value): ...
    # NOTE(review): the examples in this document use the 'schema' rule for
    # list items rather than 'itemsrules' - confirm which Cerberus release
    # provides this handler.
    def _validate_itemsrules(self, itemsrules, field, value): ...
    def _validate_keysrules(self, keysrules, field, value): ...
    def _validate_valuesrules(self, valuesrules, field, value): ...
    # Logical rules: each takes a list of alternative rule sets.
    def _validate_oneof(self, oneof, field, value): ...
    def _validate_anyof(self, anyof, field, value): ...
    def _validate_allof(self, allof, field, value): ...
    def _validate_noneof(self, noneof, field, value): ...

Usage Examples

Document Normalization

from cerberus import Validator

# Rule sets for each field, kept separate so the schema reads as a summary
name_rules = {
    'type': 'string',
    'coerce': str.title,  # 'john doe' -> 'John Doe'
    'default': 'Anonymous',
}
age_rules = {
    'type': 'integer',
    'coerce': int,  # '25' -> 25
    'min': 0,
}
tag_rules = {
    'type': 'list',
    'default': [],
    'schema': {'type': 'string', 'coerce': str.lower},  # lowercase every tag
}
schema = {'name': name_rules, 'age': age_rules, 'tags': tag_rules}

v = Validator(schema)

# Input uses a lowercase name, a numeric string and mixed-case tags
document = dict(name='john doe', age='25', tags=['Python', 'VALIDATION'])

# normalized() hands back the transformed document
normalized = v.normalized(document)
print(normalized)
# Output: {
#     'name': 'John Doe',
#     'age': 25,
#     'tags': ['python', 'validation']
# }

# Fields absent from the input are filled in from their defaults
partial_doc = {'age': '30'}
normalized_partial = v.normalized(partial_doc)
print(normalized_partial)
# Output: {'name': 'Anonymous', 'age': 30, 'tags': []}

Custom Coercion Functions

from cerberus import Validator
import datetime

def to_datetime(value):
    """Parse an ISO-format string into a datetime; pass other values through."""
    if not isinstance(value, str):
        # Non-strings are returned untouched.
        return value
    return datetime.datetime.fromisoformat(value)

def normalize_email(value):
    """Lowercase and trim a string; return any non-string value unchanged."""
    if isinstance(value, str):
        return value.lower().strip()
    return value

# Attach each coercion helper to the field it cleans up
created_at_rules = {'type': 'datetime', 'coerce': to_datetime}
email_rules = {
    'type': 'string',
    'coerce': normalize_email,
    'regex': r'^[^@]+@[^@]+\.[^@]+$',
}
schema = {'created_at': created_at_rules, 'email': email_rules}

v = Validator(schema)

document = {
    'created_at': '2023-01-01T12:00:00',
    'email': '  USER@EXAMPLE.COM  ',
}

normalized = v.normalized(document)
# Prints the datetime object first, then 'user@example.com'
for field_name in ('created_at', 'email'):
    print(normalized[field_name])

Field Dependencies

from cerberus import Validator

class ContactValidator(Validator):
    """Validator with the cross-field check used by the 'phone' rule set."""

    def _check_with_company_email(self, field, value):
        """Report an error on ``field`` unless the document's email ends in @company.com."""
        email = self.document.get('email')
        if not (isinstance(email, str) and email.endswith('@company.com')):
            self._error(field, "requires an email address at company.com")


# Schema with field dependencies
schema = {
    'name': {'type': 'string'},
    'age': {'type': 'integer'},
    'email': {
        'type': 'string',
        'dependencies': ['name', 'age']  # email requires name and age
    },
    'phone': {
        'type': 'string',
        # A mapping passed to 'dependencies' lists the exact values the other
        # field may hold; it is not a nested rule set. A pattern condition on
        # another field therefore goes through 'check_with'.
        'dependencies': ['email'],       # phone requires email to be present
        'check_with': 'company_email'    # ... and that email must be @company.com
    }
}

v = ContactValidator(schema)

# Valid with all dependencies satisfied
valid_doc = {
    'name': 'John',
    'age': 30,
    'email': 'john@company.com',
    'phone': '555-1234'
}
print(v.validate(valid_doc))  # True

# Invalid - email present without required dependencies
invalid_doc1 = {'email': 'john@example.com'}
print(v.validate(invalid_doc1))  # False - missing name and age

# Invalid - phone present but email is not a company address
invalid_doc2 = {
    'name': 'John',
    'age': 30,
    'email': 'john@example.com',  # Not @company.com
    'phone': '555-1234'
}
print(v.validate(invalid_doc2))  # False

Field Exclusions

from cerberus import Validator

# The three credential fields are mutually exclusive: each one excludes the
# other two.
credential_fields = ('email', 'username', 'phone')

schema = {
    'login_method': {'type': 'string', 'allowed': list(credential_fields)},
}
schema.update({
    credential: {
        'type': 'string',
        'excludes': [other for other in credential_fields if other != credential],
    }
    for credential in credential_fields
})

v = Validator(schema)

# Valid - only one login method
valid_doc = dict(login_method='email', email='john@example.com')
print(v.validate(valid_doc))  # True

# Invalid - multiple conflicting login methods
invalid_doc = dict(
    login_method='email',
    email='john@example.com',
    username='john_doe',  # Conflicts with email
)
print(v.validate(invalid_doc))  # False

Logical Validation Rules

from cerberus import Validator

# Schema with logical constraints
schema = {
    'user_type': {'type': 'string'},
    'permissions': {
        'type': 'dict',
        'oneof': [  # Must match exactly one of these schemas
            {
                'schema': {
                    'read': {'type': 'boolean'},
                    'write': {'type': 'boolean'}
                }
            },
            {
                'schema': {
                    'admin': {'type': 'boolean', 'allowed': [True]}
                }
            }
        ]
    },
    'contact_info': {
        'type': 'dict',
        'anyof': [  # Must match at least one of these schemas
            {'schema': {'email': {'type': 'string', 'required': True}}},
            {'schema': {'phone': {'type': 'string', 'required': True}}},
            {'schema': {'address': {'type': 'string', 'required': True}}}
        ]
    }
}

v = Validator(schema)

# Valid - matches admin permissions schema
valid_doc1 = {
    'user_type': 'admin',
    'permissions': {'admin': True},
    'contact_info': {'email': 'admin@example.com'}
}
print(v.validate(valid_doc1))  # True

# Valid - matches read/write permissions schema and has phone.
# Each alternative declares a single key and unknown keys are rejected, so
# contact_info must carry only the key of the alternative it is meant to match.
valid_doc2 = {
    'user_type': 'user',
    'permissions': {'read': True, 'write': False},
    'contact_info': {'phone': '555-1234'}
}
print(v.validate(valid_doc2))  # True

# Invalid - permissions match neither schema (mixing both formats)
invalid_doc = {
    'user_type': 'user',
    'permissions': {'read': True, 'admin': True},  # Violates oneof
    'contact_info': {'name': 'John'}  # Doesn't match any anyof schemas
}
print(v.validate(invalid_doc))  # False

Custom Validation Methods

from cerberus import Validator

class BusinessValidator(Validator):
    """Validator extended with the 'business_hours' and 'tax_id' rules."""

    def _validate_business_hours(self, business_hours, field, value):
        """Validate business hours format.

        Only the hour parts of the 'start-end' range are inspected; the
        minutes are not checked.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if business_hours:
            try:
                start, end = value.split('-')
                start_hour = int(start.split(':')[0])
                end_hour = int(end.split(':')[0])
                if not (0 <= start_hour <= 23 and 0 <= end_hour <= 23):
                    self._error(field, "business hours must use 24-hour format")
                if start_hour >= end_hour:
                    self._error(field, "start time must be before end time")
            except (ValueError, AttributeError):
                # ValueError: wrong number of '-' parts or a non-numeric hour.
                # AttributeError: value has no split(), i.e. it is not a string.
                self._error(field, "business hours must be in format 'HH:MM-HH:MM'")

    def _validate_tax_id(self, tax_id, field, value):
        """Validate tax ID format.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if tax_id and not (value.isdigit() and len(value) == 9):
            self._error(field, "tax ID must be 9 digits")

# Switch the custom rules on by naming them in the schema
schema = {
    'business_name': {'type': 'string'},
    'hours': {'type': 'string', 'business_hours': True},
    'tax_id': {'type': 'string', 'tax_id': True},
}

v = BusinessValidator(schema)

valid_doc = dict(
    business_name='ABC Corp',
    hours='09:00-17:00',
    tax_id='123456789',
)
print(v.validate(valid_doc))  # True

invalid_doc = dict(
    business_name='ABC Corp',
    hours='17:00-09:00',  # End before start
    tax_id='12345',       # Too short
)
print(v.validate(invalid_doc))  # False
print(v.errors)

Complex List and Dict Validation

from cerberus import Validator

# Rules for a single entry of the 'products' list
product_rules = {
    'name': {'type': 'string', 'required': True},
    'price': {'type': 'float', 'min': 0},
    'category': {'type': 'string', 'required': True},
    'tags': {
        'type': 'list',
        'schema': {'type': 'string'},
        'contains': ['available'],  # Must contain 'available' tag
    },
}

# Rules for the free-form 'metadata' mapping
metadata_rules = {
    'type': 'dict',
    'keysrules': {'type': 'string', 'regex': r'^[a-z_]+$'},  # Keys must be lowercase with underscores
    'valuesrules': {'type': 'string'},  # All values must be strings
}

# Schema with complex nested validation
schema = {
    'products': {
        'type': 'list',
        'schema': {'type': 'dict', 'schema': product_rules},
    },
    'metadata': metadata_rules,
}

v = Validator(schema)

valid_product = {
    'name': 'Widget',
    'price': 19.99,
    'category': 'gadgets',
    'tags': ['new', 'available', 'popular'],
}
valid_doc = {
    'products': [valid_product],
    'metadata': {'store_id': '12345', 'created_by': 'system'},
}
print(v.validate(valid_doc))  # True

invalid_product = {
    'name': 'Widget',
    'price': -5.0,  # Negative price
    'category': 'gadgets',
    'tags': ['new', 'popular'],  # Missing required 'available' tag
}
invalid_doc = {
    'products': [invalid_product],
    'metadata': {
        'storeID': '12345',  # Invalid key format (camelCase instead of snake_case)
        'created_by': 123,   # Invalid value type (number instead of string)
    },
}
print(v.validate(invalid_doc))  # False

Readonly Fields and Purging

from cerberus import Validator

# Demonstrates purging: two readonly fields, and one input key ('extra_field')
# that the schema does not declare.
schema = {
    'id': {'type': 'integer', 'readonly': True},
    'name': {'type': 'string'},
    'created_at': {'type': 'datetime', 'readonly': True},
    'internal_field': {'type': 'string'}  # Declared here, so it is kept (see output below)
}

# Validator that purges readonly and unknown fields
v = Validator(schema, purge_readonly=True, purge_unknown=True)

document = {
    'id': 123,
    'name': 'Test',
    'created_at': '2023-01-01T00:00:00',
    'extra_field': 'should be removed',
    'internal_field': 'keep this'
}

# Normalize document (removes readonly and unknown fields)
normalized = v.normalized(document)
print(normalized)
# Output: {'name': 'Test', 'internal_field': 'keep this'}

# Validation without purging will fail for readonly fields if they're being set
v_strict = Validator(schema, purge_readonly=False)
print(v_strict.validate(document))  # False - readonly fields present

Default Value Setters

from cerberus import Validator
import uuid
import datetime

def generate_id(document=None):
    """Generate unique ID.

    Args:
        document: The document being normalized. Cerberus passes it to every
            ``default_setter`` callable; it is not used here. Optional so the
            function can still be called without arguments.

    Returns:
        A random UUID4 rendered as a string.
    """
    return str(uuid.uuid4())

def current_timestamp(document=None):
    """Get current timestamp.

    Args:
        document: The document being normalized. Cerberus passes it to every
            ``default_setter`` callable; it is not used here. Optional so the
            function can still be called without arguments.

    Returns:
        The value of ``datetime.datetime.now()`` at call time.
    """
    return datetime.datetime.now()

# 'default_setter' names a callable that produces the value, while 'default'
# holds the value itself.
schema = {
    'id': {'type': 'string', 'default_setter': generate_id},
    'name': {'type': 'string'},
    'created_at': {'type': 'datetime', 'default_setter': current_timestamp},
    'status': {'type': 'string', 'default': 'active'},  # Simple default value
}

v = Validator(schema)

# Only 'name' is supplied; every other field is missing
document = dict(name='Test Item')

normalized = v.normalized(document)
print(normalized)
# Output includes generated ID, current timestamp, and default status
# {
#     'id': '550e8400-e29b-41d4-a716-446655440000',
#     'name': 'Test Item',
#     'created_at': datetime.datetime(2023, 1, 1, 12, 0, 0),
#     'status': 'active'
# }

Validation with Update Mode

from cerberus import Validator

# 'id' is required but deliberately not 'readonly': a readonly field makes
# validation fail whenever it is present in the document, which would reject
# the full document below.
schema = {
    'id': {'type': 'integer', 'required': True},
    'name': {'type': 'string', 'required': True},
    'email': {'type': 'string', 'required': True},
    'age': {'type': 'integer', 'min': 0}
}

v = Validator(schema)

# Full document validation
full_doc = {'id': 1, 'name': 'John', 'email': 'john@example.com', 'age': 30}
print(v.validate(full_doc))  # True

# Update validation - only validates provided fields
update_doc = {'name': 'Johnny', 'age': 31}  # Missing required 'email' and 'id'
print(v.validate(update_doc, update=True))  # True - required fields not enforced in update mode

# Invalid update
invalid_update = {'age': -5}  # Violates min constraint
print(v.validate(invalid_update, update=True))  # False - constraint violations still apply

Install with Tessl CLI