0
# Advanced Features
1
2
Cerberus provides advanced validation capabilities including normalization, custom validators, field dependencies, coercion, and complex validation scenarios. These features enable sophisticated validation logic for complex data processing requirements.
3
4
## Capabilities
5
6
### Normalization and Coercion
7
8
Document transformation and value coercion during validation.
9
10
```python { .api }
11
class Validator:
12
@property
13
def coercers(self):
14
"""Available coercion methods for transforming values"""
15
16
@property
17
def default_setters(self):
18
"""Available default value setter methods"""
19
20
@property
21
def normalization_rules(self):
22
"""Rules applied during document normalization"""
23
```
24
25
### Custom Validation Rules
26
27
Access to validation rule methods and capabilities.
28
29
```python { .api }
30
class Validator:
31
@property
32
def validators(self):
33
"""Available validator methods for custom validation logic"""
34
35
@property
36
def rules(self):
37
"""All available validation rules"""
38
39
@property
40
def validation_rules(self):
41
"""Rules applied during validation phase"""
42
```
43
44
### Validation Rule Categories
45
46
Pre-defined rule processing categories.
47
48
```python { .api }
49
class Validator:
50
mandatory_validations: tuple
51
"""Rules that are evaluated on every field regardless of schema"""
52
53
priority_validations: tuple
54
"""Rules that are processed first during validation"""
55
```
56
57
### Advanced Schema Features
58
59
Complex schema validation capabilities.
60
61
```python { .api }
62
class Validator:
63
def _validate_dependencies(self, dependencies, field, value): ...
64
def _validate_excludes(self, excludes, field, value): ...
65
def _validate_contains(self, contains, field, value): ...
66
def _validate_itemsrules(self, itemsrules, field, value): ...
67
def _validate_keysrules(self, keysrules, field, value): ...
68
def _validate_valuesrules(self, valuesrules, field, value): ...
69
def _validate_oneof(self, oneof, field, value): ...
70
def _validate_anyof(self, anyof, field, value): ...
71
def _validate_allof(self, allof, field, value): ...
72
def _validate_noneof(self, noneof, field, value): ...
73
```
74
75
## Usage Examples
76
77
### Document Normalization
78
79
```python
80
from cerberus import Validator
81
82
# Schema with normalization rules
83
schema = {
84
'name': {
85
'type': 'string',
86
'coerce': str.title, # Convert to title case
87
'default': 'Anonymous'
88
},
89
'age': {
90
'type': 'integer',
91
'coerce': int, # Convert to integer
92
'min': 0
93
},
94
'tags': {
95
'type': 'list',
96
'default': [],
97
'schema': {'type': 'string', 'coerce': str.lower}
98
}
99
}
100
101
v = Validator(schema)
102
103
# Document with mixed case and string numbers
104
document = {
105
'name': 'john doe',
106
'age': '25',
107
'tags': ['Python', 'VALIDATION']
108
}
109
110
# Normalize the document
111
normalized = v.normalized(document)
112
print(normalized)
113
# Output: {
114
# 'name': 'John Doe',
115
# 'age': 25,
116
# 'tags': ['python', 'validation']
117
# }
118
119
# Normalize document with missing fields (uses defaults)
120
partial_doc = {'age': '30'}
121
normalized_partial = v.normalized(partial_doc)
122
print(normalized_partial)
123
# Output: {'name': 'Anonymous', 'age': 30, 'tags': []}
124
```
125
126
### Custom Coercion Functions
127
128
```python
129
from cerberus import Validator
130
import datetime
131
132
def to_datetime(value):
    """Coerce an ISO 8601 string to a ``datetime``; pass other values through.

    Args:
        value: The raw field value handed over by the ``coerce`` rule.

    Returns:
        ``datetime.datetime.fromisoformat(value)`` when ``value`` is a
        string, otherwise ``value`` unchanged.

    Raises:
        ValueError: If ``value`` is a string that is not a valid ISO format
            accepted by ``fromisoformat``.
    """
    if isinstance(value, str):
        return datetime.datetime.fromisoformat(value)
    # Non-string input (e.g. an existing datetime) is left for the 'type'
    # rule to accept or reject.
    return value
137
138
def normalize_email(value):
    """Lowercase a string and strip surrounding whitespace.

    Args:
        value: The raw field value handed over by the ``coerce`` rule.

    Returns:
        The lowercased, stripped string when ``value`` is a string;
        otherwise ``value`` unchanged.
    """
    return value.lower().strip() if isinstance(value, str) else value
141
142
schema = {
143
'created_at': {
144
'type': 'datetime',
145
'coerce': to_datetime
146
},
147
'email': {
148
'type': 'string',
149
'coerce': normalize_email,
150
'regex': r'^[^@]+@[^@]+\.[^@]+$'
151
}
152
}
153
154
v = Validator(schema)
155
156
document = {
157
'created_at': '2023-01-01T12:00:00',
158
'email': ' USER@EXAMPLE.COM '
159
}
160
161
normalized = v.normalized(document)
162
print(normalized['created_at']) # datetime object
163
print(normalized['email']) # 'user@example.com'
164
```
165
166
### Field Dependencies
167
168
```python
169
from cerberus import Validator
170
171
# Schema with field dependencies
schema = {
    'name': {'type': 'string'},
    'age': {'type': 'integer'},
    'email': {
        'type': 'string',
        # List form: every named field must be present alongside 'email'.
        'dependencies': ['name', 'age']
    },
    'phone': {
        'type': 'string',
        # Mapping form: each key is a field that must be present AND hold one
        # of the listed values. The values are compared for equality; they
        # are not interpreted as nested rules such as 'regex'.
        'dependencies': {'email': ['john@company.com', 'jane@company.com']}
    }
}

v = Validator(schema)

# Valid with all dependencies satisfied
valid_doc = {
    'name': 'John',
    'age': 30,
    'email': 'john@company.com',
    'phone': '555-1234'
}
print(v.validate(valid_doc))  # True

# Invalid - email present without required dependencies
invalid_doc1 = {'email': 'john@example.com'}
print(v.validate(invalid_doc1))  # False - missing name and age

# Invalid - phone present but email is not one of the allowed values
invalid_doc2 = {
    'name': 'John',
    'age': 30,
    'email': 'john@example.com',  # Not an allowed company address
    'phone': '555-1234'
}
print(v.validate(invalid_doc2))  # False
208
```
209
210
### Field Exclusions
211
212
```python
213
from cerberus import Validator
214
215
# Schema with mutually exclusive fields
216
schema = {
217
'login_method': {
218
'type': 'string',
219
'allowed': ['email', 'username', 'phone']
220
},
221
'email': {
222
'type': 'string',
223
'excludes': ['username', 'phone'] # Can't have email with username or phone
224
},
225
'username': {
226
'type': 'string',
227
'excludes': ['email', 'phone'] # Can't have username with email or phone
228
},
229
'phone': {
230
'type': 'string',
231
'excludes': ['email', 'username'] # Can't have phone with email or username
232
}
233
}
234
235
v = Validator(schema)
236
237
# Valid - only one login method
238
valid_doc = {
239
'login_method': 'email',
240
'email': 'john@example.com'
241
}
242
print(v.validate(valid_doc)) # True
243
244
# Invalid - multiple conflicting login methods
245
invalid_doc = {
246
'login_method': 'email',
247
'email': 'john@example.com',
248
'username': 'john_doe' # Conflicts with email
249
}
250
print(v.validate(invalid_doc)) # False
251
```
252
253
### Logical Validation Rules
254
255
```python
256
from cerberus import Validator
257
258
# Schema with logical constraints
259
schema = {
260
'user_type': {'type': 'string'},
261
'permissions': {
262
'type': 'dict',
263
'oneof': [ # Must match exactly one of these schemas
264
{
265
'schema': {
266
'read': {'type': 'boolean'},
267
'write': {'type': 'boolean'}
268
}
269
},
270
{
271
'schema': {
272
'admin': {'type': 'boolean', 'allowed': [True]}
273
}
274
}
275
]
276
},
277
'contact_info': {
278
'type': 'dict',
279
'anyof': [ # Must match at least one of these schemas
280
{'schema': {'email': {'type': 'string', 'required': True}}},
281
{'schema': {'phone': {'type': 'string', 'required': True}}},
282
{'schema': {'address': {'type': 'string', 'required': True}}}
283
]
284
}
285
}
286
287
v = Validator(schema)
288
289
# Valid - matches admin permissions schema
290
valid_doc1 = {
291
'user_type': 'admin',
292
'permissions': {'admin': True},
293
'contact_info': {'email': 'admin@example.com'}
294
}
295
print(v.validate(valid_doc1)) # True
296
297
# Valid - matches the read/write permissions schema and has phone.
# Each 'anyof' alternative is a closed schema that rejects unknown keys, so
# contact_info must contain only the key of the alternative it satisfies;
# supplying both 'phone' and 'email' would fail every alternative.
valid_doc2 = {
    'user_type': 'user',
    'permissions': {'read': True, 'write': False},
    'contact_info': {'phone': '555-1234'}
}
print(v.validate(valid_doc2))  # True
304
305
# Invalid - permissions match neither schema (mixing both formats)
306
invalid_doc = {
307
'user_type': 'user',
308
'permissions': {'read': True, 'admin': True}, # Violates oneof
309
'contact_info': {'name': 'John'} # Doesn't match any anyof schemas
310
}
311
print(v.validate(invalid_doc)) # False
312
```
313
314
### Custom Validation Methods
315
316
```python
317
from cerberus import Validator
318
319
class BusinessValidator(Validator):
    """Validator extended with the custom ``business_hours`` and ``tax_id`` rules."""

    def _validate_business_hours(self, business_hours, field, value):
        """Check that the value is a start-end range such as '09:00-17:00'.

        Only the hour components are inspected: both must lie in 0-23 and
        the start hour must be strictly less than the end hour. The check
        runs only when the rule's constraint is truthy.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if business_hours:
            try:
                start, end = value.split('-')
                start_hour = int(start.split(':')[0])
                end_hour = int(end.split(':')[0])
                if not (0 <= start_hour <= 23 and 0 <= end_hour <= 23):
                    self._error(field, "business hours must use 24-hour format")
                if start_hour >= end_hour:
                    self._error(field, "start time must be before end time")
            except (ValueError, AttributeError):
                # ValueError: wrong number of '-' separated parts or a
                # non-numeric hour; AttributeError: value is not a string.
                self._error(field, "business hours must be in format 'HH:MM-HH:MM'")

    def _validate_tax_id(self, tax_id, field, value):
        """Check that the value consists of exactly nine digits.

        The check runs only when the rule's constraint is truthy.

        The rule's arguments are validated against this schema:
        {'type': 'boolean'}
        """
        if tax_id and not (value.isdigit() and len(value) == 9):
            self._error(field, "tax ID must be 9 digits")
338
339
# Use custom validator
340
schema = {
341
'business_name': {'type': 'string'},
342
'hours': {'type': 'string', 'business_hours': True},
343
'tax_id': {'type': 'string', 'tax_id': True}
344
}
345
346
v = BusinessValidator(schema)
347
348
valid_doc = {
349
'business_name': 'ABC Corp',
350
'hours': '09:00-17:00',
351
'tax_id': '123456789'
352
}
353
print(v.validate(valid_doc)) # True
354
355
invalid_doc = {
356
'business_name': 'ABC Corp',
357
'hours': '17:00-09:00', # End before start
358
'tax_id': '12345' # Too short
359
}
360
print(v.validate(invalid_doc)) # False
361
print(v.errors)
362
```
363
364
### Complex List and Dict Validation
365
366
```python
367
from cerberus import Validator
368
369
# Schema with complex nested validation
370
schema = {
371
'products': {
372
'type': 'list',
373
'schema': {
374
'type': 'dict',
375
'schema': {
376
'name': {'type': 'string', 'required': True},
377
'price': {'type': 'float', 'min': 0},
378
'category': {'type': 'string', 'required': True},
379
'tags': {
380
'type': 'list',
381
'schema': {'type': 'string'},
382
'contains': ['available'] # Must contain 'available' tag
383
}
384
}
385
}
386
},
387
'metadata': {
388
'type': 'dict',
389
'keysrules': {'type': 'string', 'regex': r'^[a-z_]+$'}, # Keys must be lowercase with underscores
390
'valuesrules': {'type': 'string'} # All values must be strings
391
}
392
}
393
394
v = Validator(schema)
395
396
valid_doc = {
397
'products': [
398
{
399
'name': 'Widget',
400
'price': 19.99,
401
'category': 'gadgets',
402
'tags': ['new', 'available', 'popular']
403
}
404
],
405
'metadata': {
406
'store_id': '12345',
407
'created_by': 'system'
408
}
409
}
410
print(v.validate(valid_doc)) # True
411
412
invalid_doc = {
413
'products': [
414
{
415
'name': 'Widget',
416
'price': -5.0, # Negative price
417
'category': 'gadgets',
418
'tags': ['new', 'popular'] # Missing required 'available' tag
419
}
420
],
421
'metadata': {
422
'storeID': '12345', # Invalid key format (camelCase instead of snake_case)
423
'created_by': 123 # Invalid value type (number instead of string)
424
}
425
}
426
print(v.validate(invalid_doc)) # False
427
```
428
429
### Readonly Fields and Purging
430
431
```python
432
from cerberus import Validator
433
434
schema = {
435
'id': {'type': 'integer', 'readonly': True},
436
'name': {'type': 'string'},
437
'created_at': {'type': 'datetime', 'readonly': True},
438
'internal_field': {'type': 'string'} # Declared in the schema, so it is kept; only undeclared fields are purged
439
}
440
441
# Validator that purges readonly and unknown fields
442
v = Validator(schema, purge_readonly=True, purge_unknown=True)
443
444
document = {
445
'id': 123,
446
'name': 'Test',
447
'created_at': '2023-01-01T00:00:00',
448
'extra_field': 'should be removed',
449
'internal_field': 'keep this'
450
}
451
452
# Normalize document (removes readonly and unknown fields)
453
normalized = v.normalized(document)
454
print(normalized)
455
# Output: {'name': 'Test', 'internal_field': 'keep this'}
456
457
# Validation without purging will fail for readonly fields if they're being set
458
v_strict = Validator(schema, purge_readonly=False)
459
print(v_strict.validate(document)) # False - readonly fields present
460
```
461
462
### Default Value Setters
463
464
```python
465
from cerberus import Validator
466
import uuid
467
import datetime
468
469
def generate_id(document=None):
    """Return a fresh random UUID4 string for use as a default ID.

    Cerberus calls a ``default_setter`` callable with the (sub)document being
    normalized as its only argument, so the function must accept one
    positional parameter.

    Args:
        document: The document being normalized. Unused; it defaults to
            ``None`` so the function can also be called directly.

    Returns:
        The string form of a newly generated ``uuid.uuid4()``.
    """
    return str(uuid.uuid4())
472
473
def current_timestamp(document=None):
    """Return the current local date and time for use as a default value.

    Cerberus calls a ``default_setter`` callable with the (sub)document being
    normalized as its only argument, so the function must accept one
    positional parameter.

    Args:
        document: The document being normalized. Unused; it defaults to
            ``None`` so the function can also be called directly.

    Returns:
        The result of ``datetime.datetime.now()`` (a naive datetime).
    """
    return datetime.datetime.now()
476
477
schema = {
478
'id': {
479
'type': 'string',
480
'default_setter': generate_id
481
},
482
'name': {'type': 'string'},
483
'created_at': {
484
'type': 'datetime',
485
'default_setter': current_timestamp
486
},
487
'status': {
488
'type': 'string',
489
'default': 'active' # Simple default value
490
}
491
}
492
493
v = Validator(schema)
494
495
# Document with missing fields
496
document = {'name': 'Test Item'}
497
498
normalized = v.normalized(document)
499
print(normalized)
500
# Output includes generated ID, current timestamp, and default status
501
# {
502
# 'id': '550e8400-e29b-41d4-a716-446655440000',
503
# 'name': 'Test Item',
504
# 'created_at': datetime.datetime(2023, 1, 1, 12, 0, 0),
505
# 'status': 'active'
506
# }
507
```
508
509
### Validation with Update Mode
510
511
```python
512
from cerberus import Validator
513
514
schema = {
    # 'id' is deliberately not 'readonly': a readonly field fails validation
    # whenever it appears in the document, which would make full_doc invalid.
    'id': {'type': 'integer', 'required': True},
    'name': {'type': 'string', 'required': True},
    'email': {'type': 'string', 'required': True},
    'age': {'type': 'integer', 'min': 0}
}
520
521
v = Validator(schema)
522
523
# Full document validation
524
full_doc = {'id': 1, 'name': 'John', 'email': 'john@example.com', 'age': 30}
525
print(v.validate(full_doc)) # True
526
527
# Update validation - only validates provided fields
528
update_doc = {'name': 'Johnny', 'age': 31} # Missing required 'email' and 'id'
529
print(v.validate(update_doc, update=True)) # True - required fields not enforced in update mode
530
531
# Invalid update
532
invalid_update = {'age': -5} # Violates min constraint
533
print(v.validate(invalid_update, update=True)) # False - constraint violations still apply
534
```