# Advanced Components

Internal processing components for custom loader and dumper construction, including constructors, representers, resolvers, and processing pipeline components. These components provide the building blocks for customized YAML processing.

## Capabilities

### Constructor Components

Constructor classes handle converting YAML nodes to Python objects with different levels of safety and functionality.

```python { .api }
class BaseConstructor:
    """
    Base constructor class providing fundamental object construction.

    Class Attributes:
    - yaml_constructors: dict - Registry of tag constructors
    - yaml_multi_constructors: dict - Registry of tag prefix constructors

    Instance Attributes:
    - constructed_objects: dict - Cache of constructed objects
    - recursive_objects: dict - Tracking for recursive construction
    - state_generators: list - State generator functions
    - deep_construct: bool - Enable deep construction
    """

    def check_data(self) -> bool:
        """Check if more data is available for construction."""

    def get_data(self) -> Any:
        """Get next available data object."""

    def get_single_data(self) -> Any:
        """Get single document data object."""

    def construct_document(self, node) -> Any:
        """Construct Python object from document node."""

    def construct_object(self, node, deep=False) -> Any:
        """Construct Python object from any node."""

    def construct_scalar(self, node) -> Any:
        """Construct scalar value from scalar node."""

    def construct_sequence(self, node, deep=False) -> list:
        """Construct list from sequence node."""

    def construct_mapping(self, node, deep=False) -> dict:
        """Construct dictionary from mapping node."""

    def construct_pairs(self, node, deep=False) -> list:
        """Construct list of key-value pairs from mapping node."""
50
51
class SafeConstructor(BaseConstructor):
    """
    Safe constructor for standard YAML types only.

    Additional Attributes:
    - bool_values: dict - Boolean value mappings
    - inf_value: float - Infinity value representation
    - nan_value: float - NaN value representation
    - timestamp_regexp: Pattern - Timestamp parsing pattern

    Note: the collection constructors (omap, pairs, set, seq, map) are
    generators. They yield the still-empty container first and fill it
    afterwards, which is how recursive structures are supported.
    """

    def flatten_mapping(self, node):
        """Flatten mapping nodes handling merge keys."""

    def construct_yaml_null(self, node) -> None:
        """Construct null/None values."""

    def construct_yaml_bool(self, node) -> bool:
        """Construct boolean values."""

    def construct_yaml_int(self, node) -> int:
        """Construct integer values."""

    def construct_yaml_float(self, node) -> float:
        """Construct floating-point values."""

    def construct_yaml_binary(self, node) -> bytes:
        """Construct binary data from base64."""

    def construct_yaml_timestamp(self, node):
        """Construct date or datetime objects from timestamps."""

    def construct_yaml_omap(self, node) -> Iterator[list]:
        """Construct ordered mappings (generator yielding a list of pairs)."""

    def construct_yaml_pairs(self, node) -> Iterator[list]:
        """Construct key-value pair lists (generator yielding the list)."""

    def construct_yaml_set(self, node) -> Iterator[set]:
        """Construct set objects (generator yielding the set)."""

    def construct_yaml_str(self, node) -> str:
        """Construct string values."""

    def construct_yaml_seq(self, node) -> Iterator[list]:
        """Construct sequence/list values (generator yielding the list)."""

    def construct_yaml_map(self, node) -> Iterator[dict]:
        """Construct mapping/dict values (generator yielding the dict)."""
100
101
class FullConstructor(SafeConstructor):
    """
    Full constructor with extended Python object support.

    Supports Python built-in types like tuples, sets, and complex numbers
    while remaining safer than UnsafeConstructor.
    """

    def construct_python_str(self, node) -> str:
        """Construct Python string objects."""

    def construct_python_unicode(self, node) -> str:
        """Construct Unicode string objects."""

    def construct_python_bytes(self, node) -> bytes:
        """Construct byte string objects."""

    def construct_python_long(self, node) -> int:
        """Construct long integer objects."""

    def construct_python_complex(self, node) -> complex:
        """Construct complex number objects."""

    def construct_python_tuple(self, node) -> tuple:
        """Construct tuple objects."""
126
127
class UnsafeConstructor(FullConstructor):
    """
    Unsafe constructor allowing arbitrary Python object construction.

    WARNING: Can execute arbitrary code. Only use with trusted input.
    """
133
134
class Constructor(UnsafeConstructor):
    """
    Legacy constructor class, kept for code that subclasses it directly.

    It is the same as UnsafeConstructor, NOT SafeConstructor.

    WARNING: Can execute arbitrary code. Only use with trusted input.
    """
```

### Representer Components

Representer classes handle converting Python objects to YAML nodes with different levels of type support.

```python { .api }
class BaseRepresenter:
    """
    Base representer class for converting Python objects to YAML nodes.

    Class Attributes:
    - yaml_representers: dict - Registry of type representers
    - yaml_multi_representers: dict - Registry of type hierarchy representers

    Instance Attributes:
    - default_style: str - Default scalar style
    - sort_keys: bool - Sort mapping keys alphabetically
    - default_flow_style: bool - Default collection flow style
    - represented_objects: dict - Cache of represented objects
    - object_keeper: list - Object reference keeper
    - alias_key: int | None - Current alias key
    """

    def represent(self, data) -> None:
        """Represent data as a node and hand it to the serializer.

        Nothing is returned; the text output is produced by the
        serializer/emitter part of the dumper.
        """

    def represent_data(self, data):
        """Represent data and return YAML node."""

    def represent_scalar(self, tag: str, value: str, style=None):
        """Create scalar node representation."""

    def represent_sequence(self, tag: str, sequence, flow_style=None):
        """Create sequence node representation."""

    def represent_mapping(self, tag: str, mapping, flow_style=None):
        """Create mapping node representation."""

    def ignore_aliases(self, data) -> bool:
        """Check if aliases should be ignored for this data."""
177
178
class SafeRepresenter(BaseRepresenter):
    """
    Safe representer for standard Python types.

    Class Attributes:
    - inf_value: float - Infinity value

    Every represent_* method returns a YAML node.
    """

    def represent_none(self, data):
        """Represent None values (returns a null scalar node, not None)."""

    def represent_str(self, data):
        """Represent string values."""

    def represent_binary(self, data):
        """Represent binary data as base64."""

    def represent_bool(self, data):
        """Represent boolean values."""

    def represent_int(self, data):
        """Represent integer values."""

    def represent_float(self, data):
        """Represent floating-point values."""

    def represent_list(self, data):
        """Represent list objects."""

    def represent_dict(self, data):
        """Represent dictionary objects."""

    def represent_set(self, data):
        """Represent set objects."""

    def represent_date(self, data):
        """Represent date objects."""

    def represent_datetime(self, data):
        """Represent datetime objects."""
218
219
class Representer(SafeRepresenter):
    """
    Extended representer with additional Python object support.
    """

    def represent_complex(self, data):
        """Represent complex number objects."""

    def represent_tuple(self, data):
        """Represent tuple objects."""

    def represent_name(self, data):
        """Represent function/method names."""

    def represent_module(self, data):
        """Represent module objects."""

    def represent_object(self, data):
        """Represent arbitrary Python objects."""
```

### Resolver Components

Resolver classes handle automatic tag detection and assignment based on scalar values and document structure.

```python { .api }
class BaseResolver:
    """
    Base resolver for YAML tag resolution.

    Class Attributes:
    - DEFAULT_SCALAR_TAG: str - Default tag for scalars
    - DEFAULT_SEQUENCE_TAG: str - Default tag for sequences
    - DEFAULT_MAPPING_TAG: str - Default tag for mappings
    - yaml_implicit_resolvers: dict - Registry of implicit resolvers
    - yaml_path_resolvers: dict - Registry of path resolvers
    """

    def descend_resolver(self, current_node, current_index):
        """Descend into resolver tree for nested resolution."""

    def ascend_resolver(self):
        """Ascend from resolver tree after nested resolution."""

    def resolve(self, kind, value, implicit):
        """Resolve appropriate tag for a value."""
265
266
class Resolver(BaseResolver):
    """
    Standard resolver with built-in tag resolution patterns.

    Includes patterns for detecting integers, floats, booleans,
    null values, timestamps, and other standard YAML types.
    """
```

### Processing Pipeline Components

Core components that handle different stages of the YAML processing pipeline.

```python { .api }
class Reader:
    """
    YAML stream reader handling encoding and character-level input.

    Attributes:
    - name: str - Stream name
    - stream: Any - Input stream
    - encoding: str - Stream encoding
    - index: int - Current character index
    - line: int - Current line number
    - column: int - Current column number
    """

    def peek(self, index=0) -> str:
        """Peek at character at specified index."""

    def prefix(self, length=1) -> str:
        """Get character prefix of specified length."""

    def forward(self, length=1):
        """Advance position by specified length."""

    def get_mark(self):
        """Get current position mark."""
304
305
class Scanner:
    """
    YAML token scanner converting characters to tokens.

    Attributes:
    - done: bool - Scanning completion flag
    - tokens: list - Token queue
    - flow_level: int - Current flow nesting level
    - indent: int - Current indentation level
    """

    def check_token(self, *choices) -> bool:
        """Check if next token matches any of the given choices."""

    def peek_token(self):
        """Peek at next token without consuming it."""

    def get_token(self):
        """Get and consume next token."""
324
325
class Parser:
    """
    YAML event parser converting tokens to events.

    Attributes:
    - current_event: Event - Current event
    - yaml_version: tuple - YAML version
    - tag_handles: dict - Tag handle mappings
    """

    def check_event(self, *choices) -> bool:
        """Check if next event matches any of the given choices."""

    def peek_event(self):
        """Peek at next event without consuming it."""

    def get_event(self):
        """Get and consume next event."""
343
344
class Composer:
    """
    YAML node composer converting events to node trees.

    Attributes:
    - anchors: dict - Anchor to node mappings
    """

    def check_node(self) -> bool:
        """Check if a node is available."""

    def get_node(self):
        """Get next node."""

    def compose_node(self, parent, index):
        """Compose node with specified parent and index."""
360
361
class Serializer:
    """
    YAML serializer converting node trees to events.

    Attributes:
    - use_encoding: str | None - Encoding given at construction (not a flag)
    - use_explicit_start: bool - Use explicit document start
    - use_explicit_end: bool - Use explicit document end
    - serialized_nodes: dict - Cache of serialized nodes
    - anchors: dict - Node to anchor mappings
    """

    def open(self):
        """Open serialization session."""

    def close(self):
        """Close serialization session."""

    def serialize(self, node):
        """Serialize node to events."""
381
382
class Emitter:
    """
    YAML emitter converting events to text output.

    Attributes:
    - canonical: bool - Produce canonical YAML
    - allow_unicode: bool - Allow unicode characters
    - indent: int | None - Current indentation while emitting
    - best_indent: int - Configured indentation step (from the ``indent`` option)
    - best_width: int - Configured maximum line width (from the ``width`` option)
    """

    def emit(self, event):
        """Emit event as YAML text."""

    def need_more_events(self) -> bool:
        """Check if more events are needed."""
```

## Usage Examples

### Custom Loader Construction

```python
import yaml
406
407
# Create custom loader with specific components
408
# Components are referenced through the submodules that define them. PyYAML
# imports these submodules as part of `import yaml`; the top-level `yaml`
# namespace is not guaranteed to re-export the component classes.
class CustomLoader(
    yaml.reader.Reader,
    yaml.scanner.Scanner,
    yaml.parser.Parser,
    yaml.composer.Composer,
    yaml.constructor.SafeConstructor,  # Use safe constructor
    yaml.resolver.Resolver,
):
    """Custom loader with safe construction and standard resolution."""

    def __init__(self, stream):
        """Initialise every pipeline component; only the reader takes the stream."""
        yaml.reader.Reader.__init__(self, stream)
        yaml.scanner.Scanner.__init__(self)
        yaml.parser.Parser.__init__(self)
        yaml.composer.Composer.__init__(self)
        yaml.constructor.SafeConstructor.__init__(self)
        yaml.resolver.Resolver.__init__(self)
425
426
# Use custom loader
yaml_input = """
name: Custom Test
values: [1, 2, 3]
enabled: true
"""

data = yaml.load(yaml_input, Loader=CustomLoader)
print(f"Loaded with custom loader: {data}")
```

### Custom Dumper Construction

```python
import yaml
441
442
# Create custom dumper with specific components
443
class CustomDumper(
    yaml.emitter.Emitter,
    yaml.serializer.Serializer,
    yaml.representer.SafeRepresenter,  # Use safe representer
    yaml.resolver.Resolver,
):
    """Custom dumper with safe representation and standard resolution."""

    def __init__(self, stream,
                 default_style=None, default_flow_style=False,
                 canonical=None, indent=None, width=None,
                 allow_unicode=None, line_break=None,
                 encoding=None, explicit_start=None, explicit_end=None,
                 version=None, tags=None, sort_keys=True):
        """Initialise each component with only the options it understands.

        yaml.dump() passes every dump option to the Dumper constructor, but
        Emitter, Serializer and the representer each accept a different
        subset. Forwarding one shared **kwargs to all of them raises
        TypeError, so the options are split the same way the built-in
        dumpers split them.
        """
        yaml.emitter.Emitter.__init__(
            self, stream, canonical=canonical, indent=indent, width=width,
            allow_unicode=allow_unicode, line_break=line_break)
        yaml.serializer.Serializer.__init__(
            self, encoding=encoding, explicit_start=explicit_start,
            explicit_end=explicit_end, version=version, tags=tags)
        yaml.representer.SafeRepresenter.__init__(
            self, default_style=default_style,
            default_flow_style=default_flow_style, sort_keys=sort_keys)
        yaml.resolver.Resolver.__init__(self)
456
457
# Use custom dumper
data = {
    'name': 'Custom Output',
    'items': ['a', 'b', 'c'],
    'count': 42,
}

# Dump options such as indent are handed to the dumper's constructor
yaml_output = yaml.dump(data, Dumper=CustomDumper, indent=4)
print(f"Dumped with custom dumper:\n{yaml_output}")
```

### Custom Constructor Integration

```python
import yaml
472
from datetime import datetime
473
474
class DateTimeConstructor(yaml.constructor.SafeConstructor):
    """Constructor with custom datetime handling for the ``!datetime`` tag."""

    def construct_datetime(self, node):
        """Construct datetime from ISO string."""
        value = self.construct_scalar(node)
        return datetime.fromisoformat(value)


# Register once, on the class, with the plain function. PyYAML invokes a
# registered constructor as `constructor(loader, node)`, so registering a
# bound method (e.g. from __init__) would pass the loader twice and raise
# TypeError. add_constructor is a classmethod, so per-instance registration
# would also redo the same class-level work on every load.
DateTimeConstructor.add_constructor(
    '!datetime', DateTimeConstructor.construct_datetime)
485
486
class CustomLoaderWithDateTime(
    yaml.reader.Reader,
    yaml.scanner.Scanner,
    yaml.parser.Parser,
    yaml.composer.Composer,
    DateTimeConstructor,  # Use custom constructor
    yaml.resolver.Resolver,
):
    """Loader with custom datetime construction."""

    def __init__(self, stream):
        """Initialise every pipeline component; only the reader takes the stream."""
        yaml.reader.Reader.__init__(self, stream)
        yaml.scanner.Scanner.__init__(self)
        yaml.parser.Parser.__init__(self)
        yaml.composer.Composer.__init__(self)
        DateTimeConstructor.__init__(self)
        yaml.resolver.Resolver.__init__(self)
503
504
# Test custom datetime construction
yaml_input = """
created: !datetime 2024-01-15T14:30:00
modified: !datetime 2024-01-16T09:15:30
name: Test Document
"""

data = yaml.load(yaml_input, Loader=CustomLoaderWithDateTime)
print(f"Created: {data['created']} (type: {type(data['created'])})")
print(f"Modified: {data['modified']} (type: {type(data['modified'])})")
```

### Pipeline Component Inspection

```python
import yaml
520
521
def inspect_processing_components():
    """Inspect the components used in standard loaders.

    Prints the method resolution order of SafeLoader and SafeDumper, then
    the size and the first five entries of the safe constructor and safe
    representer registries.
    """
    # Registries are class attributes; reach the classes via their submodules
    safe_constructor = yaml.constructor.SafeConstructor
    safe_representer = yaml.representer.SafeRepresenter

    # Inspect SafeLoader components
    print("SafeLoader MRO:")
    for i, cls in enumerate(yaml.SafeLoader.__mro__):
        print(f" {i}: {cls.__name__}")

    print("\nSafeDumper MRO:")
    for i, cls in enumerate(yaml.SafeDumper.__mro__):
        print(f" {i}: {cls.__name__}")

    # Check constructor registry
    print(f"\nSafeConstructor has {len(safe_constructor.yaml_constructors)} constructors")
    print("Sample constructors:")
    for tag, constructor in list(safe_constructor.yaml_constructors.items())[:5]:
        print(f" {tag}: {constructor.__name__}")

    # Check representer registry
    print(f"\nSafeRepresenter has {len(safe_representer.yaml_representers)} representers")
    print("Sample representers:")
    for type_class, representer in list(safe_representer.yaml_representers.items())[:5]:
        print(f" {type_class}: {representer.__name__}")


inspect_processing_components()
```

### Component Customization

```python
import yaml
552
553
class LoggingConstructor(yaml.constructor.SafeConstructor):
    """Constructor that logs construction activity."""

    def construct_object(self, node, deep=False):
        """Print the node's type and tag, then defer to the safe constructor."""
        print(f"Constructing {type(node).__name__} with tag {node.tag}")
        return super().construct_object(node, deep)
559
560
class LoggingRepresenter(yaml.representer.SafeRepresenter):
    """Representer that logs representation activity."""

    def represent_data(self, data):
        """Print the value's type and content, then defer to the safe representer."""
        print(f"Representing {type(data).__name__}: {data}")
        return super().represent_data(data)
566
567
class LoggingLoader(
    yaml.reader.Reader,
    yaml.scanner.Scanner,
    yaml.parser.Parser,
    yaml.composer.Composer,
    LoggingConstructor,
    yaml.resolver.Resolver,
):
    """Loader whose construction stage is the logging constructor."""

    def __init__(self, stream):
        """Initialise every pipeline component; only the reader takes the stream."""
        yaml.reader.Reader.__init__(self, stream)
        yaml.scanner.Scanner.__init__(self)
        yaml.parser.Parser.__init__(self)
        yaml.composer.Composer.__init__(self)
        LoggingConstructor.__init__(self)
        yaml.resolver.Resolver.__init__(self)
582
583
class LoggingDumper(
    yaml.emitter.Emitter,
    yaml.serializer.Serializer,
    LoggingRepresenter,
    yaml.resolver.Resolver,
):
    """Dumper whose representation stage is the logging representer."""

    def __init__(self, stream,
                 default_style=None, default_flow_style=False,
                 canonical=None, indent=None, width=None,
                 allow_unicode=None, line_break=None,
                 encoding=None, explicit_start=None, explicit_end=None,
                 version=None, tags=None, sort_keys=True):
        """Initialise each component with only the options it understands.

        yaml.dump() passes every dump option to the Dumper constructor;
        handing one shared **kwargs to Emitter, Serializer and the
        representer raises TypeError because each accepts a different subset.
        """
        yaml.emitter.Emitter.__init__(
            self, stream, canonical=canonical, indent=indent, width=width,
            allow_unicode=allow_unicode, line_break=line_break)
        yaml.serializer.Serializer.__init__(
            self, encoding=encoding, explicit_start=explicit_start,
            explicit_end=explicit_end, version=version, tags=tags)
        LoggingRepresenter.__init__(
            self, default_style=default_style,
            default_flow_style=default_flow_style, sort_keys=sort_keys)
        yaml.resolver.Resolver.__init__(self)
594
595
# Test logging components
yaml_input = """
name: Logging Test
items:
  - value: 1
  - value: 2
enabled: true
"""

print("=== LOADING ===")
data = yaml.load(yaml_input, Loader=LoggingLoader)

print("\n=== DUMPING ===")
yaml_output = yaml.dump(data, Dumper=LoggingDumper)
print(f"\nFinal output:\n{yaml_output}")
```