# Low-Level Processing

Lower-level YAML processing functions for advanced use cases requiring fine-grained control over the processing pipeline. These functions provide direct access to each stage of YAML processing.

## Capabilities

### Scanning and Parsing

Functions that provide access to the tokenization and parsing stages of YAML processing.

```python { .api }
def scan(stream, Loader=None):
    """
    Scan YAML stream and yield tokens.

    Parameters:
    - stream: YAML input as string, bytes, or file-like object
    - Loader: Loader class to use for scanning (defaults to SafeLoader)

    Yields:
    - Token objects representing YAML syntax elements

    Raises:
    - ScannerError: If invalid YAML syntax is encountered
    """

def parse(stream, Loader=None):
    """
    Parse YAML stream and yield events.

    Parameters:
    - stream: YAML input as string, bytes, or file-like object
    - Loader: Loader class to use for parsing (defaults to SafeLoader)

    Yields:
    - Event objects representing YAML structure elements

    Raises:
    - ParserError: If invalid YAML structure is encountered
    """
```

### Composition

Functions that build node trees from YAML events, handling anchors and aliases.

```python { .api }
def compose(stream, Loader=None):
    """
    Compose YAML stream into a node tree.

    Parameters:
    - stream: YAML input as string, bytes, or file-like object
    - Loader: Loader class to use for composition (defaults to SafeLoader)

    Returns:
    - Root Node object representing the document structure

    Raises:
    - ComposerError: If node composition fails (e.g., duplicate anchors)
    """

def compose_all(stream, Loader=None):
    """
    Compose all YAML documents from stream into node trees.

    Parameters:
    - stream: YAML input containing multiple documents
    - Loader: Loader class to use for composition (defaults to SafeLoader)

    Yields:
    - Node objects representing each document structure

    Raises:
    - ComposerError: If node composition fails
    """
```

### Serialization

Functions that convert node trees back to YAML events and text.

```python { .api }
def serialize(
    node,
    stream=None,
    Dumper=None,
    *,
    canonical=None,
    indent=None,
    width=None,
    allow_unicode=None,
    line_break=None,
    encoding=None,
    explicit_start=None,
    explicit_end=None,
    version=None,
    tags=None
):
    """
    Serialize a node tree to YAML events.

    Parameters:
    - node: Node object to serialize
    - stream: Output stream (if None, returns string)
    - Dumper: Dumper class to use (defaults to SafeDumper)
    - (formatting parameters same as dump function)

    Returns:
    - YAML string if stream is None, otherwise None

    Raises:
    - SerializerError: If serialization fails
    """

def serialize_all(
    nodes,
    stream=None,
    Dumper=None,
    *,
    canonical=None,
    indent=None,
    width=None,
    allow_unicode=None,
    line_break=None,
    encoding=None,
    explicit_start=None,
    explicit_end=None,
    version=None,
    tags=None
):
    """
    Serialize multiple node trees to YAML events.

    Parameters:
    - nodes: Sequence of Node objects to serialize
    - stream: Output stream (if None, returns string)
    - Dumper: Dumper class to use (defaults to SafeDumper)
    - (formatting parameters same as dump function)

    Returns:
    - YAML string with multiple documents if stream is None, otherwise None

    Raises:
    - SerializerError: If serialization fails
    """
```

### Event Emission

Function for converting YAML events directly to text output.

```python { .api }
def emit(
    events,
    stream=None,
    Dumper=None,
    canonical=None,
    indent=None,
    width=None,
    allow_unicode=None,
    line_break=None,
    encoding=None,
    explicit_start=None,
    explicit_end=None,
    version=None,
    tags=None
):
    """
    Emit YAML events as text.

    Parameters:
    - events: Iterable of Event objects
    - stream: Output stream (if None, returns string)
    - Dumper: Dumper class to use (defaults to SafeDumper)
    - (formatting parameters same as dump function)

    Returns:
    - YAML string if stream is None, otherwise None

    Raises:
    - EmitterError: If emission fails
    """
```

## Usage Examples

### Token Scanning

```python
import yaml

yaml_input = """
name: John Doe
age: 30
skills:
  - Python
  - YAML
"""

print("Tokens:")
for token in yaml.scan(yaml_input):
    print(f"  {type(token).__name__}: {token}")

# Output shows tokens like:
# StreamStartToken
# BlockMappingStartToken
# KeyToken
# ScalarToken: name
# ValueToken
# ScalarToken: John Doe
# KeyToken
# ScalarToken: age
# ...
```

### Event Parsing

```python
import yaml

yaml_input = """
users:
  - name: Alice
    role: admin
  - name: Bob
    role: user
"""

print("Events:")
for event in yaml.parse(yaml_input):
    print(f"  {type(event).__name__}: {event}")

# Output shows events like:
# StreamStartEvent
# DocumentStartEvent
# MappingStartEvent
# ScalarEvent: users
# SequenceStartEvent
# MappingStartEvent
# ScalarEvent: name
# ScalarEvent: Alice
# ...
```

### Node Composition

```python
import yaml

yaml_input = """
config: &default_config
  debug: false
  timeout: 30

development:
  <<: *default_config
  debug: true

production:
  <<: *default_config
  timeout: 60
"""

# Compose into node tree
root_node = yaml.compose(yaml_input)
print(f"Root node: {type(root_node).__name__}")
print(f"Tag: {root_node.tag}")
print(f"Value type: {type(root_node.value)}")

# Inspect node structure
if hasattr(root_node, 'value'):
    for key_node, value_node in root_node.value:
        print(f"Key: {key_node.value} -> Value type: {type(value_node).__name__}")
```

### Custom Processing Pipeline

```python
import yaml

def custom_yaml_processor(yaml_input):
    """Custom YAML processing with intermediate inspection."""

    print("=== SCANNING ===")
    tokens = list(yaml.scan(yaml_input))
    print(f"Found {len(tokens)} tokens")

    print("\n=== PARSING ===")
    events = list(yaml.parse(yaml_input))
    print(f"Found {len(events)} events")

    print("\n=== COMPOSING ===")
    node = yaml.compose(yaml_input)
    print(f"Root node: {type(node).__name__} with tag {node.tag}")

    print("\n=== CONSTRUCTING ===")
    # Use the composed node to construct Python objects
    loader = yaml.SafeLoader(yaml_input)
    try:
        constructed_data = loader.construct_document(node)
        print(f"Constructed: {constructed_data}")
    finally:
        loader.dispose()

    return constructed_data

# Test custom processor
yaml_input = """
name: Test Document
items:
  - id: 1
    value: alpha
  - id: 2
    value: beta
"""

result = custom_yaml_processor(yaml_input)
```

### Manual Event Generation

```python
import yaml

def create_yaml_events():
    """Create YAML events manually and emit them."""

    events = [
        yaml.StreamStartEvent(),
        yaml.DocumentStartEvent(),
        yaml.MappingStartEvent(anchor=None, tag=None, implicit=True),

        # Key: name
        yaml.ScalarEvent(anchor=None, tag=None, implicit=(True, True), value='name'),
        yaml.ScalarEvent(anchor=None, tag=None, implicit=(True, True), value='Manual YAML'),

        # Key: items
        yaml.ScalarEvent(anchor=None, tag=None, implicit=(True, True), value='items'),
        yaml.SequenceStartEvent(anchor=None, tag=None, implicit=True),

        # Sequence items
        yaml.ScalarEvent(anchor=None, tag=None, implicit=(True, True), value='item1'),
        yaml.ScalarEvent(anchor=None, tag=None, implicit=(True, True), value='item2'),

        yaml.SequenceEndEvent(),
        yaml.MappingEndEvent(),
        yaml.DocumentEndEvent(),
        yaml.StreamEndEvent()
    ]

    # Emit events to YAML text
    yaml_output = yaml.emit(events)
    return yaml_output

generated_yaml = create_yaml_events()
print("Generated YAML:")
print(generated_yaml)

# Verify by parsing back
data = yaml.safe_load(generated_yaml)
print(f"Parsed back: {data}")
```

### Node Tree Manipulation

```python
import yaml

def modify_yaml_tree(yaml_input):
    """Modify YAML at the node level before construction."""

    # Compose to node tree
    root_node = yaml.compose(yaml_input)

    # Find and modify specific nodes
    if isinstance(root_node, yaml.MappingNode):
        for key_node, value_node in root_node.value:
            if (isinstance(key_node, yaml.ScalarNode) and
                    key_node.value == 'debug'):
                # Change debug value to True
                if isinstance(value_node, yaml.ScalarNode):
                    value_node.value = 'true'
                    value_node.tag = 'tag:yaml.org,2002:bool'

            elif (isinstance(key_node, yaml.ScalarNode) and
                    key_node.value == 'version'):
                # Increment version number
                if isinstance(value_node, yaml.ScalarNode):
                    try:
                        current_version = float(value_node.value)
                        value_node.value = str(current_version + 0.1)
                    except ValueError:
                        pass

    # Serialize modified tree back to YAML
    modified_yaml = yaml.serialize(root_node)
    return modified_yaml

# Test node manipulation
original_yaml = """
app_name: MyApp
version: 1.0
debug: false
features:
  - auth
  - logging
"""

print("Original YAML:")
print(original_yaml)

modified_yaml = modify_yaml_tree(original_yaml)
print("\nModified YAML:")
print(modified_yaml)

# Parse modified YAML to verify
modified_data = yaml.safe_load(modified_yaml)
print(f"\nParsed modified data: {modified_data}")
```

### Stream Processing

```python
import yaml
from io import StringIO

def stream_yaml_processing():
    """Process YAML using streams, deferring Python object construction."""

    # Create large YAML document
    yaml_content = """
users:
"""

    # Add many users
    for i in range(1000):
        yaml_content += f"""
  - id: {i}
    name: User{i}
    email: user{i}@example.com
"""

    print(f"Processing YAML with {len(yaml_content)} characters")

    # Process with streaming
    stream = StringIO(yaml_content)

    # Use compose to build the node tree without constructing Python objects
    # (note: the node tree itself is still held fully in memory)
    root_node = yaml.compose(stream)
    print(f"Composed node tree: {type(root_node).__name__}")

    # Inspect the tree structure without constructing Python objects
    user_count = 0
    if isinstance(root_node, yaml.MappingNode):
        for key_node, value_node in root_node.value:
            if (isinstance(key_node, yaml.ScalarNode) and
                    key_node.value == 'users' and
                    isinstance(value_node, yaml.SequenceNode)):
                user_count = len(value_node.value)
                break

    print(f"Found {user_count} users without constructing Python objects")

    # Now selectively construct only what we need
    stream.seek(0)  # Reset stream
    full_data = yaml.safe_load(stream)
    first_user = full_data['users'][0] if full_data['users'] else None
    last_user = full_data['users'][-1] if full_data['users'] else None

    print(f"First user: {first_user}")
    print(f"Last user: {last_user}")

stream_yaml_processing()
```