0
# C Extensions
1
2
C-based implementations of the loaders, dumpers, and their underlying parsing and emitting components. These extensions provide significant speed improvements for YAML processing, and are only usable when PyYAML was built against the LibYAML C library.
3
4
## Capabilities
5
6
### C Parser
7
8
High-performance C-based parser that replaces the Python Reader, Scanner, Parser, and Composer components.
9
10
```python { .api }
11
class CParser:
    """
    C-based parser for improved performance.

    Combines Reader, Scanner, Parser, and Composer functionality
    in a single C implementation for maximum efficiency.

    Token and event access works on a bare CParser. The node methods
    (check_node, get_node, get_single_node) resolve tags through the
    resolver interface (resolve, descend_resolver, ascend_resolver), which
    CParser does not implement itself; call them on a class that also mixes
    in a resolver, such as CSafeLoader.
    """

    def __init__(self, stream):
        """
        Initialize C parser with input stream.

        Parameters:
        - stream: Input stream (string, bytes, or file-like object)
        """

    def dispose(self) -> None:
        """Clean up parser resources."""

    def get_token(self):
        """Get next token from stream."""

    def peek_token(self):
        """Peek at next token without consuming it."""

    def check_token(self, *choices) -> bool:
        """Check if next token matches any of the given choices."""

    def get_event(self):
        """Get next event from stream."""

    def peek_event(self):
        """Peek at next event without consuming it."""

    def check_event(self, *choices) -> bool:
        """Check if next event matches any of the given choices."""

    def check_node(self) -> bool:
        """Check if a node is available (requires a resolver mixin)."""

    def get_node(self):
        """Get next node from stream (requires a resolver mixin)."""

    def get_single_node(self):
        """Get single document node from stream (requires a resolver mixin)."""
56
```
57
58
### C Emitter
59
60
High-performance C-based emitter that replaces the Python Serializer and Emitter components.
61
62
```python { .api }
63
class CEmitter:
    """
    C-based emitter for improved performance.

    Combines Serializer and Emitter functionality
    in a single C implementation for maximum efficiency.
    """

    def __init__(self, stream, canonical=None, indent=None, width=None,
                 allow_unicode=None, line_break=None, encoding=None,
                 explicit_start=None, explicit_end=None, version=None,
                 tags=None):
        """
        Initialize C emitter with an output stream.

        Parameters:
        - stream: Output stream (object with a write() method)
        - canonical, indent, width, allow_unicode, line_break: Formatting options
        - encoding: Output encoding
        - explicit_start, explicit_end: Emit explicit document markers
        - version: YAML version directive to emit
        - tags: Tag directives to emit
        """

    def dispose(self) -> None:
        """Clean up emitter resources."""

    def emit(self, event) -> None:
        """Emit a single event to the output stream."""

    def open(self) -> None:
        """Start the output stream before serializing nodes."""

    def close(self) -> None:
        """Finish the output stream after serializing nodes."""

    def serialize(self, node) -> None:
        """Serialize a representation node as one document."""
70
```
71
72
### C Loader Classes
73
74
Complete loader implementations using C-based parsing for maximum performance.
75
76
```python { .api }
77
class CBaseLoader(CParser, BaseConstructor, BaseResolver):
    """
    Base C loader: the C-accelerated counterpart of BaseLoader.

    Pairs the C parser with BaseConstructor and BaseResolver, so no implicit
    type resolution is applied and plain scalars are loaded as strings.
    The other C loader classes do not derive from it; each one combines
    CParser with its own constructor and resolver.
    """
84
85
class CLoader(CParser, Constructor, Resolver):
    """
    Full-power C loader: the C-accelerated counterpart of Loader.

    Uses the unrestricted Constructor, so it can build arbitrary Python
    objects from tagged YAML.

    WARNING: Not safe for untrusted input. It is NOT equivalent to
    SafeLoader; use CSafeLoader when the YAML source is not trusted.
    """
92
93
class CSafeLoader(CParser, SafeConstructor, Resolver):
    """
    Safe C loader: the C-accelerated counterpart of SafeLoader.

    A distinct class, not an alias for CLoader (which is unrestricted).
    Only constructs standard YAML types, preventing code execution.
    Recommended for general use with untrusted YAML input.
    """
100
101
class CFullLoader(CParser, FullConstructor, Resolver):
    """
    C loader with extended Python object support.

    The C-parsing counterpart of FullLoader, giving better performance.
    Handles additional Python types yet stays safer than CUnsafeLoader.
    """
108
109
class CUnsafeLoader(CParser, UnsafeConstructor, Resolver):
    """
    C loader that permits construction of arbitrary Python objects.

    WARNING: Loading can execute arbitrary Python code; restrict use to
    trusted input. Offers the full feature set with C-based parsing speed.
    """
116
```
117
118
### C Dumper Classes
119
120
Complete dumper implementations using C-based emission for maximum performance.
121
122
```python { .api }
123
class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):
    """
    Base C dumper: the C-accelerated counterpart of BaseDumper.

    Pairs the C emitter with the Python BaseRepresenter and BaseResolver.
    The other C dumper classes do not derive from it; each one combines
    CEmitter with its own representer and resolver.
    """
130
131
class CDumper(CEmitter, Serializer, Representer, Resolver):
    """
    Full-featured C dumper: the C-accelerated counterpart of Dumper.

    Uses the full Representer, so it can emit Python-specific tags for
    objects that SafeDumper would reject. It is NOT equivalent to
    SafeDumper; use CSafeDumper when the output must be loadable by a
    safe loader.
    """
138
139
class CSafeDumper(CEmitter, SafeRepresenter, Resolver):
    """
    Safe C dumper: the C-accelerated counterpart of SafeDumper.

    A distinct class, not an alias for CDumper (which uses the full
    Representer). Only represents standard Python types.
    Recommended for general use when LibYAML is available.
    """
146
```
147
148
### Performance Detection
149
150
Utility for checking C extension availability.
151
152
```python { .api }
153
__with_libyaml__: bool
"""
Flag telling whether the LibYAML C extensions are available.

- True: the C extensions can be used and will speed up processing
- False: only the pure-Python implementations can be used
"""
160
```
161
162
## Usage Examples
163
164
### Basic C Extension Usage
165
166
```python
167
import yaml
168
169
# Check if C extensions are available and pick the matching safe classes once.
# CSafeLoader / CSafeDumper provide the same safety as SafeLoader / SafeDumper
# with better performance.
if yaml.__with_libyaml__:
    print("LibYAML C extensions are available")
    print("C loaders and dumpers will provide better performance")
    safe_loader, safe_dumper = yaml.CSafeLoader, yaml.CSafeDumper
else:
    print("LibYAML C extensions are not available")
    print("Using Python implementations")
    safe_loader, safe_dumper = yaml.SafeLoader, yaml.SafeDumper

# Sample document; the nesting below is significant to YAML.
yaml_input = """
name: Performance Test
data:
  - item: 1
    value: alpha
  - item: 2
    value: beta
  - item: 3
    value: gamma
settings:
  enabled: true
  timeout: 30
  debug: false
"""

# Load with the fastest safe loader available
data = yaml.load(yaml_input, Loader=safe_loader)
print(f"Loaded data: {data}")

# Dump with the fastest safe dumper available
yaml_output = yaml.dump(data, Dumper=safe_dumper)
print(f"Dumped YAML:\n{yaml_output}")
208
```
209
210
### Performance Comparison
211
212
```python
213
import yaml
214
import time
215
216
def performance_test():
    """Compare performance between Python and C implementations.

    Builds a 1000-user document and times SafeDumper on it. When LibYAML is
    available it also times CSafeDumper, SafeLoader and CSafeLoader, prints
    the timings and speed-up ratios, and asserts that both implementations
    produce the same output and the same loaded data. Returns None.
    """

    def timed(func, *args, **kwargs):
        """Call func once and return (result, elapsed_seconds)."""
        # perf_counter is monotonic and high-resolution; time.time() can
        # jump with wall-clock adjustments and is coarse on some platforms.
        started = time.perf_counter()
        result = func(*args, **kwargs)
        return result, time.perf_counter() - started

    def speedup(python_seconds, c_seconds):
        """Return the Python/C ratio without dividing by a zero duration."""
        return python_seconds / c_seconds if c_seconds > 0 else float("inf")

    # Create a larger YAML document for testing
    test_data = {
        'users': [
            {
                'id': i,
                'name': f'User{i}',
                'email': f'user{i}@example.com',
                'settings': {
                    'theme': 'dark' if i % 2 else 'light',
                    'notifications': True,
                    'language': 'en'
                },
                'scores': [float(j * i) for j in range(10)]
            }
            for i in range(1, 1001)  # 1000 users
        ],
        'metadata': {
            'version': '1.0',
            'created': '2024-01-15T10:30:00',
            'description': 'Performance test data'
        }
    }

    print(f"Test data: {len(test_data['users'])} users")

    # Test Python SafeDumper
    python_yaml, python_dump_time = timed(
        yaml.dump, test_data, Dumper=yaml.SafeDumper)

    if not yaml.__with_libyaml__:
        print(f"Python SafeDumper: {python_dump_time:.4f} seconds")
        print("C extensions not available for comparison")
        return

    # Test C SafeDumper
    c_yaml, c_dump_time = timed(yaml.dump, test_data, Dumper=yaml.CSafeDumper)

    print(f"Python SafeDumper: {python_dump_time:.4f} seconds")
    print(f"C SafeDumper: {c_dump_time:.4f} seconds")
    print(f"C dumper is {speedup(python_dump_time, c_dump_time):.2f}x faster")

    # Verify outputs are equivalent
    assert python_yaml == c_yaml, "Outputs should be identical"

    # Test loading performance
    python_data, python_load_time = timed(
        yaml.load, python_yaml, Loader=yaml.SafeLoader)
    c_data, c_load_time = timed(yaml.load, c_yaml, Loader=yaml.CSafeLoader)

    print(f"Python SafeLoader: {python_load_time:.4f} seconds")
    print(f"C SafeLoader: {c_load_time:.4f} seconds")
    print(f"C loader is {speedup(python_load_time, c_load_time):.2f}x faster")

    # Verify loaded data is equivalent
    assert python_data == c_data, "Loaded data should be identical"


performance_test()
283
```
284
285
### Automatic C Extension Usage
286
287
```python
288
import yaml
289
290
def get_best_loader():
    """Return CSafeLoader when LibYAML is available, otherwise SafeLoader."""
    if yaml.__with_libyaml__:
        return yaml.CSafeLoader
    return yaml.SafeLoader
293
294
def get_best_dumper():
    """Return CSafeDumper when LibYAML is available, otherwise SafeDumper."""
    if yaml.__with_libyaml__:
        return yaml.CSafeDumper
    return yaml.SafeDumper
297
298
def load_yaml_optimized(yaml_input):
    """Parse yaml_input with whichever loader get_best_loader() selects."""
    loader_cls = get_best_loader()
    return yaml.load(yaml_input, Loader=loader_cls)
301
302
def dump_yaml_optimized(data):
    """Serialize data with whichever dumper get_best_dumper() selects."""
    dumper_cls = get_best_dumper()
    return yaml.dump(data, Dumper=dumper_cls)
305
306
# Usage
# Sample configuration; the nesting below is significant to YAML.
yaml_config = """
app:
  name: My Application
  version: 1.0.0
  features:
    - authentication
    - logging
    - caching
database:
  host: localhost
  port: 5432
  name: myapp_db
"""

# Automatically uses C extensions if available
config = load_yaml_optimized(yaml_config)
print(f"Loaded config with {get_best_loader().__name__}")

# Dump back using optimal dumper
optimized_yaml = dump_yaml_optimized(config)
print(f"Dumped config with {get_best_dumper().__name__}")
328
```
329
330
### Direct C Component Usage
331
332
```python
333
import yaml
334
from io import StringIO
335
336
def use_c_parser_directly():
    """Use the C components directly for fine-grained control.

    Streams parsing events from a bare CParser, then composes the document
    node and constructs Python data from it with CSafeLoader. Prints a
    notice and returns early when the C extensions are not available.
    """

    if not yaml.__with_libyaml__:
        print("C extensions not available")
        return

    # CParser is not re-exported at the package top level (yaml.cyaml's
    # __all__ lists only the loader and dumper classes), and the import only
    # succeeds when LibYAML is present, so it stays behind the check above.
    from yaml.cyaml import CParser

    yaml_input = """
    documents:
      - title: Document 1
        content: Content of first document
      - title: Document 2
        content: Content of second document
    """

    # Use CParser directly: the event API needs no resolver or constructor
    parser = CParser(yaml_input)

    try:
        print("Parsing events:")
        while True:
            event = parser.get_event()
            print(f" {type(event).__name__}")
            if isinstance(event, yaml.StreamEndEvent):
                break
    finally:
        parser.dispose()

    # Composing nodes calls the resolver interface, which a bare CParser
    # does not implement, so use a loader class that mixes one in.
    loader = yaml.CSafeLoader(yaml_input)
    try:
        node = loader.get_single_node()
        print(f"\nRoot node: {type(node).__name__} with tag {node.tag}")

        # Manually construct from node using the loader's safe constructor
        data = loader.construct_document(node)
        print(f"Constructed data: {data}")

    finally:
        loader.dispose()


use_c_parser_directly()
379
```
380
381
### Loader/Dumper Selection Strategy
382
383
```python
384
import yaml
385
386
class YAMLProcessor:
    """YAML processor that automatically selects optimal implementations.

    Attributes set by __init__: use_c (bool), safe_loader, full_loader,
    unsafe_loader, safe_dumper, dumper (the selected PyYAML classes).
    """

    def __init__(self, use_c_extensions=None):
        """
        Initialize processor with C extension preference.

        Parameters:
        - use_c_extensions: True to prefer C (silently falls back to Python
          when LibYAML is unavailable), False to force Python, None for auto
        """
        c_available = yaml.__with_libyaml__
        wants_c = c_available if use_c_extensions is None else use_c_extensions
        # C is used only when both requested and available; normalise to a
        # real bool so callers can rely on the attribute's type.
        self.use_c = bool(wants_c and c_available)

        # Select optimal loaders and dumpers
        if self.use_c:
            self.safe_loader = yaml.CSafeLoader
            self.full_loader = yaml.CFullLoader
            self.unsafe_loader = yaml.CUnsafeLoader
            self.safe_dumper = yaml.CSafeDumper
            self.dumper = yaml.CDumper
            print("Using C-based implementations")
        else:
            self.safe_loader = yaml.SafeLoader
            self.full_loader = yaml.FullLoader
            self.unsafe_loader = yaml.UnsafeLoader
            self.safe_dumper = yaml.SafeDumper
            self.dumper = yaml.Dumper
            print("Using Python-based implementations")

    def safe_load(self, yaml_input):
        """Load YAML safely with optimal performance."""
        return yaml.load(yaml_input, Loader=self.safe_loader)

    def full_load(self, yaml_input):
        """Load YAML with full features and optimal performance."""
        return yaml.load(yaml_input, Loader=self.full_loader)

    def safe_dump(self, data, **kwargs):
        """Dump YAML safely with optimal performance.

        Extra keyword arguments are passed through to yaml.dump.
        """
        return yaml.dump(data, Dumper=self.safe_dumper, **kwargs)

    def dump(self, data, **kwargs):
        """Dump YAML with full features and optimal performance.

        Extra keyword arguments are passed through to yaml.dump.
        """
        return yaml.dump(data, Dumper=self.dumper, **kwargs)
432
433
# Usage
processor = YAMLProcessor()  # implementation chosen from what is installed

config_data = {
    'app': {'name': 'Test', 'version': '1.0'},
    'database': {'host': 'localhost', 'port': 5432},
    'features': ['auth', 'logging', 'metrics'],
}

# Dump and reload through the selected implementation
yaml_output = processor.safe_dump(config_data, indent=2)
loaded_data = processor.safe_load(yaml_output)

round_trip_ok = config_data == loaded_data
print(f"Round-trip successful: {round_trip_ok}")

# Pure-Python output, kept for the comparison below
python_processor = YAMLProcessor(use_c_extensions=False)
python_yaml = python_processor.safe_dump(config_data, indent=2)

# C output, produced only when LibYAML is present
if yaml.__with_libyaml__:
    c_processor = YAMLProcessor(use_c_extensions=True)
    c_yaml = c_processor.safe_dump(config_data, indent=2)

    outputs_match = python_yaml == c_yaml
    print(f"Python and C outputs identical: {outputs_match}")
458
```