YAML parser and emitter for Python with complete YAML 1.1 support, Unicode handling, and optional LibYAML bindings for high performance
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Advanced customization capabilities for extending YAML processing with custom constructors, representers, and resolvers. Tailor PyYAML behavior to handle custom data types and implement domain-specific YAML formats.
Add custom constructors to handle specific YAML tags and convert them to Python objects during loading.
def add_constructor(tag, constructor, Loader=None):
"""
Add a constructor for the given tag.
Args:
tag (str): YAML tag to handle (e.g., '!custom', 'tag:example.com,2000:app/custom')
constructor (Callable): Function that accepts (loader, node) and returns Python object
Loader (type, optional): Specific loader class to add to. If None, the constructor is registered on Loader, FullLoader, and UnsafeLoader.
Constructor Function Signature:
def constructor(loader: BaseLoader, node: Node) -> Any
"""
def add_multi_constructor(tag_prefix, multi_constructor, Loader=None):
"""
Add a multi-constructor for the given tag prefix.
Multi-constructor is called for any tag that starts with the specified prefix.
Args:
tag_prefix (str): Tag prefix to match (e.g., '!custom:', 'tag:example.com,2000:app/')
multi_constructor (Callable): Function that accepts (loader, tag_suffix, node)
Loader (type, optional): Specific loader class to add to
Multi-Constructor Function Signature:
def multi_constructor(loader: BaseLoader, tag_suffix: str, node: Node) -> Any
"""

Add custom representers to control how Python objects are converted to YAML during dumping.
def add_representer(data_type, representer, Dumper=Dumper):
"""
Add a representer for the given type.
Args:
data_type (type): Python type to represent
representer (Callable): Function that accepts (dumper, data) and returns Node
Dumper (type, optional): Dumper class to add to (default: Dumper)
Representer Function Signature:
def representer(dumper: BaseDumper, data: Any) -> Node
"""
def add_multi_representer(data_type, multi_representer, Dumper=Dumper):
"""
Add a representer for the given type and its subclasses.
Multi-representer handles the specified type and all its subclasses.
Args:
data_type (type): Base Python type to represent
multi_representer (Callable): Function that accepts (dumper, data) and returns Node
Dumper (type, optional): Dumper class to add to
Multi-Representer Function Signature:
def multi_representer(dumper: BaseDumper, data: Any) -> Node
"""

Add custom resolvers to automatically detect and tag scalar values based on patterns.
def add_implicit_resolver(tag, regexp, first=None, Loader=None, Dumper=Dumper):
"""
Add an implicit scalar detector.
If a scalar value matches the given regexp, the corresponding tag is assigned.
Args:
tag (str): YAML tag to assign when pattern matches
regexp (re.Pattern): Regular expression to match scalar values
first (str, optional): Sequence of possible first characters for optimization
Loader (type, optional): Loader class to add to
Dumper (type, optional): Dumper class to add to
"""
def add_path_resolver(tag, path, kind=None, Loader=None, Dumper=Dumper):
"""
Add a path-based resolver for the given tag.
A path is a list of keys that forms a path to a node in the representation tree.
Args:
tag (str): YAML tag to assign when path matches
path (list): List of keys forming path to node (strings, integers, or None)
kind (type, optional): Node type to match (ScalarNode, SequenceNode, MappingNode)
Loader (type, optional): Loader class to add to
Dumper (type, optional): Dumper class to add to
"""

import yaml
from decimal import Decimal
from datetime import datetime
import re
# Custom constructor for Decimal type
def decimal_constructor(loader, node):
    """Convert a ``!decimal`` YAML scalar to a ``Decimal``.

    Args:
        loader: Loader instance; only ``construct_scalar`` is used.
        node: Scalar node carrying the decimal text.

    Returns:
        Decimal: Built directly from the scalar value returned by the loader.
    """
    value = loader.construct_scalar(node)
    return Decimal(value)
# Custom representer for Decimal type
def decimal_representer(dumper, data):
    """Represent a ``Decimal`` as a scalar tagged ``!decimal``.

    Args:
        dumper: Dumper instance; only ``represent_scalar`` is used.
        data: The ``Decimal`` to serialize.

    Returns:
        The node produced by ``dumper.represent_scalar`` for ``str(data)``.
    """
    return dumper.represent_scalar('!decimal', str(data))
# Register custom handlers
yaml.add_constructor('!decimal', decimal_constructor)
yaml.add_representer(Decimal, decimal_representer)

# Usage
yaml_content = """
price: !decimal 19.99
tax_rate: !decimal 0.08
"""

# NOTE: yaml.Loader can construct arbitrary Python objects; use it only on
# trusted input such as this literal.
data = yaml.load(yaml_content, yaml.Loader)
print(f"Price: {data['price']} ({type(data['price'])})")  # Decimal

# Dump back to YAML
output_data = {'total': Decimal('27.50'), 'discount': Decimal('5.00')}
yaml_output = yaml.dump(output_data)
print(yaml_output)
# Custom-tagged scalars are emitted quoted:
# discount: !decimal '5.00'
# total: !decimal '27.50'

import yaml
def env_constructor(loader, tag_suffix, node):
    """Convert an ``!env:<type>`` scalar to the type named by the tag suffix.

    The value comes from the YAML scalar itself; this function does not read
    the process environment.

    Args:
        loader: Loader instance; only ``construct_scalar`` is used.
        tag_suffix: Part of the tag after the ``!env:`` prefix.
        node: Scalar node holding the raw text.

    Returns:
        ``str``, ``int``, ``bool`` or ``list`` for the suffixes of the same
        name; the unconverted scalar value for any other suffix.

    Raises:
        ValueError: If the suffix is ``int`` and the text is not an integer.
    """
    value = loader.construct_scalar(node)
    if tag_suffix == 'str':
        return str(value)
    if tag_suffix == 'int':
        return int(value)
    if tag_suffix == 'bool':
        # Anything outside this set of spellings is treated as False.
        return value.lower() in ('true', '1', 'yes', 'on')
    if tag_suffix == 'list':
        return value.split(',')
    return value
# Register multi-constructor for !env: prefix
yaml.add_multi_constructor('!env:', env_constructor)

yaml_content = """
database_host: !env:str localhost
database_port: !env:int 5432
debug_mode: !env:bool true
allowed_hosts: !env:list host1,host2,host3
"""

# NOTE: yaml.Loader can construct arbitrary Python objects; use it only on
# trusted input such as this literal.
data = yaml.load(yaml_content, yaml.Loader)
print(f"Port: {data['database_port']} ({type(data['database_port'])})")  # int
print(f"Debug: {data['debug_mode']} ({type(data['debug_mode'])})")  # bool
print(f"Hosts: {data['allowed_hosts']}")  # ['host1', 'host2', 'host3']

import yaml
import re
# Add resolver for email addresses
email_pattern = re.compile(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$')
# `first` lists the characters a candidate scalar may start with. It has to
# cover everything the regex's leading character class accepts; with only
# lowercase letters, addresses such as "Admin@example.com" or
# "42@example.com" would never be tested against the pattern.
yaml.add_implicit_resolver(
    '!email',
    email_pattern,
    first=list('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._%+-'),
)
# Constructor for email addresses
def email_constructor(loader, node):
    """Build a dict with the address and its domain from an ``!email`` scalar.

    Args:
        loader: Loader instance; only ``construct_scalar`` is used.
        node: Scalar node holding the address text.

    Returns:
        dict: ``{'email': <address>, 'domain': <text after the first '@'>}``.

    Raises:
        IndexError: If the scalar contains no ``@``.
    """
    value = loader.construct_scalar(node)
    return {'email': value, 'domain': value.split('@')[1]}
yaml.add_constructor('!email', email_constructor)

yaml_content = """
admin: admin@example.com
support: support@company.org
"""

# NOTE: yaml.Loader can construct arbitrary Python objects; use it only on
# trusted input such as this literal.
data = yaml.load(yaml_content, yaml.Loader)
print(f"Admin: {data['admin']}")  # {'email': 'admin@example.com', 'domain': 'example.com'}

import yaml
from yaml.nodes import ScalarNode
# Add path resolver for configuration values.
# The path ['config', None] is intended to select the scalar values directly
# under the top-level 'config' key, with None presumably acting as "any key"
# (confirm against the add_path_resolver documentation for your PyYAML version).
yaml.add_path_resolver('!config', ['config', None], ScalarNode)
def config_constructor(loader, node):
    """Return the scalar text of a ``!config`` node prefixed with ``CONFIG:``.

    Args:
        loader: Loader instance; only ``construct_scalar`` is used.
        node: Scalar node tagged ``!config``.

    Returns:
        str: ``"CONFIG:"`` followed by the scalar value.
    """
    value = loader.construct_scalar(node)
    return f"CONFIG:{value}"
yaml.add_constructor('!config', config_constructor)

# The nested keys must be indented under `config:`; at column 0 they would be
# top-level keys and `config` itself would load as None.
yaml_content = """
config:
  database_url: postgresql://localhost/myapp
  api_key: secret123
  timeout: 30
"""

# NOTE: yaml.Loader can construct arbitrary Python objects; use it only on
# trusted input such as this literal.
data = yaml.load(yaml_content, yaml.Loader)
print(data['config']['database_url'])  # CONFIG:postgresql://localhost/myapp

import yaml
from datetime import datetime
import json
class ApplicationLoader(yaml.SafeLoader):
    """Custom loader for application-specific YAML.

    Subclass of ``yaml.SafeLoader`` that exists so application constructors
    can be registered on it instead of on the stock loader classes.
    """
class ApplicationDumper(yaml.SafeDumper):
    """Custom dumper for application-specific YAML.

    Subclass of ``yaml.SafeDumper`` that exists so application representers
    can be registered on it instead of on the stock dumper classes.
    """
# JSON constructor
def json_constructor(loader, node):
    """Parse a JSON document embedded in a ``!json`` YAML scalar.

    Args:
        loader: Loader instance; only ``construct_scalar`` is used.
        node: Scalar node whose text is a JSON document.

    Returns:
        The Python value produced by ``json.loads``.

    Raises:
        json.JSONDecodeError: If the scalar text is not valid JSON.
    """
    value = loader.construct_scalar(node)
    return json.loads(value)
# JSON representer
def json_representer(dumper, data):
    """Represent ``data`` as a ``!json`` scalar holding its JSON encoding.

    Args:
        dumper: Dumper instance; only ``represent_scalar`` is used.
        data: JSON-serializable value (registered for ``dict`` in this example).

    Returns:
        The node produced by ``dumper.represent_scalar``.
    """
    return dumper.represent_scalar('!json', json.dumps(data))
# Register with custom classes: the handlers are attached to ApplicationLoader
# and ApplicationDumper directly rather than through the module-level
# yaml.add_* helpers. With the representer below, every dict dumped through
# ApplicationDumper is handed to json_representer.
ApplicationLoader.add_constructor('!json', json_constructor)
ApplicationDumper.add_representer(dict, json_representer)
# Timestamp constructor
def timestamp_constructor(loader, node):
    """Convert a ``!timestamp`` scalar in ISO 8601 form to a ``datetime``.

    Args:
        loader: Loader instance; only ``construct_scalar`` is used.
        node: Scalar node holding the timestamp text.

    Returns:
        datetime: Result of ``datetime.fromisoformat`` on the scalar text.

    Raises:
        ValueError: If the text is not accepted by ``datetime.fromisoformat``.
    """
    value = loader.construct_scalar(node)
    return datetime.fromisoformat(value)
ApplicationLoader.add_constructor('!timestamp', timestamp_constructor)

# The JSON text must be a quoted scalar. Unquoted, YAML parses `{...}` as a
# flow mapping, and json_constructor's construct_scalar() call rejects a
# mapping node.
yaml_content = """
metadata: !json '{"version": "1.0", "author": "Developer"}'
created: !timestamp 2023-01-01T10:00:00
"""

data = yaml.load(yaml_content, ApplicationLoader)
print(f"Metadata: {data['metadata']}")  # {'version': '1.0', 'author': 'Developer'}
print(f"Created: {data['created']}") # datetime object

Create self-serializing objects using the YAMLObject base class:
import yaml
class Person(yaml.YAMLObject):
    """Person record that is loaded from and dumped to the ``!Person`` tag."""

    # Tag and loader/dumper classes this type is associated with.
    yaml_tag = '!Person'
    yaml_loader = yaml.Loader
    yaml_dumper = yaml.Dumper

    def __init__(self, name, age, email):
        """Store the person's name, age and email address."""
        self.name = name
        self.age = age
        self.email = email

    def __repr__(self):
        """Return a constructor-style representation of the person."""
        return f"Person(name={self.name!r}, age={self.age!r}, email={self.email!r})"
# Usage - automatic registration
# The attributes must be indented under `person: !Person` so that they form
# the mapping the tag applies to.
yaml_content = """
person: !Person
  name: John Doe
  age: 30
  email: john@example.com
"""

# NOTE: yaml.Loader can construct arbitrary Python objects; use it only on
# trusted input such as this literal.
data = yaml.load(yaml_content, yaml.Loader)
print(data['person'])  # Person(name='John Doe', age=30, email='john@example.com')

# Automatic dumping
person = Person("Jane Smith", 25, "jane@example.com")
yaml_output = yaml.dump({'employee': person})
print(yaml_output)

import yaml
class DatabaseConfig:
    """Connection settings for a PostgreSQL database."""

    def __init__(self, host, port, database):
        """Store the settings and derive ``connection_string`` from them.

        Args:
            host: Database server host name.
            port: Database server port.
            database: Name of the database.
        """
        self.host = host
        self.port = port
        self.database = database
        self.connection_string = f"postgresql://{host}:{port}/{database}"
def database_constructor(loader, node):
    """Build a ``DatabaseConfig`` from a ``!database`` mapping node.

    Args:
        loader: Loader instance; only ``construct_mapping`` is used.
        node: Mapping node expected to contain ``host``, ``port`` and
            ``database`` keys.

    Returns:
        DatabaseConfig: Populated from the three required keys.

    Raises:
        yaml.constructor.ConstructorError: If any required key is missing;
            the error is reported at the start of ``node``.
    """
    # deep=True so nested values are constructed before validation.
    config = loader.construct_mapping(node, deep=True)

    # Validate required fields
    required = ['host', 'port', 'database']
    missing = [field for field in required if field not in config]
    if missing:
        # ConstructorError is defined in yaml.constructor; it is not
        # re-exported at the top level of the yaml package.
        raise yaml.constructor.ConstructorError(
            None, None,
            f"Missing required fields: {missing}",
            node.start_mark,
        )

    return DatabaseConfig(
        host=config['host'],
        port=config['port'],
        database=config['database'],
    )
yaml.add_constructor('!database', database_constructor)

# The settings must be indented under `prod_db: !database` so that they form
# the mapping node passed to database_constructor.
yaml_content = """
prod_db: !database
  host: prod.example.com
  port: 5432
  database: production
"""

# NOTE: yaml.Loader can construct arbitrary Python objects; use it only on
# trusted input such as this literal.
data = yaml.load(yaml_content, yaml.Loader)
print(data['prod_db'].connection_string)

import yaml
class VersionedData:
    """Payload paired with the version string taken from its YAML tag."""

    def __init__(self, version, data):
        """Store the version identifier and the associated data."""
        self.version = version
        self.data = data
def versioned_multi_constructor(loader, tag_suffix, node):
    """Handle versioned data tags like !v1.0, !v2.0, etc.

    Args:
        loader: Loader instance; only ``construct_mapping`` is used.
        tag_suffix: Text following the ``!v`` prefix, used as the version.
        node: Mapping node holding the versioned payload.

    Returns:
        VersionedData: The version string together with the constructed mapping.
    """
    version = tag_suffix
    data = loader.construct_mapping(node, deep=True)
    return VersionedData(version, data)
def versioned_representer(dumper, data):
    """Represent versioned data as a mapping tagged ``!v<version>``.

    Args:
        dumper: Dumper instance; only ``represent_mapping`` is used.
        data: Object with ``version`` and ``data`` attributes.

    Returns:
        The node produced by ``dumper.represent_mapping`` for ``data.data``.
    """
    tag = f'!v{data.version}'
    return dumper.represent_mapping(tag, data.data)
yaml.add_multi_constructor('!v', versioned_multi_constructor)
yaml.add_representer(VersionedData, versioned_representer)

# The payload must be indented under `config: !v1.2` so that it forms the
# mapping node passed to versioned_multi_constructor.
yaml_content = """
config: !v1.2
  api_endpoint: /api/v1
  features: [auth, logging]
"""

# NOTE: yaml.Loader can construct arbitrary Python objects; use it only on
# trusted input such as this literal.
data = yaml.load(yaml_content, yaml.Loader)
print(f"Version: {data['config'].version}")  # 1.2
print(f"Features: {data['config'].data['features']}") # ['auth', 'logging']

Install with Tessl CLI
npx tessl i tessl/pypi-pyyaml