CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-types-pyyaml

Type stubs for PyYAML, a full-featured YAML framework for Python

Overview
Eval results
Files

c-extensions.mddocs/

C Extensions

C-based implementations of loaders, dumpers, and processing components. These extensions provide significant speed improvements for YAML processing when the LibYAML C library is available.

Capabilities

C Parser

High-performance C-based parser that replaces the Python Reader, Scanner, Parser, and Composer components.

class CParser:
    """
    C-based parser for improved performance.
    
    Combines Reader, Scanner, Parser, and Composer functionality
    in a single C implementation for maximum efficiency.

    The methods listed here fall into three groups:
    - token access: get_token, peek_token, check_token
    - event access: get_event, peek_event, check_event
    - node access: check_node, get_node, get_single_node
    plus dispose() for releasing parser resources.
    """
    
    def __init__(self, stream) -> None:
        """
        Initialize C parser with input stream.
        
        Parameters:
        - stream: Input stream (string, bytes, or file-like object)
        """
    
    def dispose(self) -> None:
        """Clean up parser resources."""
    
    def get_token(self):
        """Get next token from stream (consumes it)."""
    
    def peek_token(self):
        """Peek at next token without consuming it."""
    
    def check_token(self, *choices) -> bool:
        """
        Check if next token matches any of the given choices.

        NOTE(review): choices are presumably token classes, and an empty
        choices list presumably means "any token" -- confirm against PyYAML.
        """
    
    def get_event(self):
        """Get next event from stream (consumes it)."""
    
    def peek_event(self):
        """Peek at next event without consuming it."""
    
    def check_event(self, *choices) -> bool:
        """
        Check if next event matches any of the given choices.

        NOTE(review): choices are presumably event classes -- confirm
        against PyYAML.
        """
    
    def check_node(self) -> bool:
        """Check if a node is available."""
    
    def get_node(self):
        """Get next node from stream."""
    
    def get_single_node(self):
        """Get single document node from stream."""

C Emitter

High-performance C-based emitter that replaces the Python Serializer and Emitter components.

class CEmitter:
    """
    C-based emitter for improved performance.
    
    Combines Serializer and Emitter functionality
    in a single C implementation for maximum efficiency.

    No methods are listed for this class here; it serves as the first base
    class of the C dumper classes (CBaseDumper, CDumper, CSafeDumper).
    """

C Loader Classes

Complete loader implementations using C-based parsing for maximum performance.

class CBaseLoader(CParser, BaseConstructor, BaseResolver):
    """
    Base C loader combining C parser with Python constructor and resolver.
    
    Pairs CParser with BaseConstructor and BaseResolver; the other C loader
    classes pair the same parser with richer constructors and Resolver.
    """

class CLoader(CParser, SafeConstructor, Resolver):
    """
    Standard C loader, declared here with SafeConstructor and Resolver.
    
    As declared, it has the same bases as CSafeLoader and uses C-based
    parsing for improved performance.

    NOTE(review): the PyYAML runtime is believed to build CLoader on the
    full (unsafe) Constructor rather than SafeConstructor -- confirm against
    the installed PyYAML before using this class with untrusted input.
    CSafeLoader is the explicitly safe choice.
    """

class CSafeLoader(CParser, SafeConstructor, Resolver):
    """
    Safe C loader.
    
    Declared as its own class (not an alias) combining CParser with
    SafeConstructor and Resolver.
    Provides safe YAML loading with C-based performance improvements.
    Only constructs standard YAML types, preventing code execution.
    """

class CFullLoader(CParser, FullConstructor, Resolver):
    """
    Full-featured C loader with extended Python object support.
    
    Combines CParser with FullConstructor and Resolver.
    Equivalent to FullLoader but with C-based parsing for improved performance.
    Supports additional Python types while remaining safer than CUnsafeLoader.
    """

class CUnsafeLoader(CParser, UnsafeConstructor, Resolver):
    """
    Unsafe C loader allowing arbitrary Python object construction.
    
    Combines CParser with UnsafeConstructor and Resolver.
    WARNING: Can execute arbitrary Python code. Only use with trusted input.
    Provides maximum functionality with C-based performance improvements.
    """

C Dumper Classes

Complete dumper implementations using C-based emission for maximum performance.

class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):
    """
    Base C dumper combining C emitter with Python representer and resolver.
    
    Pairs CEmitter with BaseRepresenter and BaseResolver; the other C dumper
    classes pair the same emitter with SafeRepresenter and Resolver.
    """

class CDumper(CEmitter, SafeRepresenter, Resolver):
    """
    Standard C dumper, declared here with SafeRepresenter and Resolver.
    
    As declared, it has the same bases as CSafeDumper and uses C-based
    emission for improved performance.

    NOTE(review): the PyYAML runtime is believed to build CDumper on the
    full Representer rather than SafeRepresenter -- confirm against the
    installed PyYAML; use CSafeDumper when safe-only output is required.
    """

class CSafeDumper(CEmitter, SafeRepresenter, Resolver):
    """
    Safe C dumper.
    
    Declared as its own class (not an alias) combining CEmitter with
    SafeRepresenter and Resolver.
    Provides safe YAML dumping with C-based performance improvements.
    Recommended for general use when LibYAML is available.
    """

Performance Detection

Module-level flag for checking C extension availability; the examples below read it as `yaml.__with_libyaml__`.

# Boolean flag indicating whether LibYAML C extensions are available.
#
# - True: C extensions are available and will provide performance benefits
# - False: Only Python implementations are available
#
# Documented with comments rather than a trailing string literal: an indented
# string after the annotation is not valid Python at module level.
__with_libyaml__: bool

Usage Examples

Basic C Extension Usage

import yaml

# Report which implementation family is available on this installation.
status_lines = (
    (
        "LibYAML C extensions are available",
        "C loaders and dumpers will provide better performance",
    )
    if yaml.__with_libyaml__
    else (
        "LibYAML C extensions are not available",
        "Using Python implementations",
    )
)
print(*status_lines, sep="\n")

# Sample document used for the load/dump round trip below.
yaml_input = """
name: Performance Test
data:
  - item: 1
    value: alpha
  - item: 2
    value: beta
  - item: 3
    value: gamma
settings:
  enabled: true
  timeout: 30
  debug: false
"""

# Pick the C safe loader when present, otherwise the pure-Python safe loader.
# The C class is only looked up when the flag says it exists.
loader_cls = yaml.CSafeLoader if yaml.__with_libyaml__ else yaml.SafeLoader
data = yaml.load(yaml_input, Loader=loader_cls)

print(f"Loaded data: {data}")

# Same selection for dumping.
dumper_cls = yaml.CSafeDumper if yaml.__with_libyaml__ else yaml.SafeDumper
yaml_output = yaml.dump(data, Dumper=dumper_cls)

print(f"Dumped YAML:\n{yaml_output}")

Performance Comparison

import yaml
import time

def _timed(func, *args, **kwargs):
    """Call func and return (result, elapsed_seconds) using a monotonic clock."""
    # perf_counter is monotonic and high-resolution; time.time() can jump and
    # may be too coarse to time a single short call.
    started = time.perf_counter()
    result = func(*args, **kwargs)
    return result, time.perf_counter() - started


def _describe_speedup(label, python_seconds, c_seconds):
    """Return the speed-up line for label, tolerating a zero C timing."""
    if c_seconds <= 0:
        # Avoid ZeroDivisionError when the C run finished below clock resolution.
        return f"{label} ran too quickly to measure a speed-up ratio"
    return f"{label} is {python_seconds / c_seconds:.2f}x faster"


def performance_test():
    """
    Compare performance between Python and C implementations.

    Builds a 1000-user test document, times dumping it with SafeDumper and
    (when yaml.__with_libyaml__ is true) CSafeDumper, then times loading the
    results with SafeLoader and CSafeLoader. Prints the timings and asserts
    that both implementations produce the same text and the same data.

    Returns:
    - None; results are printed.

    Raises:
    - AssertionError: if the Python and C outputs or loaded data differ.
    """

    # Create a larger YAML document for testing
    test_data = {
        'users': [
            {
                'id': i,
                'name': f'User{i}',
                'email': f'user{i}@example.com',
                'settings': {
                    'theme': 'dark' if i % 2 else 'light',
                    'notifications': True,
                    'language': 'en'
                },
                'scores': [float(j * i) for j in range(10)]
            }
            for i in range(1, 1001)  # 1000 users
        ],
        'metadata': {
            'version': '1.0',
            'created': '2024-01-15T10:30:00',
            'description': 'Performance test data'
        }
    }

    print(f"Test data: {len(test_data['users'])} users")

    # Test Python SafeDumper
    python_yaml, python_dump_time = _timed(
        yaml.dump, test_data, Dumper=yaml.SafeDumper
    )

    # Guard clause: without LibYAML there is nothing to compare against.
    if not yaml.__with_libyaml__:
        print(f"Python SafeDumper: {python_dump_time:.4f} seconds")
        print("C extensions not available for comparison")
        return

    # Test C SafeDumper
    c_yaml, c_dump_time = _timed(yaml.dump, test_data, Dumper=yaml.CSafeDumper)

    print(f"Python SafeDumper: {python_dump_time:.4f} seconds")
    print(f"C SafeDumper: {c_dump_time:.4f} seconds")
    print(_describe_speedup("C dumper", python_dump_time, c_dump_time))

    # Verify outputs are equivalent
    assert python_yaml == c_yaml, "Outputs should be identical"

    # Test loading performance
    python_data, python_load_time = _timed(
        yaml.load, python_yaml, Loader=yaml.SafeLoader
    )
    c_data, c_load_time = _timed(yaml.load, c_yaml, Loader=yaml.CSafeLoader)

    print(f"Python SafeLoader: {python_load_time:.4f} seconds")
    print(f"C SafeLoader: {c_load_time:.4f} seconds")
    print(_describe_speedup("C loader", python_load_time, c_load_time))

    # Verify loaded data is equivalent
    assert python_data == c_data, "Loaded data should be identical"


performance_test()

Automatic C Extension Usage

import yaml

def get_best_loader():
    """Return yaml.CSafeLoader when yaml.__with_libyaml__ is true, else yaml.SafeLoader."""
    if yaml.__with_libyaml__:
        return yaml.CSafeLoader
    return yaml.SafeLoader

def get_best_dumper():
    """Return yaml.CSafeDumper when yaml.__with_libyaml__ is true, else yaml.SafeDumper."""
    if yaml.__with_libyaml__:
        return yaml.CSafeDumper
    return yaml.SafeDumper

def load_yaml_optimized(yaml_input):
    """
    Parse yaml_input with the loader class chosen by get_best_loader().

    Parameters:
    - yaml_input: YAML source passed straight through to yaml.load

    Returns the object produced by yaml.load.
    """
    loader_cls = get_best_loader()
    return yaml.load(yaml_input, Loader=loader_cls)

def dump_yaml_optimized(data):
    """
    Serialize data with the dumper class chosen by get_best_dumper().

    Parameters:
    - data: object passed straight through to yaml.dump

    Returns whatever yaml.dump returns for that dumper.
    """
    dumper_cls = get_best_dumper()
    return yaml.dump(data, Dumper=dumper_cls)

# Usage: round-trip a small configuration through the selected loader/dumper.
yaml_config = """
app:
  name: My Application
  version: 1.0.0
  features:
    - authentication
    - logging
    - caching
database:
  host: localhost
  port: 5432
  name: myapp_db
"""

# The helper picks the C loader when it is available.
config = load_yaml_optimized(yaml_config)
loader_name = get_best_loader().__name__
print(f"Loaded config with {loader_name}")

# Serialize the loaded config again with the matching dumper.
optimized_yaml = dump_yaml_optimized(config)
dumper_name = get_best_dumper().__name__
print(f"Dumped config with {dumper_name}")

Direct C Component Usage

import yaml
from io import StringIO

def use_c_parser_directly():
    """
    Drive the CParser API directly for fine-grained control.

    The parser methods (get_event, get_single_node, dispose) are called on a
    yaml.CSafeLoader instance rather than on a bare CParser:
    - the top-level yaml package exports the C loader/dumper classes but not
      CParser itself, so yaml.CParser is not available;
    - composing nodes calls resolver hooks that a bare CParser does not
      provide, whereas CSafeLoader inherits CParser together with
      SafeConstructor and Resolver.

    Prints the event type names for the sample document, then the root node
    and the data constructed from it. Returns None; prints a notice and
    returns early when yaml.__with_libyaml__ is false.
    """

    if not yaml.__with_libyaml__:
        print("C extensions not available")
        return

    yaml_input = """
    documents:
      - title: Document 1
        content: Content of first document
      - title: Document 2
        content: Content of second document
    """

    # Event-level access through the C parser
    loader = yaml.CSafeLoader(yaml_input)

    try:
        print("Parsing events:")
        while True:
            event = loader.get_event()
            print(f"  {type(event).__name__}")
            if isinstance(event, yaml.StreamEndEvent):
                break
    finally:
        # Always release the parser, even if parsing raised.
        loader.dispose()

    # Node-level access needs a fresh loader: the first one consumed the stream.
    loader = yaml.CSafeLoader(yaml_input)
    try:
        node = loader.get_single_node()
        print(f"\nRoot node: {type(node).__name__} with tag {node.tag}")

        # Manually construct from node, using the loader's own safe constructor
        data = loader.construct_document(node)
        print(f"Constructed data: {data}")

    finally:
        loader.dispose()

use_c_parser_directly()

Loader/Dumper Selection Strategy

import yaml

class YAMLProcessor:
    """
    YAML processor that automatically selects optimal implementations.

    Attributes set by __init__:
    - use_c: bool, True when the C-based classes were selected
    - safe_loader, full_loader, unsafe_loader: loader classes in use
    - safe_dumper, dumper: dumper classes in use
    """

    def __init__(self, use_c_extensions=None):
        """
        Initialize processor with C extension preference.

        Parameters:
        - use_c_extensions: True to prefer C, False to force Python, None for
          auto. C is only ever used when yaml.__with_libyaml__ is true, so
          True falls back to the Python classes when LibYAML is missing.

        Prints which implementation family was selected.
        """
        libyaml_available = bool(yaml.__with_libyaml__)
        if use_c_extensions is None:
            self.use_c = libyaml_available
        else:
            # Normalize to a real bool so use_c never holds a falsy
            # non-bool such as 0 or "".
            self.use_c = bool(use_c_extensions) and libyaml_available

        # Select optimal loaders and dumpers
        if self.use_c:
            self.safe_loader = yaml.CSafeLoader
            self.full_loader = yaml.CFullLoader
            self.unsafe_loader = yaml.CUnsafeLoader
            self.safe_dumper = yaml.CSafeDumper
            self.dumper = yaml.CDumper
            print("Using C-based implementations")
        else:
            self.safe_loader = yaml.SafeLoader
            self.full_loader = yaml.FullLoader
            self.unsafe_loader = yaml.UnsafeLoader
            self.safe_dumper = yaml.SafeDumper
            self.dumper = yaml.Dumper
            print("Using Python-based implementations")

    def safe_load(self, yaml_input):
        """Load yaml_input with the selected safe loader class."""
        return yaml.load(yaml_input, Loader=self.safe_loader)

    def full_load(self, yaml_input):
        """Load yaml_input with the selected full loader class."""
        return yaml.load(yaml_input, Loader=self.full_loader)

    def safe_dump(self, data, **kwargs):
        """Dump data with the selected safe dumper; kwargs go to yaml.dump."""
        return yaml.dump(data, Dumper=self.safe_dumper, **kwargs)

    def dump(self, data, **kwargs):
        """Dump data with the selected general dumper; kwargs go to yaml.dump."""
        return yaml.dump(data, Dumper=self.dumper, **kwargs)

# Usage: sample payload for the round-trip and comparison runs below.
config_data = {
    'app': {'name': 'Test', 'version': '1.0'},
    'database': {'host': 'localhost', 'port': 5432},
    'features': ['auth', 'logging', 'metrics'],
}

# Auto-select the implementation based on availability.
processor = YAMLProcessor()

# Dump and reload with whichever implementation was selected.
yaml_output = processor.safe_dump(config_data, indent=2)
loaded_data = processor.safe_load(yaml_output)

round_trip_ok = config_data == loaded_data
print(f"Round-trip successful: {round_trip_ok}")

# Baseline run with the pure-Python classes.
python_processor = YAMLProcessor(use_c_extensions=False)
python_yaml = python_processor.safe_dump(config_data, indent=2)

# Compare against the C classes only when they exist.
if yaml.__with_libyaml__:
    c_processor = YAMLProcessor(use_c_extensions=True)
    c_yaml = c_processor.safe_dump(config_data, indent=2)

    outputs_match = python_yaml == c_yaml
    print(f"Python and C outputs identical: {outputs_match}")

Install with Tessl CLI

npx tessl i tessl/pypi-types-pyyaml

docs

advanced-components.md

c-extensions.md

custom-objects.md

dumping.md

errors.md

index.md

loaders-dumpers.md

loading.md

low-level.md

registration.md

tile.json