CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-asdf

Python implementation of the Advanced Scientific Data Format (ASDF) Standard

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/utilities.md

Utilities and Helpers

Utility functions for inspecting ASDF files, testing extensions, and working with ASDF data structures programmatically. These tools provide convenience methods for debugging, development, and integration testing.

Capabilities

File Inspection

Display and analyze ASDF file structure and contents for debugging and exploration.

def info(node_or_path, max_rows=24, max_cols=120, show_values=True):
    """
    Print rendering of ASDF tree structure to stdout.

    Parameters:
    - node_or_path: ASDF file path, file-like object, or tree node to inspect
    - max_rows (int): Maximum number of array rows to display
    - max_cols (int): Maximum number of array columns to display
    - show_values (bool): Whether to show actual values for small arrays

    Returns:
    None -- all output is written to stdout.

    Example output:
    root (AsdfObject)
    ├─ array_data (ndarray): shape=(100, 50), dtype=float64
    ├─ metadata (dict)
    │  ├─ title (str): "Sample Dataset"
    │  └─ version (str): "1.0"
    └─ parameters (list): 3 items
    """

Tree Search

Search through ASDF tree structures to find specific nodes, types, or values.

class AsdfSearchResult:
    """
    Result object containing search matches with tree navigation methods.

    Behaves like a read-only sequence of matches: it supports len(),
    indexing, and iteration via the dunder methods below.
    """

    def __iter__(self):
        """Iterate over search results."""

    def __len__(self):
        """Number of search results."""

    def __getitem__(self, index):
        """Get result by index."""

    @property
    def paths(self) -> list:
        """List of tree paths for all matches (parallel to ``nodes``)."""

    @property
    def nodes(self) -> list:
        """List of matching nodes (parallel to ``paths``)."""

# Available through the AsdfFile.search() method. Shown here as a free
# function taking the tree explicitly; the bound method omits the tree
# argument (see the usage examples, e.g. af.search(key="metadata")).
def search(tree, key=None, type_=None, value=None, filter_=None):
    """
    Search through tree for matching nodes.

    Parameters:
    - tree: Tree or AsdfFile to search
    - key: Key name to match (string or regex)
    - type_: Python type to match
    - value: Specific value to match
    - filter_: Custom filter function taking (key, value) and returning bool

    Returns:
    AsdfSearchResult: Object containing all matches
    """

Tree Manipulation

Utilities for working with ASDF tree structures programmatically.

# Available through asdf.treeutil module (internal but useful):

def walk_tree(tree):
    """
    Walk through all nodes in an ASDF tree.

    This is a generator: nodes are produced lazily as the tree is
    traversed, so it is safe to use on large trees.

    Parameters:
    - tree: Tree structure to walk

    Yields:
    (path, key, value) tuples for each node
    """

def get_tree_path(tree, path):
    """
    Get value at specific path in tree.

    Parameters:
    - tree: Tree structure
    - path (list): Path components as list
      (presumably dict keys and list indices applied in order --
      confirm against the asdf.treeutil documentation)

    Returns:
    Value at the specified path
    """

def set_tree_path(tree, path, value):
    """
    Set value at specific path in tree.

    Parameters:
    - tree: Tree structure to modify
    - path (list): Path components as list
    - value: Value to set at path

    Returns:
    None -- the tree is modified in place.
    """

Reference Utilities

Work with ASDF references and external data.

# Available through AsdfFile methods:

def resolve_references(af):
    """
    Resolve all external references in ASDF file.

    Parameters:
    - af (AsdfFile): ASDF file containing references

    Returns:
    None -- the tree is modified in place.

    Side effects:
    Replaces reference objects with actual data. Resolving may read
    external files; a missing target apparently surfaces as
    FileNotFoundError (see the reference-management usage example).
    """

def find_references(af):
    """
    Find all references in ASDF tree.

    Read-only counterpart to resolve_references(): it only collects the
    reference objects, it does not replace them with data.

    Parameters:
    - af (AsdfFile): ASDF file to search

    Returns:
    list: All reference objects found in tree
    """

Schema Utilities

Work with ASDF schemas and validation.

# Available through asdf.schema module:

def load_schema(schema_uri):
    """
    Load ASDF schema by URI.

    Parameters:
    - schema_uri (str): URI of schema to load

    Returns:
    dict: Loaded schema definition (the parsed schema mapping)
    """

def validate_tree(tree, schema_uri, extension_manager=None):
    """
    Validate tree against specific schema.

    Parameters:
    - tree: Tree structure to validate
    - schema_uri (str): URI of schema for validation
    - extension_manager (ExtensionManager, optional): Extensions for validation

    Returns:
    None on success.

    Raises:
    ValidationError: If validation fails
    """

Usage Examples

File Inspection and Debugging

# Demonstration: build a nested ASDF file, then inspect it with asdf.info().
import asdf
import numpy as np

# Create test file with complex structure
data = {
    "experiments": {
        "exp1": {
            "data": np.random.random((100, 50)),
            "metadata": {"date": "2024-01-01", "researcher": "Dr. Smith"},
            "parameters": {"temperature": 25.0, "pressure": 1.013}
        },
        "exp2": {
            "data": np.arange(1000).reshape(40, 25),
            "metadata": {"date": "2024-01-02", "researcher": "Dr. Jones"},
            "parameters": {"temperature": 30.0, "pressure": 1.020}
        }
    },
    "summary": {"total_experiments": 2, "status": "complete"}
}

af = asdf.AsdfFile(data)
af.write_to("experiments.asdf")

# Inspect file structure
asdf.info("experiments.asdf")
# Output:
# root (AsdfObject)
# ├─ experiments (dict)
# │  ├─ exp1 (dict)
# │  │  ├─ data (ndarray): shape=(100, 50), dtype=float64
# │  │  ├─ metadata (dict): 2 items
# │  │  └─ parameters (dict): 2 items
# │  └─ exp2 (dict)
# │     ├─ data (ndarray): shape=(40, 25), dtype=int64
# │     ├─ metadata (dict): 2 items
# │     └─ parameters (dict): 2 items
# └─ summary (dict): 2 items
# (int64 above assumes a 64-bit default integer; np.arange may yield
# int32 on platforms where the default NumPy integer is 32-bit)

# Show more detail with values
asdf.info("experiments.asdf", show_values=True, max_rows=5)

Tree Search Operations

# Demonstration: search an ASDF tree by key, type, value, and custom filter.
# Assumes "experiments.asdf" was created by the file-inspection example above.
# Search for specific keys
with asdf.open("experiments.asdf") as af:
    # Find all metadata
    metadata_results = af.search(key="metadata")
    print(f"Found {len(metadata_results)} metadata objects")

    # Each result exposes .path and .node for the individual match
    for result in metadata_results:
        print(f"Path: {result.path}")
        print(f"Researcher: {result.node['researcher']}")

    # Find numpy arrays
    array_results = af.search(type_=np.ndarray)
    print(f"Found {len(array_results)} arrays")

    for result in array_results:
        print(f"Array at {result.path}: shape={result.node.shape}")

    # Find specific values
    temp_results = af.search(value=25.0)
    print(f"Found temperature 25.0 at: {temp_results.paths}")

    # Custom search with filter: the callable receives (key, value)
    large_arrays = af.search(filter_=lambda k, v: 
        isinstance(v, np.ndarray) and v.size > 1000)
    print(f"Found {len(large_arrays)} large arrays")

Testing and Validation

# Demonstration: roundtrip-testing a custom type and parsing inline YAML.
from asdf.testing import roundtrip_object, yaml_to_asdf

# Test custom object serialization
class CustomData:
    # NOTE(review): defining __eq__ without __hash__ makes instances
    # unhashable (Python removes the default __hash__). Fine for this
    # equality-only test fixture.
    def __init__(self, values):
        self.values = values

    def __eq__(self, other):
        return isinstance(other, CustomData) and self.values == other.values

# Test roundtrip with custom converter
# (roundtrip_object presumably needs a converter registered for
# CustomData -- without one, the except branch below reports the failure)
original = CustomData([1, 2, 3, 4, 5])

try:
    restored = roundtrip_object(original)
    assert original == restored
    print("Roundtrip test passed")
except Exception as e:
    print(f"Roundtrip test failed: {e}")

# Test YAML parsing
yaml_content = """
experiment:
  name: "Temperature Study"
  data: [20.1, 22.3, 24.7, 21.9]
  conditions:
    humidity: 0.65
    pressure: 1013.25
metadata:
  version: 1.0
  created: "2024-01-01"
"""

af = yaml_to_asdf(yaml_content)
print(f"Parsed experiment: {af.tree['experiment']['name']}")
print(f"Data points: {len(af.tree['experiment']['data'])}")

Reference Management

# Demonstration: declare an external array reference, then find and
# (optionally) resolve references in the written file.
# Create file with external references
from asdf import ExternalArrayReference

# Create main file with external reference
# (argument order matches the attributes read below:
#  fileuri, target, dtype, shape)
external_ref = ExternalArrayReference(
    "external_data.asdf",
    "large_dataset",
    np.float64,
    (10000, 1000)
)

main_data = {
    "local_data": np.random.random(100),
    "external_data": external_ref,
    "metadata": {"source": "external"}
}

af = asdf.AsdfFile(main_data)
af.write_to("main_with_refs.asdf")

# Find and resolve references
with asdf.open("main_with_refs.asdf") as af:
    # Find all references (read-only scan of the tree)
    refs = af.find_references()
    print(f"Found {len(refs)} references:")

    for ref in refs:
        if isinstance(ref, ExternalArrayReference):
            print(f"  External array: {ref.fileuri} -> {ref.target}")
            print(f"  Shape: {ref.shape}, dtype: {ref.dtype}")

    # Resolve references -- only succeeds if the referenced external
    # files (external_data.asdf above) actually exist on disk
    try:
        af.resolve_references()
        print("All references resolved successfully")
    except FileNotFoundError as e:
        print(f"Could not resolve reference: {e}")

Development and Debugging Tools

# Tree walking for analysis
def analyze_tree_structure(af):
    """Walk an ASDF tree and print a structural summary for debugging.

    Prints one line per ndarray found (path, shape, dtype), followed by
    a summary: total array count, total array elements, and a sorted
    distribution of every node type encountered.

    Parameters:
    - af: object exposing a ``tree`` attribute (e.g. an AsdfFile)

    Returns:
    None -- all output goes to stdout.
    """
    type_counts = {}
    total_arrays = 0
    total_array_elements = 0

    def visit(current, location=""):
        nonlocal total_arrays, total_array_elements

        if isinstance(current, dict):
            for name, child in current.items():
                visit(child, f"{location}/{name}" if location else name)
        elif isinstance(current, list):
            for index, child in enumerate(current):
                visit(child, f"{location}[{index}]")
        elif isinstance(current, np.ndarray):
            total_arrays += 1
            total_array_elements += current.size
            print(f"Array at {location}: {current.shape} {current.dtype}")

        # Tally the node's own type after its children have been visited,
        # so containers are counted as well as leaves.
        kind = type(current).__name__
        type_counts[kind] = type_counts.get(kind, 0) + 1

    visit(af.tree)

    print(f"\nTree Analysis:")
    print(f"  Total arrays: {total_arrays}")
    print(f"  Total array elements: {total_array_elements}")
    print(f"  Type distribution:")
    for type_name, count in sorted(type_counts.items()):
        print(f"    {type_name}: {count}")

Performance Profiling

import time
import sys
import os

def profile_asdf_operations(data_sizes):
    """Profile ASDF write/read performance across square array sizes.

    Parameters:
    - data_sizes (iterable of int): side lengths of the square test arrays

    Returns:
    list of dict: one entry per size with keys "size", "write_time",
    "read_time", "file_size" (bytes), and "elements".

    Side effects:
    Writes a test_<size>.asdf file for each size and prints one summary
    line per size.
    """
    results = []

    for size in data_sizes:
        filename = f"test_{size}.asdf"

        # Create test data
        test_data = {
            "array": np.random.random((size, size)),
            "metadata": {"size": size, "created": time.time()},
        }

        # Time write operation. perf_counter() is the monotonic clock
        # intended for elapsed-time measurement; time.time() can jump.
        start = time.perf_counter()
        af = asdf.AsdfFile(test_data)
        af.write_to(filename)
        write_time = time.perf_counter() - start

        # Time read operation; summing forces the lazy-loaded array in.
        start = time.perf_counter()
        with asdf.open(filename) as af:
            _ = af.tree["array"].sum()
        read_time = time.perf_counter() - start

        # Get file size (bug fix: os was used here but never imported)
        file_size = os.path.getsize(filename)

        results.append({
            "size": size,
            "write_time": write_time,
            "read_time": read_time,
            "file_size": file_size,
            "elements": size * size,
        })

        print(f"Size {size}x{size}: write={write_time:.3f}s, "
              f"read={read_time:.3f}s, file={file_size/1024/1024:.1f}MB")

    return results

# Profile different array sizes
sizes = [100, 500, 1000, 2000]
profile_results = profile_asdf_operations(sizes)

Install with Tessl CLI

npx tessl i tessl/pypi-asdf

docs

configuration.md

core-data-types.md

data-serialization.md

extension-system.md

file-operations.md

index.md

utilities.md

tile.json