CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-asdf

Python implementation of the Advanced Scientific Data Format (ASDF) Standard

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/data-serialization.md

Data Serialization

High-level functions for serializing and deserializing Python objects to/from ASDF format using string representations. These functions provide convenient alternatives to file-based operations for in-memory processing and testing.

Capabilities

String Deserialization

Load Python objects from ASDF-formatted strings with full support for validation and extensions.

def loads(asdf_string, *, uri=None, validate_checksums=False, extensions=None, 
          custom_schema=None):
    """
    Load an object tree from an ASDF-formatted string.

    Every parameter after `asdf_string` is keyword-only.

    Parameters:
    - asdf_string (str): ASDF-formatted string content
    - uri (str, optional): URI for resolving relative references (default: None)
    - validate_checksums (bool): Validate array checksums on load (default: False)
    - extensions (Extension or list, optional): Additional extensions for custom
      types (default: None)
    - custom_schema (str, optional): Path to custom validation schema (default: None)

    Returns:
    Object tree (typically dict) containing deserialized data

    Raises:
    ValidationError: If validation fails
    AsdfSerializationError: If deserialization fails
    """

String Serialization

Serialize Python objects to ASDF-formatted strings with comprehensive control over output format and compression.

def dumps(tree, *, version=None, extensions=None, all_array_storage=NotSet, 
          all_array_compression=NotSet, compression_kwargs=NotSet, pad_blocks=False, 
          custom_schema=None) -> str:
    """
    Serialize an object tree to an ASDF-formatted string.

    Every parameter after `tree` is keyword-only. Parameters whose default is
    the `NotSet` sentinel are distinct from an explicit None.
    NOTE(review): presumably `NotSet` defers to the library's own default for
    that option -- confirm against the asdf documentation.

    Parameters:
    - tree: Object tree to serialize (typically dict)
    - version (str, optional): ASDF version to use (e.g., "1.5.0") (default: None)
    - extensions (Extension or list, optional): Additional extensions for custom
      types (default: None)
    - all_array_storage (str, optional): Override array storage ('internal', 'external')
    - all_array_compression (str, optional): Compression algorithm
      ('none', 'zlib', 'bzp2', 'lz4').
      NOTE(review): 'bzp2' looks like ASDF's four-character label for bzip2
      rather than a typo -- confirm before "correcting" it.
    - compression_kwargs (dict, optional): Algorithm-specific compression parameters
    - pad_blocks (bool): Add padding to array blocks for streaming performance
      (default: False)
    - custom_schema (str, optional): Path to custom validation schema (default: None)

    Returns:
    str: ASDF-formatted string representation

    Raises:
    AsdfSerializationError: If serialization fails
    ValidationError: If custom schema validation fails
    """

Usage Examples

Basic String Serialization

import asdf
import numpy as np

# Build a small tree mixing scalars, arrays and free-form metadata.
data = {
    "scalars": {
        "temperature": 25.5,
        "pressure": 101325,
        "active": True,
    },
    "arrays": {
        "measurements": np.array([1.2, 3.4, 5.6, 7.8]),
        # Timestamps are stored as integer second offsets.
        # np.arange(100, dtype='datetime64[s]') raises ValueError because
        # NumPy requires both a start and a stop for datetime64 ranges.
        # NOTE(review): datetime64 also does not appear among the ASDF
        # ndarray datatypes, so int64 is the portable choice -- confirm
        # against the ASDF Standard if native datetimes are required.
        "timestamps": np.arange(100, dtype="int64"),
    },
    "metadata": {
        "instrument": "Spectrometer X1",
        "operator": "Dr. Smith",
    },
}

# Serialize the whole tree to a single in-memory string.
asdf_string = asdf.dumps(data)
print(len(asdf_string))  # String length
print(asdf_string[:200])  # First 200 characters

String Deserialization

# Deserialize from string (asdf_string is the value returned by asdf.dumps(data)
# in the serialization example)
loaded_data = asdf.loads(asdf_string)

# Access the data using the same keys that were used when building the tree;
# the trailing comments show the output this page expects
print(loaded_data["scalars"]["temperature"])  # 25.5
print(loaded_data["arrays"]["measurements"].dtype)  # float64
print(type(loaded_data["arrays"]["timestamps"]))  # numpy.ndarray

Compression in String Operations

# Serialize with compression (data is the tree built in the basic example);
# compression_kwargs is passed alongside the chosen algorithm
compressed_string = asdf.dumps(
    data, 
    all_array_compression='zlib',
    compression_kwargs={'level': 9}
)

# Compare sizes: serialize once more without compression options as baseline.
# A ratio above 1 means the compressed output is shorter than the baseline.
original_size = len(asdf.dumps(data))
compressed_size = len(compressed_string)
print(f"Compression ratio: {original_size / compressed_size:.2f}x")

# Deserialize compressed data; no compression arguments are passed to loads
loaded_compressed = asdf.loads(compressed_string)
# Arrays are automatically decompressed

Version Selection

# Serialize the same tree against two specific ASDF versions
v1_4_string = asdf.dumps(data, version="1.4.0")
v1_5_string = asdf.dumps(data, version="1.5.0")

# Load the string written as version 1.4.0. No version argument is passed to
# loads and no explicit compatibility check is performed in this snippet.
loaded = asdf.loads(v1_4_string)

Custom Extensions in String Operations

from asdf.extension import Extension

# Custom extension for special types
class MyExtension(Extension):
    # URI that identifies this extension
    extension_uri = "asdf://example.com/myext/extensions/myext-1.0.0"
    # ... extension implementation
    # NOTE(review): the body is elided here; as written the extension declares
    # only its URI, so it presumably cannot serialize any custom type yet --
    # confirm what the asdf extension API requires.

# Serialize with extensions
# NOTE(review): MyCustomType is not defined anywhere in this snippet; it must
# be defined (and handled by MyExtension) before this call can run.
custom_string = asdf.dumps(
    {"custom_object": MyCustomType()}, 
    extensions=[MyExtension()]
)

# Deserialize with same extensions
loaded_custom = asdf.loads(custom_string, extensions=[MyExtension()])

Testing and Validation

import tempfile

# ValidationError must be imported explicitly; without it the except clause
# below raises NameError as soon as any exception reaches it.
from asdf.exceptions import ValidationError

# Round-trip testing: serialize a tree and immediately load it back
original = {"test_data": np.random.random(100)}
serialized = asdf.dumps(original)
deserialized = asdf.loads(serialized)

# Verify data integrity (raises AssertionError if the arrays differ)
np.testing.assert_array_equal(
    original["test_data"],
    deserialized["test_data"],
)

# Validate against custom schema (data is the tree built in the basic example)
try:
    validated_string = asdf.dumps(
        data,
        custom_schema="my_validation_schema.json",
    )
except ValidationError as e:
    print(f"Validation failed: {e}")

Memory-Efficient Processing

# For large datasets, consider streaming operations
def process_large_dataset(data_chunks):
    """
    Round-trip each chunk through ASDF strings and collect the results.

    Parameters:
    - data_chunks: Iterable of chunks; each is wrapped as {"chunk": chunk}
      before being serialized

    Returns:
    list: The "chunk" entry of every deserialized response, in input order
    """

    def _round_trip(piece):
        # Serialize, hand the string to the remote step, then decode the reply.
        payload = asdf.dumps({"chunk": piece})
        # Process serialized data (e.g., send over network)
        reply = process_remotely(payload)
        return asdf.loads(reply)["chunk"]

    return [_round_trip(piece) for piece in data_chunks]

Install with Tessl CLI

npx tessl i tessl/pypi-asdf

docs

configuration.md

core-data-types.md

data-serialization.md

extension-system.md

file-operations.md

index.md

utilities.md

tile.json