tessl install tessl/pypi-kedro@1.1.0
Kedro helps you build production-ready data and analytics pipelines.
Agent Success
Agent success rate when using this tile
98%
Improvement
Agent success rate improvement when using this tile compared to baseline
1.32x
Baseline
Agent success rate without this tile
74%
A utility for managing dataset versions in a data pipeline, allowing you to load specific versions of datasets and save new versions with timestamp-based versioning.
@generates
from collections import namedtuple
from typing import Any, Callable, Dict, Optional
# Pairs the version to read (`load`) with the version to write (`save`).
Version = namedtuple("Version", ("load", "save"))
class VersionedDataset:
    """A dataset wrapper that supports versioning.

    This is a specification stub: every method body is a placeholder
    (``pass``), and the docstrings describe the contract an
    implementation is expected to honour.
    """

    def __init__(self, filepath: str):
        """Initialize a versioned dataset.

        Args:
            filepath: Base filepath for the dataset.
        """
        pass

    def load(self, version: Optional[str] = None) -> Any:
        """Load data from a specific version or the latest version.

        Args:
            version: Version to load. If None, loads the latest version.

        Returns:
            The loaded data.

        Raises:
            ValueError: If the specified version doesn't exist.
        """
        pass

    def save(self, data: Any, version: Optional[str] = None) -> str:
        """Save data with a specific version or auto-generate a version.

        Args:
            data: Data to save.
            version: Version string to use. If None, generates a
                timestamp-based version.

        Returns:
            The version string used for saving.
        """
        pass

    def list_versions(self) -> list[str]:
        """List all available versions for this dataset.

        Returns:
            List of version strings, sorted newest to oldest.
        """
        pass
def run_pipeline(
    datasets: Dict[str, VersionedDataset],
    pipeline_func: Callable[..., Any],
    load_versions: Optional[Dict[str, str]] = None,
    save_version: Optional[str] = None,
) -> Dict[str, str]:
    """Run a pipeline with specified load and save versions.

    This is a specification stub: the body is a placeholder (``pass``),
    and the docstring describes the contract an implementation is
    expected to honour.

    Args:
        datasets: Dictionary of dataset names to VersionedDataset objects.
        pipeline_func: Function that takes loaded data and returns
            output data.
        load_versions: Optional dict mapping dataset names to versions
            to load.
        save_version: Optional version string to use for all saved
            outputs.

    Returns:
        Dictionary mapping output dataset names to the versions they
        were saved with.
    """
    pass


# Provides data versioning and pipeline management capabilities.
@satisfied-by