CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-quilt3

Quilt manages data like code, providing packages, repositories, browsing, and revision history for machine learning and other data-driven domains.

Pending
Overview
Eval results
Files

docs/package-management.md

Data Package Management

Core functionality for creating, building, installing, and managing data packages with versioning, metadata handling, and collaborative workflows.

Capabilities

Package Creation and Building

Create new packages and build them to registries with versioning and metadata.

class Package:
    """
    API reference stub for the package creation and building methods.

    Only signatures and documentation are listed; the method bodies are
    not part of this reference.
    """

    def __init__(self):
        """Creates an empty package."""

    def build(self, name: str, registry: str = None, message: str = None, *, workflow = ...) -> str:
        """
        Serializes this package to a registry.

        Parameters:
        - name: Name of the package
        - registry: Registry to build the package to (defaults to configured
          registry when left as None)
        - message: Commit message for the build (optional, defaults to None)
        - workflow: Workflow configuration for validation. Keyword-only; the
          default is the `...` placeholder.
          NOTE(review): how the `...` default is interpreted is not shown
          here -- confirm against the quilt3 reference.

        Returns:
        Top hash of the built package
        """

    def set_dir(self, lkey: str, path: str = None, meta: dict = None, update_policy: str = "incoming", unversioned: bool = False):
        """
        Adds all files from path to the package.

        Parameters:
        - lkey: Logical key prefix for the directory in the package
        - path: Local directory path to add (defaults to None)
          NOTE(review): the fallback used when path is None is not shown
          here -- confirm against the quilt3 reference.
        - meta: Metadata to associate with the directory
        - update_policy: How to handle conflicts; one of "incoming" (the
          default) or "existing"
        - unversioned: Whether to include unversioned files (defaults to False)
          NOTE(review): set() describes its flag of the same name as "don't
          include version ID in package"; the two descriptions presumably
          refer to the same behavior -- confirm and align the wording.
        """

    def set_meta(self, meta: dict):
        """
        Sets user metadata on this Package.

        Parameters:
        - meta: Dictionary of metadata to set
        """

    def set(self, logical_key: str, entry=None, meta: dict = None, serialization_location: str = None, serialization_format_opts: dict = None, unversioned: bool = False):
        """
        Returns self with logical_key set to entry.

        Parameters:
        - logical_key: Logical key to set in the package
        - entry: PackageEntry to set, or local path, or None (the default)
        - meta: User metadata dict to attach to entry
        - serialization_location: Where to serialize entry if it's an object
        - serialization_format_opts: Options for serialization format
        - unversioned: If True, don't include version ID in package
          (defaults to False)

        Returns:
        Modified package
        """

    @property
    def meta(self) -> dict:
        """
        Get user metadata for this package.

        Accessed as an attribute (`pkg.meta`), not called.

        Returns:
        Dictionary of user metadata
        """

    def delete(self, logical_key: str):
        """
        Returns self with logical_key removed.

        Parameters:
        - logical_key: Key to remove from package

        Returns:
        Modified package
        """

    def push(self, name: str, registry: str = None, dest: str = None, message: str = None, selector_fn=None, *, workflow=..., force: bool = False, dedupe: bool = False):
        """
        Copy objects from this package to a different location.

        Parameters:
        - name: Name for package in registry
        - registry: Registry where package will be stored
        - dest: Destination for package objects (S3 or local)
        - message: Commit message for package
        - selector_fn: Function to filter which entries to push
          (defaults to None)
        - workflow: Workflow configuration (keyword-only; default is the
          `...` placeholder)
        - force: Force push even if conflicts exist (keyword-only, defaults
          to False)
          NOTE(review): upstream quilt3 describes this as skipping the top
          hash check and overwriting any existing package -- confirm.
        - dedupe: Skip uploading duplicate files (keyword-only, defaults to
          False)
          NOTE(review): upstream quilt3 describes this as not pushing at all
          when the top hash matches the existing package -- confirm.

        Returns:
        New package containing copied objects
        """

Package Installation and Browsing

Install and browse existing packages from registries.

class Package:
    """
    API reference stub for the class methods that obtain or manage
    packages already stored in a registry.

    Every method listed here is a classmethod, so it is called on the
    class itself (e.g. `Package.browse(...)`).
    """

    @classmethod
    def install(cls, name: str, registry: str = None, top_hash: str = None, dest: str = None, dest_registry: str = None, *, path: str = None):
        """
        Install a package from a registry.

        Parameters:
        - name: Name of the package to install
        - registry: Registry to install from (defaults to default remote registry)
        - top_hash: Specific version hash to install (defaults to latest)
        - dest: Local destination directory for downloaded files
        - dest_registry: Registry to install to (defaults to local registry)
        - path: If specified, downloads only this path or its children
          (keyword-only)

        Returns:
        Installed Package object
        """

    @classmethod
    def browse(cls, name: str, registry: str = None, top_hash: str = None):
        """
        Browse an existing package without installing.

        Parameters:
        - name: Name of the package to browse
        - registry: Registry to browse from (defaults to None)
          NOTE(review): presumably falls back to the default remote
          registry, as install() does -- confirm.
        - top_hash: Specific version hash to browse (defaults to None)
          NOTE(review): presumably means the latest version, as for
          install() -- confirm.

        Returns:
        Package object for browsing
        """

    @classmethod
    def load(cls, readable_file):
        """
        Load a package from a readable file-like object.

        Parameters:
        - readable_file: File-like object containing serialized package

        Returns:
        Package object loaded from file
        """

    @classmethod
    def resolve_hash(cls, name: str, registry: str, hash_prefix: str) -> str:
        """
        Resolve a shortened hash to the full hash for the package.

        Parameters:
        - name: Name of the package
        - registry: Registry containing the package (required; no default)
        - hash_prefix: Shortened hash to resolve

        Returns:
        Full hash string
        """

    @classmethod
    def rollback(cls, name: str, registry: str, top_hash: str):
        """
        Set the "latest" version of a package to the given hash.

        Parameters:
        - name: Name of the package
        - registry: Registry containing the package (required; no default)
        - top_hash: Hash to set as latest version
        """

Package Navigation and Inspection

Navigate package contents and inspect metadata.

class Package:
    """
    API reference stub for navigating and inspecting package contents.

    Covers the container protocol methods (`in`, indexing, iteration,
    `len`) plus helpers for listing and traversing entries.
    """

    def __contains__(self, logical_key: str) -> bool:
        """
        Checks whether the package contains a specified logical_key.

        Invoked by the `in` operator (`logical_key in pkg`).

        Parameters:
        - logical_key: Key to check for

        Returns:
        True if key exists in package
        """

    def __getitem__(self, logical_key: str):
        """
        Filters the package based on prefix, and returns either a new Package
        or a PackageEntry.

        Invoked by indexing (`pkg[logical_key]`).

        Parameters:
        - logical_key: Key or prefix to retrieve

        Returns:
        PackageEntry for files, Package for directories
        """

    def __iter__(self):
        """Iterator over package keys."""

    def __len__(self) -> int:
        """
        Number of direct children in package.

        Nested entries are not counted here; walk() is the method that
        traverses every entry in the package tree.
        """

    def keys(self) -> list:
        """
        Returns logical keys in the package.

        Returns:
        List of logical keys
        """

    def walk(self):
        """
        Generator that traverses all entries in the package tree and returns tuples of (key, entry),
        with keys in alphabetical order.

        Yields:
        Tuples of (logical_key, PackageEntry)
        """

    def get(self, logical_key: str) -> str:
        """
        Gets object from logical_key and returns its physical path.
        Equivalent to self[logical_key].get().

        Parameters:
        - logical_key: Key to retrieve

        Returns:
        Physical path to the object
        """

    @property
    def readme(self):
        """
        Returns the README PackageEntry if it exists.

        Returns:
        PackageEntry for README file or None

        NOTE(review): upstream quilt3 documents readme as raising a
        QuiltException when the package has no README.md entry, rather than
        returning None -- confirm which behavior applies.
        """

Package Analysis and Comparison

Compare packages and analyze their contents.

class Package:
    """
    API reference stub for comparing packages and applying functions to
    their entries.
    """

    def diff(self, other_pkg) -> tuple:
        """
        Returns three lists -- added, modified, deleted.

        Parameters:
        - other_pkg: Package to compare against

        Returns:
        Tuple of (added_keys, modified_keys, deleted_keys)
        """

    def map(self, f, include_directories: bool = False):
        """
        Performs a user-specified operation on each entry in the package.

        Parameters:
        - f: Function to apply to each entry
          NOTE(review): presumably called as f(logical_key, entry), like the
          callback passed to filter() -- confirm.
        - include_directories: Whether to include directory metadata
          (defaults to False)

        Returns:
        List of function results
        """

    def filter(self, f, include_directories: bool = False):
        """
        Applies a user-specified operation to each entry in the package,
        removing results that evaluate to False from the output.

        Parameters:
        - f: Filter function returning boolean; it receives two arguments,
          the logical key and the entry
          (e.g. `lambda lk, entry: entry.size > 1000000`)
        - include_directories: Whether to include directory metadata
          (defaults to False)

        Returns:
        New Package with filtered entries
        """

    def verify(self, src: str, extra_files_ok: bool = False) -> bool:
        """
        Check if the contents of the given directory matches the package manifest.

        Parameters:
        - src: Directory path to verify against
        - extra_files_ok: Whether extra files in directory are acceptable
          (defaults to False)

        Returns:
        True if directory matches package manifest
        """

Package Serialization and Hashing

Serialize packages and work with package hashes.

class Package:
    """
    API reference stub for serializing a package, reading its top hash,
    and copying its contents to a destination.
    """

    def dump(self, writable_file):
        """
        Serializes this package to a writable file-like object.

        Parameters:
        - writable_file: File-like object to write to
        """

    def manifest(self):
        """
        Provides a generator of the dicts that make up the serialized package.

        Yields:
        Dictionary entries representing package manifest
        """

    @property
    def top_hash(self) -> str:
        """
        Returns the top hash of the package.

        Accessed as an attribute (`pkg.top_hash`), not called.

        Returns:
        SHA256 hash string identifying the package state
        """

    def fetch(self, dest: str = './'):
        """
        Copy all descendants to dest. Descendants are written under their logical
        names relative to self.

        Parameters:
        - dest: Destination directory path (defaults to './', the current
          directory)

        NOTE(review): no return value is documented here; upstream quilt3
        documents fetch() as returning a new Package whose physical keys
        point to the files in dest -- confirm.
        """

Package Selector Functions

Static methods that can be passed as the `selector_fn` argument of `Package.push` to choose which package entries are copied.

class Package:
    """
    API reference stub for the built-in selector functions.

    push() accepts a `selector_fn` argument described as a function that
    filters which entries to push; the static methods below are ready-made
    functions of that kind.
    """

    @staticmethod
    def selector_fn_copy_all(*args) -> bool:
        """
        Selector function that includes all entries.

        Parameters:
        - *args: Any positional arguments are accepted

        Returns:
        Always True
        """

    @staticmethod
    def selector_fn_copy_local(logical_key: str, entry) -> bool:
        """
        Selector function that includes only local entries.

        Parameters:
        - logical_key: Logical key of the entry
        - entry: PackageEntry object

        Returns:
        True if entry is local, False otherwise
        """

Usage Examples

Basic Package Creation

import quilt3

# User metadata that will be attached to the package as a whole.
dataset_meta = {
    "description": "My research dataset",
    "version": "1.0.0",
    "tags": ["research", "experiment"],
}

# Start from an empty package and register every file found under the
# local directory beneath the "data/" logical-key prefix.
pkg = quilt3.Package()
pkg.set_dir(lkey="data/", path="path/to/my/data/")

# Attach the metadata prepared above.
pkg.set_meta(meta=dataset_meta)

# Serialize the package to the configured registry; build() returns the
# top hash of the package it wrote.
top_hash = pkg.build(
    name="my-username/my-dataset",
    message="Initial dataset version",
)
print(f"Package built with hash: {top_hash}")

Package Installation and Browsing

# The same package is referenced by every call below.
PACKAGE_NAME = "my-username/my-dataset"

# Open the package for inspection without installing it.
pkg = quilt3.Package.browse(PACKAGE_NAME)

# Print each logical key, indented beneath a heading.
print("Package contents:")
for logical_key in pkg.keys():
    print(f"  {logical_key}")

# Download the latest version into a local directory.
quilt3.Package.install(PACKAGE_NAME, dest="./my-data/")

# Download one specific version, identified by its top hash.
quilt3.Package.install(
    PACKAGE_NAME,
    top_hash="abc123...",
    dest="./my-data-v1/",
)

Package Comparison and Analysis

# Compare two package versions
pkg1 = quilt3.Package.browse("my-username/my-dataset", top_hash="version1_hash")
pkg2 = quilt3.Package.browse("my-username/my-dataset", top_hash="version2_hash")

# diff() returns three lists: added, modified and deleted keys.
added, modified, deleted = pkg1.diff(pkg2)
print(f"Changes: {len(added)} added, {len(modified)} modified, {len(deleted)} deleted")

# Filter package entries. The filter runs on pkg2 because this snippet
# defines only pkg1 and pkg2; there is no `pkg` variable here.
large_files = pkg2.filter(lambda lk, entry: entry.size > 1_000_000)

# len() on a package counts only its direct children, so nested files would
# be missed; walk() visits every entry in the tree, which is what the
# message below reports.
large_file_count = sum(1 for _ in large_files.walk())
print(f"Found {large_file_count} files larger than 1MB")

Install with Tessl CLI

npx tessl i tessl/pypi-quilt3

docs

admin.md

bucket-operations.md

config-session.md

data-access.md

hooks.md

index.md

package-management.md

registry-operations.md

tile.json