Quilt manages data like code, providing packages, repositories, browsing, and revision history for machine learning and other data-driven domains.
—
Core functionality for creating, building, installing, and managing data packages with versioning, metadata handling, and collaborative workflows.
Create new packages and build them to registries with versioning and metadata.
class Package:
def __init__(self):
"""Creates an empty package."""
def build(self, name: str, registry: str = None, message: str = None, *, workflow = ...) -> str:
"""
Serializes this package to a registry.
Parameters:
- name: Name of the package
- registry: Registry to build the package to (defaults to configured registry)
- message: Commit message for the build
- workflow: Workflow configuration for validation
Returns:
Top hash of the built package
"""
def set_dir(self, lkey: str, path: str = None, meta: dict = None, update_policy: str = "incoming", unversioned: bool = False):
"""
Adds all files from path to the package.
Parameters:
- lkey: Logical key prefix for the directory in the package
- path: Local directory path to add
- meta: Metadata to associate with the directory
- update_policy: How to handle conflicts ("incoming", "existing")
- unversioned: If True, don't include version IDs for the added files in the package
"""
def set_meta(self, meta: dict):
"""
Sets user metadata on this Package.
Parameters:
- meta: Dictionary of metadata to set
"""
def set(self, logical_key: str, entry=None, meta: dict = None, serialization_location: str = None, serialization_format_opts: dict = None, unversioned: bool = False):
"""
Returns self with logical_key set to entry.
Parameters:
- logical_key: Logical key to set in the package
- entry: PackageEntry to set, or local path, or None
- meta: User metadata dict to attach to entry
- serialization_location: Where to serialize entry if it's an object
- serialization_format_opts: Options for serialization format
- unversioned: If True, don't include version ID in package
Returns:
Modified package
"""
@property
def meta(self) -> dict:
"""
Get user metadata for this package.
Returns:
Dictionary of user metadata
"""
def delete(self, logical_key: str):
"""
Returns self with logical_key removed.
Parameters:
- logical_key: Key to remove from package
Returns:
Modified package
"""
def push(self, name: str, registry: str = None, dest: str = None, message: str = None, selector_fn=None, *, workflow=..., force: bool = False, dedupe: bool = False):
"""
Copy objects from this package to a different location.
Parameters:
- name: Name for package in registry
- registry: Registry where package will be stored
- dest: Destination for package objects (S3 or local)
- message: Commit message for package
- selector_fn: Function to filter which entries to push
- workflow: Workflow configuration
- force: Force push even if conflicts exist
- dedupe: Skip uploading duplicate files
Returns:
New package containing copied objects
"""

Install and browse existing packages from registries.
class Package:
@classmethod
def install(cls, name: str, registry: str = None, top_hash: str = None, dest: str = None, dest_registry: str = None, *, path: str = None):
"""
Install a package from a registry.
Parameters:
- name: Name of the package to install
- registry: Registry to install from (defaults to default remote registry)
- top_hash: Specific version hash to install (defaults to latest)
- dest: Local destination directory for downloaded files
- dest_registry: Registry to install to (defaults to local registry)
- path: If specified, downloads only this path or its children
Returns:
Installed Package object
"""
@classmethod
def browse(cls, name: str, registry: str = None, top_hash: str = None):
"""
Browse an existing package without installing.
Parameters:
- name: Name of the package to browse
- registry: Registry to browse from
- top_hash: Specific version hash to browse
Returns:
Package object for browsing
"""
@classmethod
def load(cls, readable_file):
"""
Load a package from a readable file-like object.
Parameters:
- readable_file: File-like object containing serialized package
Returns:
Package object loaded from file
"""
@classmethod
def resolve_hash(cls, name: str, registry: str, hash_prefix: str) -> str:
"""
Resolve a shortened hash to the full hash for the package.
Parameters:
- name: Name of the package
- registry: Registry containing the package
- hash_prefix: Shortened hash to resolve
Returns:
Full hash string
"""
@classmethod
def rollback(cls, name: str, registry: str, top_hash: str):
"""
Set the "latest" version of a package to the given hash.
Parameters:
- name: Name of the package
- registry: Registry containing the package
- top_hash: Hash to set as latest version
"""

Navigate package contents and inspect metadata.
class Package:
def __contains__(self, logical_key: str) -> bool:
"""
Checks whether the package contains a specified logical_key.
Parameters:
- logical_key: Key to check for
Returns:
True if key exists in package
"""
def __getitem__(self, logical_key: str):
"""
Filters the package based on prefix, and returns either a new Package
or a PackageEntry.
Parameters:
- logical_key: Key or prefix to retrieve
Returns:
PackageEntry for files, Package for directories
"""
def __iter__(self):
"""Iterator over package keys."""
def __len__(self) -> int:
"""Number of direct children in package."""
def keys(self) -> list:
"""
Returns logical keys in the package.
Returns:
List of logical keys
"""
def walk(self):
"""
Generator that traverses all entries in the package tree and returns tuples of (key, entry),
with keys in alphabetical order.
Yields:
Tuples of (logical_key, PackageEntry)
"""
def get(self, logical_key: str) -> str:
"""
Gets object from logical_key and returns its physical path.
Equivalent to self[logical_key].get().
Parameters:
- logical_key: Key to retrieve
Returns:
Physical path to the object
"""
@property
def readme(self):
"""
Returns the README PackageEntry if it exists.
Returns:
PackageEntry for README file or None
"""

Compare packages and analyze their contents.
class Package:
def diff(self, other_pkg) -> tuple:
"""
Returns three lists -- added, modified, deleted.
Parameters:
- other_pkg: Package to compare against
Returns:
Tuple of (added_keys, modified_keys, deleted_keys)
"""
def map(self, f, include_directories: bool = False):
"""
Performs a user-specified operation on each entry in the package.
Parameters:
- f: Function to apply to each entry
- include_directories: Whether to include directory metadata
Returns:
List of function results
"""
def filter(self, f, include_directories: bool = False):
"""
Applies a user-specified operation to each entry in the package,
removing results that evaluate to False from the output.
Parameters:
- f: Filter function returning boolean
- include_directories: Whether to include directory metadata
Returns:
New Package with filtered entries
"""
def verify(self, src: str, extra_files_ok: bool = False) -> bool:
"""
Check if the contents of the given directory matches the package manifest.
Parameters:
- src: Directory path to verify against
- extra_files_ok: Whether extra files in directory are acceptable
Returns:
True if directory matches package manifest
"""

Serialize packages and work with package hashes.
class Package:
def dump(self, writable_file):
"""
Serializes this package to a writable file-like object.
Parameters:
- writable_file: File-like object to write to
"""
def manifest(self):
"""
Provides a generator of the dicts that make up the serialized package.
Yields:
Dictionary entries representing package manifest
"""
@property
def top_hash(self) -> str:
"""
Returns the top hash of the package.
Returns:
SHA256 hash string identifying the package state
"""
def fetch(self, dest: str = './'):
"""
Copy all descendants to dest. Descendants are written under their logical
names relative to self.
Parameters:
- dest: Destination directory path
"""

Static methods for filtering package entries during operations.
class Package:
@staticmethod
def selector_fn_copy_all(*args) -> bool:
"""
Selector function that includes all entries.
Returns:
Always True
"""
@staticmethod
def selector_fn_copy_local(logical_key: str, entry) -> bool:
"""
Selector function that includes only local entries.
Parameters:
- logical_key: Logical key of the entry
- entry: PackageEntry object
Returns:
True if entry is local, False otherwise
"""

import quilt3
# Create a new package
pkg = quilt3.Package()
# Add a directory of files
pkg.set_dir("data/", "path/to/my/data/")
# Add metadata
pkg.set_meta({
"description": "My research dataset",
"version": "1.0.0",
"tags": ["research", "experiment"]
})
# Build and save to registry
top_hash = pkg.build("my-username/my-dataset", message="Initial dataset version")
print(f"Package built with hash: {top_hash}")

# Browse an existing package
pkg = quilt3.Package.browse("my-username/my-dataset")
# Check package contents
print("Package contents:")
for key in pkg.keys():
    print(f" {key}")
# Install to local directory
quilt3.Package.install("my-username/my-dataset", dest="./my-data/")
# Install specific version
quilt3.Package.install("my-username/my-dataset",
top_hash="abc123...",
dest="./my-data-v1/")

# Compare two package versions
pkg1 = quilt3.Package.browse("my-username/my-dataset", top_hash="version1_hash")
pkg2 = quilt3.Package.browse("my-username/my-dataset", top_hash="version2_hash")
added, modified, deleted = pkg1.diff(pkg2)
print(f"Changes: {len(added)} added, {len(modified)} modified, {len(deleted)} deleted")
# Filter package entries
large_files = pkg.filter(lambda lk, entry: entry.size > 1000000)
print(f"Found {len(large_files)} files larger than 1MB")

Install with Tessl CLI
npx tessl i tessl/pypi-quilt3