Quilt manages data like code with packages, repositories, browsing and revision history for machine learning and data-driven domains
npx @tessl/cli install tessl/pypi-quilt3@7.0.0

Quilt manages data like code with packages, repositories, browsing and revision history for machine learning, biotech, and other data-driven domains. It provides comprehensive data package management with versioning, metadata management, and collaborative workflows, and it integrates with cloud storage services like AWS S3.
pip install quilt3

import quilt3

For specific functionality:
from quilt3 import Package, Bucket
import quilt3.admin

from typing import Union, Optional, Callable, Any
# Hook type aliases
BuildClientHook = Callable[..., Any]

import quilt3
# Configure your Quilt catalog
quilt3.config('https://your-catalog-url.com')
# Create and build a data package
pkg = quilt3.Package()
pkg.set_dir("data/", "path/to/local/directory/")
pkg.set_meta({"description": "My dataset"})
pkg.build("my-username/my-package")
# Browse and install existing packages
pkg = quilt3.Package.browse("my-username/my-package")
pkg.install("my-username/my-package", dest="./downloaded-data/")
# Work with S3 buckets
bucket = quilt3.Bucket("s3://my-bucket")
bucket.put_file("data.csv", "local/path/data.csv")
data = bucket.select("data.csv", "SELECT * FROM S3Object LIMIT 10")

Quilt3 is built around several key concepts:
Core functionality for creating, building, installing, and managing data packages. Includes package versioning, metadata handling, and collaborative workflows.
class Package:
def __init__(self): ...
def build(self, name: str, registry: str = None, message: str = None) -> str: ...
def install(cls, name: str, registry: str = None, top_hash: str = None, dest: str = None): ...
def browse(cls, name: str, registry: str = None, top_hash: str = None): ...
def set_dir(self, lkey: str, path: str = None, meta: dict = None): ...
def set_meta(self, meta: dict): ...

Methods for accessing, deserializing, and working with data files within packages. Supports various data formats and provides caching and optimization features.
class PackageEntry:
def get(self) -> str: ...
def get_bytes(self, use_cache_if_available: bool = True) -> bytes: ...
def get_as_json(self, use_cache_if_available: bool = True) -> dict: ...
def deserialize(self, func=None, **format_opts): ...
def fetch(self, dest: str = None): ...

Direct S3 bucket interface for file operations, listing, searching, and SQL queries. Provides high-level abstractions over AWS S3 operations.
class Bucket:
def __init__(self, bucket_uri: str): ...
def put_file(self, key: str, path: str): ...
def put_dir(self, key: str, directory: str): ...
def fetch(self, key: str, path: str): ...
def ls(self, path: str = None, recursive: bool = False): ...
def select(self, key: str, query: str, raw: bool = False): ...

Authentication, configuration, and session management for Quilt catalogs and AWS services. Handles login/logout, credentials, and configuration settings.
def config(*catalog_url, **config_values): ...
def login(): ...
def logout(): ...
def logged_in() -> str: ...
def get_boto3_session(fallback: bool = True): ...

Configuration and Session Management
Administrative capabilities for managing users, roles, SSO configuration, and other Quilt stack administrative tasks.
import quilt3.admin
# Sub-modules: users, roles, sso_config, tabulator
# Types: User, ManagedRole, UnmanagedRole, SSOConfig

Functions for working with package registries, including listing packages, searching, copying data, and package deletion.
def list_packages(registry: str = None) -> list: ...
def list_package_versions(name: str, registry: str = None) -> list: ...
def search(query: Union[str, dict], limit: int = 10) -> list: ...
def copy(src: str, dest: str): ...
def delete_package(name: str, registry: str = None, top_hash: str = None): ...

Extension system for customizing Quilt3 behavior through configurable hook functions.
import quilt3.hooks
def get_build_s3_client_hook() -> Optional[BuildClientHook]: ...
def set_build_s3_client_hook(hook: Optional[BuildClientHook]) -> Optional[BuildClientHook]: ...

class PackageException(Exception):
"""Exception relating to package validity."""
pass
class QuiltException(Exception):
"""General Quilt operation errors."""
pass
class QuiltConflictException(QuiltException):
"""Conflicts in package operations."""
pass