Easily pick a place to store data for your Python code with standardized directory management, caching, and data format support.
—
PyStow provides a comprehensive file download and caching system that automatically manages file retrieval, storage, and cache validation. Files are downloaded once and reused from cache on subsequent requests.
def ensure(key: str, *subkeys: str, url: str, name: str | None = None, version: VersionHint = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None) -> Path:
"""Ensure a file is downloaded.
Args:
key: The name of the module. No funny characters. The envvar <key>_HOME where
key is uppercased is checked first before using the default home directory.
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
url: The URL to download.
name: Overrides the name of the file at the end of the URL, if given. Also
useful for URLs that don't have proper filenames with extensions.
version: The optional version, or no-argument callable that returns an
optional version. This is prepended before the subkeys.
force: Should the download be done again, even if the path already exists?
Defaults to false.
download_kwargs: Keyword arguments to pass through to pystow.utils.download.
Returns:
The path of the file that has been downloaded (or already exists)
"""

def ensure_custom(key: str, *subkeys: str, name: str, force: bool = False, provider: Provider, **kwargs: Any) -> Path:
"""Ensure a file is present, and run a custom create function otherwise.
Args:
key: The name of the module. No funny characters. The envvar <key>_HOME where
key is uppercased is checked first before using the default home directory.
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The file name.
force: Should the file be re-created, even if the path already exists?
provider: The file provider. Will be run with the path as the first
positional argument, if the file needs to be generated.
kwargs: Additional keyword-based parameters passed to the provider.
Returns:
The path of the file that has been created (or already exists)
"""

@contextmanager
def open(key: str, *subkeys: str, name: str, mode: Literal["r", "rb", "rt", "w", "wb", "wt"] = "r", open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool = False) -> Generator[StringIO | BytesIO, None, None]:
"""Open a file.
Args:
key: The name of the module. No funny characters. The envvar <key>_HOME where
key is uppercased is checked first before using the default home directory.
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
mode: The read or write mode, passed to open
open_kwargs: Additional keyword arguments passed to open
ensure_exists: Should the directory the file is in be made? Set to true on
write operations.
Yields:
An open file object
"""

@contextmanager
def open_gz(key: str, *subkeys: str, name: str, mode: Literal["r", "w", "rt", "wt", "rb", "wb"] = "rb", open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool = False) -> Generator[StringIO | BytesIO, None, None]:
"""Open a gzipped file that exists already.
Args:
key: The name of the module. No funny characters. The envvar <key>_HOME where
key is uppercased is checked first before using the default home directory.
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
mode: The read or write mode, passed to gzip.open
open_kwargs: Additional keyword arguments passed to gzip.open
ensure_exists: Should the directory the file is in be made? Set to true on
write operations.
Yields:
An open file object
"""

@contextmanager
def ensure_open(key: str, *subkeys: str, url: str, name: str | None = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None, mode: Literal["r", "rt", "w", "wt"] | Literal["rb", "wb"] = "r", open_kwargs: Mapping[str, Any] | None = None) -> Generator[StringIO | BytesIO, None, None]:
"""Ensure a file is downloaded and open it.
Args:
key: The name of the module. No funny characters. The envvar <key>_HOME
where key is uppercased is checked first before using the default home
directory.
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
url: The URL to download.
name: Overrides the name of the file at the end of the URL, if given. Also
useful for URLs that don't have proper filenames with extensions.
force: Should the download be done again, even if the path already exists?
Defaults to false.
download_kwargs: Keyword arguments to pass through to pystow.utils.download.
mode: The read or write mode, passed to open
open_kwargs: Additional keyword arguments passed to open
Yields:
An open file object
"""

@contextmanager
def open_gz(key: str, *subkeys: str, name: str, mode: Literal["r", "w", "rt", "wt", "rb", "wb"] = "rb", open_kwargs: Mapping[str, Any] | None = None, ensure_exists: bool = False) -> Generator[StringIO | BytesIO, None, None]:
"""Open a gzipped file that exists already.
Args:
key: The name of the module. No funny characters. The envvar <key>_HOME where
key is uppercased is checked first before using the default home directory.
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
mode: The read or write mode, passed to gzip.open
open_kwargs: Additional keyword arguments passed to gzip.open
ensure_exists: Should the directory the file is in be made? Set to true on
write operations.
Yields:
An open file object
"""

import pystow
# Download a file with automatic caching
path = pystow.ensure(
"myapp", "datasets",
url="https://example.com/data.csv",
name="dataset.csv"
)
# File is cached - subsequent calls return immediately
path = pystow.ensure(
"myapp", "datasets",
url="https://example.com/data.csv",
name="dataset.csv"
)
# Force re-download
path = pystow.ensure(
"myapp", "datasets",
url="https://example.com/data.csv",
name="dataset.csv",
force=True
)

import pystow
import requests
def get_data_version():
    """Return the current data version reported by the version API.

    Returns:
        The value stored under the ``"version"`` key of the JSON response.

    Raises:
        requests.Timeout: If the server does not answer within the timeout.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Bound the request so a stalled server cannot hang the caller forever.
    response = requests.get("https://api.example.com/version", timeout=10)
    # Fail loudly on an error status instead of parsing an error page as JSON.
    response.raise_for_status()
    return response.json()["version"]
# Version-aware download
path = pystow.ensure(
"myapp", "datasets",
url="https://example.com/data.csv",
version=get_data_version
)
# If the API reports version "v1.2.3", stores in: ~/.data/myapp/v1.2.3/datasets/data.csv

import pystow
import pandas as pd
def create_processed_data(path, raw_data_url):
    """Build the aggregated CSV at ``path`` from the raw CSV at ``raw_data_url``.

    Args:
        path: Destination the aggregated CSV is written to.
        raw_data_url: URL of the raw CSV, obtained through ``pystow.ensure``.
    """
    # Fetch the raw file into the "myapp"/"raw" location
    source_csv = pystow.ensure("myapp", "raw", url=raw_data_url)
    # Sum the columns within each 'category' group and write the result out
    pd.read_csv(source_csv).groupby('category').sum().to_csv(path)
# Ensure processed data exists
processed_path = pystow.ensure_custom(
"myapp", "processed",
name="aggregated_data.csv",
provider=create_processed_data,
raw_data_url="https://example.com/raw_data.csv"
)

import pystow
# Read from existing file
with pystow.open("myapp", "config", name="settings.txt", mode="r") as file:
config = file.read()
# Write to file (creates directories automatically)
with pystow.open("myapp", "logs", name="app.log", mode="w", ensure_exists=True) as file:
file.write("Application started\n")
# Download and read in one step
with pystow.ensure_open(
"myapp", "data",
url="https://example.com/data.txt"
) as file:
content = file.read()
# Work with gzipped files
with pystow.open_gz("myapp", "compressed", name="data.gz", mode="rt") as file:
data = file.read()

import pystow
# Configure download behavior
path = pystow.ensure(
"myapp", "data",
url="https://example.com/large_file.zip",
download_kwargs={
"backend": "requests", # timeout/stream/verify/headers are requests.get() keywords
"timeout": 300, # 5 minute timeout
"stream": True, # Stream download
"verify": True, # Verify SSL certificates
"headers": { # Custom headers
"User-Agent": "MyApp/1.0"
}
}
)

import json

import pystow
# Create module instance
module = pystow.module("myapp")
# Download files using module
data_path = module.ensure(
"datasets",
url="https://example.com/data.csv"
)
# Open files using module
with module.open("config", name="settings.json", mode="r") as file:
config = json.load(file)
# Custom file creation with module
processed_path = module.ensure_custom(
"processed",
name="summary.txt",
provider=lambda path: path.write_text("Summary complete")
)

Install with Tessl CLI
npx tessl i tessl/pypi-pystow