Easily pick a place to store data for your Python code with standardized directory management, caching, and data format support.
—
PyStow provides built-in support for common data formats with automatic parsing and serialization. It integrates with popular libraries like pandas, lxml, and rdflib to handle CSV, JSON, XML, RDF, Excel, and Python objects seamlessly.
def ensure_csv(key: str, *subkeys: str, url: str, name: str | None = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None, read_csv_kwargs: Mapping[str, Any] | None = None) -> pd.DataFrame:
"""Download a CSV and open as a dataframe with pandas.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
url: The URL to download.
name: Overrides the name of the file at the end of the URL, if given. Also
useful for URLs that don't have proper filenames with extensions.
force: Should the download be done again, even if the path already exists?
Defaults to false.
download_kwargs: Keyword arguments to pass through to pystow.utils.download.
read_csv_kwargs: Keyword arguments to pass through to pandas.read_csv.
Returns:
A pandas DataFrame
"""

def ensure_excel(key: str, *subkeys: str, url: str, name: str | None = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None, read_excel_kwargs: Mapping[str, Any] | None = None) -> pd.DataFrame:
"""Download an excel file and open as a dataframe with pandas.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
url: The URL to download.
name: Overrides the name of the file at the end of the URL, if given. Also
useful for URLs that don't have proper filenames with extensions.
force: Should the download be done again, even if the path already exists?
Defaults to false.
download_kwargs: Keyword arguments to pass through to pystow.utils.download.
read_excel_kwargs: Keyword arguments to pass through to pandas.read_excel.
Returns:
A pandas DataFrame
"""

def load_df(key: str, *subkeys: str, name: str, read_csv_kwargs: Mapping[str, Any] | None = None) -> pd.DataFrame:
"""Open a pre-existing CSV as a dataframe with pandas.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
read_csv_kwargs: Keyword arguments to pass through to pandas.read_csv.
Returns:
A pandas DataFrame
"""
def dump_df(key: str, *subkeys: str, name: str, obj: pd.DataFrame, sep: str = "\t", index: bool = False, to_csv_kwargs: Mapping[str, Any] | None = None) -> None:
"""Dump a dataframe to a TSV file with pandas.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
obj: The dataframe to dump
sep: The separator to use, defaults to a tab
index: Should the index be dumped? Defaults to false.
to_csv_kwargs: Keyword arguments to pass through to pandas.DataFrame.to_csv.
"""

def ensure_json(key: str, *subkeys: str, url: str, name: str | None = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None, open_kwargs: Mapping[str, Any] | None = None, json_load_kwargs: Mapping[str, Any] | None = None) -> JSON:
"""Download JSON and open with json.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
url: The URL to download.
name: Overrides the name of the file at the end of the URL, if given. Also
useful for URLs that don't have proper filenames with extensions.
force: Should the download be done again, even if the path already exists?
Defaults to false.
download_kwargs: Keyword arguments to pass through to pystow.utils.download.
open_kwargs: Additional keyword arguments passed to open
json_load_kwargs: Keyword arguments to pass through to json.load.
Returns:
A JSON object (list, dict, etc.)
"""

def ensure_json_bz2(key: str, *subkeys: str, url: str, name: str | None = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None, open_kwargs: Mapping[str, Any] | None = None, json_load_kwargs: Mapping[str, Any] | None = None) -> JSON:
"""Download BZ2-compressed JSON and open with json.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
url: The URL to download.
name: Overrides the name of the file at the end of the URL, if given. Also
useful for URLs that don't have proper filenames with extensions.
force: Should the download be done again, even if the path already exists?
Defaults to false.
download_kwargs: Keyword arguments to pass through to pystow.utils.download.
open_kwargs: Additional keyword arguments passed to bz2.open
json_load_kwargs: Keyword arguments to pass through to json.load.
Returns:
A JSON object (list, dict, etc.)
"""

def load_json(key: str, *subkeys: str, name: str, json_load_kwargs: Mapping[str, Any] | None = None) -> JSON:
"""Open a JSON file with json.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
json_load_kwargs: Keyword arguments to pass through to json.load.
Returns:
A JSON object (list, dict, etc.)
"""
def dump_json(key: str, *subkeys: str, name: str, obj: JSON, open_kwargs: Mapping[str, Any] | None = None, json_dump_kwargs: Mapping[str, Any] | None = None) -> None:
"""Dump an object to a file with json.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
obj: The object to dump
open_kwargs: Additional keyword arguments passed to open
json_dump_kwargs: Keyword arguments to pass through to json.dump.
"""

def ensure_xml(key: str, *subkeys: str, url: str, name: str | None = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None, parse_kwargs: Mapping[str, Any] | None = None) -> lxml.etree.ElementTree:
"""Download an XML file and open it with lxml.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
url: The URL to download.
name: Overrides the name of the file at the end of the URL, if given. Also
useful for URLs that don't have proper filenames with extensions.
force: Should the download be done again, even if the path already exists?
Defaults to false.
download_kwargs: Keyword arguments to pass through to pystow.utils.download.
parse_kwargs: Keyword arguments to pass through to lxml.etree.parse.
Returns:
An ElementTree object
"""

def load_xml(key: str, *subkeys: str, name: str, parse_kwargs: Mapping[str, Any] | None = None) -> lxml.etree.ElementTree:
"""Load an XML file with lxml.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
parse_kwargs: Keyword arguments to pass through to lxml.etree.parse.
Returns:
An ElementTree object
"""
def dump_xml(key: str, *subkeys: str, name: str, obj: lxml.etree.ElementTree, open_kwargs: Mapping[str, Any] | None = None, write_kwargs: Mapping[str, Any] | None = None) -> None:
"""Dump an XML element tree to a file with lxml.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
obj: The object to dump
open_kwargs: Additional keyword arguments passed to open
write_kwargs: Keyword arguments to pass through to lxml.etree.ElementTree.write.
"""

def ensure_rdf(key: str, *subkeys: str, url: str, name: str | None = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None, precache: bool = True, parse_kwargs: Mapping[str, Any] | None = None) -> rdflib.Graph:
"""Download an RDF file and open with rdflib.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
url: The URL to download.
name: Overrides the name of the file at the end of the URL, if given. Also
useful for URLs that don't have proper filenames with extensions.
force: Should the download be done again, even if the path already exists?
Defaults to false.
download_kwargs: Keyword arguments to pass through to pystow.utils.download.
precache: Should the parsed rdflib.Graph be stored as a pickle for
fast loading?
parse_kwargs: Keyword arguments to pass through to pystow.utils.read_rdf
and transitively to rdflib.Graph.parse.
Returns:
An RDF graph
"""

def load_rdf(key: str, *subkeys: str, name: str | None = None, parse_kwargs: Mapping[str, Any] | None = None) -> rdflib.Graph:
"""Open an RDF file with rdflib.
Args:
key: The name of the module. No funny characters. The envvar <key>_HOME where
key is uppercased is checked first before using the default home directory.
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
parse_kwargs: Keyword arguments to pass through to pystow.utils.read_rdf
and transitively to rdflib.Graph.parse.
Returns:
An RDF graph
"""
def dump_rdf(key: str, *subkeys: str, name: str, obj: rdflib.Graph, format: str = "turtle", serialize_kwargs: Mapping[str, Any] | None = None) -> None:
"""Dump an RDF graph to a file with rdflib.
Args:
key: The name of the module. No funny characters. The envvar <key>_HOME where
key is uppercased is checked first before using the default home directory.
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
obj: The object to dump
format: The format to dump in
serialize_kwargs: Keyword arguments to pass through to rdflib.Graph.serialize.
"""

def ensure_pickle(key: str, *subkeys: str, url: str, name: str | None = None, force: bool = False, download_kwargs: Mapping[str, Any] | None = None, mode: Literal["rb"] = "rb", open_kwargs: Mapping[str, Any] | None = None, pickle_load_kwargs: Mapping[str, Any] | None = None) -> Any:
"""Download a pickle file and open with pickle.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
url: The URL to download.
name: Overrides the name of the file at the end of the URL, if given. Also
useful for URLs that don't have proper filenames with extensions.
force: Should the download be done again, even if the path already exists?
Defaults to false.
download_kwargs: Keyword arguments to pass through to pystow.utils.download.
mode: The read mode, passed to open
open_kwargs: Additional keyword arguments passed to open
pickle_load_kwargs: Keyword arguments to pass through to pickle.load.
Returns:
Any object
"""
def load_pickle(key: str, *subkeys: str, name: str, mode: Literal["rb"] = "rb", open_kwargs: Mapping[str, Any] | None = None, pickle_load_kwargs: Mapping[str, Any] | None = None) -> Any:
"""Open a pickle file with pickle.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
mode: The read mode, passed to open
open_kwargs: Additional keyword arguments passed to open
pickle_load_kwargs: Keyword arguments to pass through to pickle.load.
Returns:
Any object
"""
def dump_pickle(key: str, *subkeys: str, name: str, obj: Any, mode: Literal["wb"] = "wb", open_kwargs: Mapping[str, Any] | None = None, pickle_dump_kwargs: Mapping[str, Any] | None = None) -> None:
"""Dump an object to a file with pickle.
Args:
key: The module name
subkeys: A sequence of additional strings to join. If none are given, returns
the directory for this module.
name: The name of the file to open
obj: The object to dump
mode: The write mode, passed to open
open_kwargs: Additional keyword arguments passed to open
pickle_dump_kwargs: Keyword arguments to pass through to pickle.dump.
"""

import pystow
import pandas as pd
# Download and parse CSV
df = pystow.ensure_csv(
"myapp", "datasets",
url="https://example.com/data.csv",
read_csv_kwargs={"sep": ",", "header": 0}
)
# Load existing CSV
df = pystow.load_df("myapp", "processed", name="clean_data.csv")
# Save DataFrame
pystow.dump_df(
"myapp", "outputs",
name="results.tsv",
obj=df,
sep="\t"
)
# Excel files
excel_df = pystow.ensure_excel(
"myapp", "reports",
url="https://example.com/report.xlsx",
read_excel_kwargs={"sheet_name": "Summary"}
)

import pystow
# Download and parse JSON
config = pystow.ensure_json(
"myapp", "config",
url="https://api.example.com/config.json"
)
# Load existing JSON
data = pystow.load_json("myapp", "cache", name="api_response.json")
# Save JSON data
pystow.dump_json(
"myapp", "outputs",
name="results.json",
obj={"status": "complete", "count": 42},
json_dump_kwargs={"indent": 2}
)
# Compressed JSON
large_data = pystow.ensure_json_bz2(
"myapp", "datasets",
url="https://example.com/large_dataset.json.bz2"
)

import pystow
from lxml import etree
# Download and parse XML
tree = pystow.ensure_xml(
"myapp", "schemas",
url="https://example.com/schema.xml"
)
# Access elements
root = tree.getroot()
elements = root.xpath("//element[@type='important']")
# Load existing XML
local_tree = pystow.load_xml("myapp", "data", name="document.xml")
# Save XML
pystow.dump_xml(
"myapp", "outputs",
name="modified.xml",
obj=tree
)

import pystow
import rdflib
# Download and parse RDF with caching
graph = pystow.ensure_rdf(
"myapp", "ontologies",
url="https://example.com/ontology.rdf.gz",
parse_kwargs={"format": "xml"},
precache=True # Cache parsed graph as pickle for speed
)
# Query the graph
results = graph.query("""
SELECT ?subject ?predicate ?object
WHERE { ?subject ?predicate ?object }
LIMIT 10
""")
# Save RDF in different format
pystow.dump_rdf(
"myapp", "outputs",
name="data.ttl",
obj=graph,
format="turtle"
)

import pystow
# Download and load pickled object
model = pystow.ensure_pickle(
"myapp", "models",
url="https://example.com/trained_model.pkl"
)
# Save Python object
data_structure = {"key": "value", "list": [1, 2, 3]}
pystow.dump_pickle(
"myapp", "cache",
name="data.pkl",
obj=data_structure
)
# Load existing pickle
cached_data = pystow.load_pickle("myapp", "cache", name="data.pkl")

Install with Tessl CLI:
npx tessl i tessl/pypi-pystow