ubelt is a Python utility belt: simple tools with a stdlib-like feel, plus extra batteries.
This section covers downloading files from URLs with progress tracking, hash verification,
and caching support, together with comprehensive on-disk caching systems for computations and data.
def download(url, fpath=None, hash_prefix=None, hasher='sha512', **kwargs):
    """
    Download a file from a URL with progress tracking and verification.

    The destination defaults to an auto-generated path when ``fpath`` is not
    given. When ``hash_prefix`` is provided, the downloaded content's digest
    (computed with ``hasher``) must start with that prefix.

    Args:
        url (str): URL to download from
        fpath (str | Path): local file path (auto-generated if None)
        hash_prefix (str): expected hash prefix for verification
        hasher (str): hash algorithm ('sha512', 'sha256', 'md5')
        **kwargs: extra options, e.g. ``verbose`` (int) verbosity level,
            ``chunk_size`` (int) download chunk size in bytes, and
            ``timeout`` (float) connection timeout

    Returns:
        str: path to the downloaded file

    Raises:
        URLError: the download failed
        HashMismatchError: hash verification failed
    """
def grabdata(url, fpath=None, dpath=None, fname=None, **kwargs):
    """
    Download and cache data with automatic path handling.

    Downloads ``url`` once and reuses the cached copy on subsequent calls.

    Args:
        url (str): URL to download
        fpath (str): explicit file path (takes precedence over dpath/fname)
        dpath (str): directory for the cached file
        fname (str): filename for the cached file
        **kwargs: additional download options

    Returns:
        str: path to the cached file
    """
class DownloadManager:
    """
    Manage multiple download operations with queuing and progress tracking.

    Usable as a context manager so queued downloads are cleaned up on exit.
    """

    def __init__(self, max_workers=4):
        """
        Args:
            max_workers (int): maximum number of concurrent downloads
        """

    def submit(self, url, fpath=None, **kwargs):
        """
        Queue a download.

        Args:
            url (str): URL to download from
            fpath (str | Path): local file path (auto-generated if None)
            **kwargs: additional download options

        Returns:
            a future-like handle for the queued download
        """

    def download_all(self):
        """Run all queued downloads to completion."""

    def __enter__(self): ...

    def __exit__(self, exc_type, exc_val, exc_tb): ...

# Cache expensive computations to disk with dependency tracking and automatic invalidation.
class Cacher:
    """
    On-disk caching with dependency tracking.

    The cache is automatically invalidated when its dependencies change.
    """

    def __init__(self, fname, depends=None, dpath=None, appname='ubelt', **kwargs):
        """
        Args:
            fname (str): cache filename
            depends: dependencies that invalidate the cache when changed
            dpath (str): cache directory
            appname (str): application name for cache organization
            **kwargs: additional cache options
        """

    def tryload(self):
        """
        Try to load a previously cached result.

        Returns:
            object | None: the cached result, or None on a cache miss or
                when the cache is invalid
        """

    def save(self, data):
        """
        Save data to the cache.

        Args:
            data: value to cache
        """

    def clear(self):
        """Clear the cached data."""

    def exists(self):
        """
        Check whether the cache exists and is still valid.

        Returns:
            bool: True if the cache exists and its dependencies are unchanged
        """

    def ensure(self, func, *args, **kwargs):
        """
        Return the cached result, computing and saving it on a cache miss.

        Args:
            func: function to call on a cache miss
            *args: positional arguments for ``func``
            **kwargs: keyword arguments for ``func``

        Returns:
            object: the cached or freshly computed result
        """
class CacheStamp:
    """
    Lightweight cache stamping for file-producing computations.

    Tracks when outputs are newer than their inputs.
    """

    def __init__(self, fname, dpath=None, **kwargs):
        """
        Args:
            fname (str): stamp filename
            dpath (str): stamp directory
            **kwargs: additional stamp options
        """

    def expired(self, *depends):
        """
        Check whether the stamp is expired relative to its dependencies.

        Args:
            *depends: file paths or other dependencies

        Returns:
            bool: True if the cache is expired
        """

    def renew(self):
        """Update the cache timestamp."""

    def clear(self):
        """Remove the cache stamp."""
# Example: basic downloading with ubelt.
import ubelt as ub

# Simple download
url = 'https://example.com/data.zip'
fpath = ub.download(url)
print(f"Downloaded to: {fpath}")

# Download with verification
url = 'https://example.com/important.tar.gz'
expected_hash = 'a1b2c3d4e5f6...'  # First few characters of expected hash
fpath = ub.download(url, hash_prefix=expected_hash, hasher='sha256')

# Download to specific location
local_path = './downloads/myfile.zip'
ub.download(url, fpath=local_path, verbose=2)

# Download with caching (won't re-download if file exists)
cached_file = ub.grabdata(url, dpath='./cache')
# Example: downloading several files, sequentially and in parallel.
import ubelt as ub

# Download multiple files
urls = [
    'https://example.com/file1.zip',
    'https://example.com/file2.tar.gz',
    'https://example.com/file3.json',
]

# Sequential downloads
files = []
for url in urls:
    fpath = ub.download(url, dpath='./downloads')
    files.append(fpath)

# Parallel downloads with DownloadManager
with ub.DownloadManager(max_workers=3) as dm:
    futures = []
    for url in urls:
        future = dm.submit(url, dpath='./downloads')
        futures.append(future)
    # Get results
    files = [future.result() for future in futures]
# Example: caching expensive computations with ub.Cacher.
import ubelt as ub
import time

def expensive_computation(n):
    """Simulate expensive computation"""
    print(f"Computing for n={n}...")
    time.sleep(2)  # Simulate work
    return n ** 2

# Basic caching
cache = ub.Cacher('computation_cache')
result = cache.tryload()
if result is None:
    result = expensive_computation(100)
    cache.save(result)
print(f"Result: {result}")

# Dependency-based caching
input_file = 'input.txt'
with open(input_file, 'w') as f:
    f.write('some input data')

# Cache depends on input file
cache = ub.Cacher('file_processing', depends=[input_file])
result = cache.tryload()
if result is None:
    # Process the file
    with open(input_file, 'r') as f:
        data = f.read()
    result = data.upper()  # Simple processing
    cache.save(result)
# Cache will be invalidated if input.txt changes

# Using ensure for cleaner code
def process_data(filename):
    with open(filename, 'r') as f:
        return f.read().upper()

cache = ub.Cacher('processing', depends=[input_file])
result = cache.ensure(process_data, input_file)
# Example: stamp-based caching for file generation with ub.CacheStamp.
import ubelt as ub

# Stamp-based caching for file generation
input_files = ['input1.txt', 'input2.txt', 'config.json']
output_file = 'processed_output.json'

stamp = ub.CacheStamp('processing_stamp')
if stamp.expired(*input_files, output_file):
    print("Processing files...")
    # Do expensive file processing
    processed_data = {'result': 'processed'}
    # Write output
    import json
    with open(output_file, 'w') as f:
        json.dump(processed_data, f)
    # Update stamp
    stamp.renew()
else:
    print("Using cached output")
    # Output file exists and is newer than inputs
# Example: caches keyed on arbitrary state, app-organized caches, and cache maintenance.
import ubelt as ub

# Cache with custom dependencies
def get_data_hash():
    """Get hash of current data state"""
    return ub.hash_data({'version': '1.2', 'config': 'prod'})

# Cache that depends on data state, not just files
cache = ub.Cacher('model_cache', depends=[get_data_hash()])

def train_model():
    print("Training model...")
    return {'accuracy': 0.95, 'model': 'trained_weights'}

model = cache.ensure(train_model)

# Organized caching with app-specific directories
user_cache = ub.Cacher('user_prefs', appname='myapp')
model_cache = ub.Cacher('models', appname='myapp', dpath='./models')

# Clear caches when needed
need_fresh_data = False  # set True when upstream inputs have changed
if need_fresh_data:
    cache.clear()

# Check cache status
if cache.exists():
    print("Cache is valid")
    data = cache.tryload()
else:
    print("Cache expired or missing")

# Install with Tessl CLI
npx tessl i tessl/pypi-ubelt