Lightweight pipelining with Python functions for disk-caching, parallel computing, and fast compressed persistence
npx @tessl/cli install tessl/pypi-joblib@1.5.0

Lightweight pipelining with Python functions providing transparent disk-caching, embarrassingly parallel computing, and fast compressed persistence. Joblib is optimized for fast and robust operation on large data, with specific optimizations for NumPy arrays, serving as a foundation for computation-heavy jobs requiring performance and reproducibility.

pip install joblib

import joblib

Common imports for specific functionality:
# Usage examples for joblib's three core features: caching, parallelism, persistence.
from joblib import Memory, Parallel, delayed, dump, load
import numpy as np

# 1. Caching expensive computations
mem = Memory(location='./cache')

@mem.cache
def expensive_function(x):
    return x ** 2 + np.sum(x)

data = np.random.random(1000)
result = expensive_function(data)  # Cached for future calls

# 2. Parallel processing
def process_item(item):
    return item ** 2

results = Parallel(n_jobs=4)(delayed(process_item)(i) for i in range(10))

# 3. Fast persistence
large_data = {'array': np.random.random((1000, 1000)), 'metadata': {'version': 1}}
dump(large_data, 'data.pkl', compress=3)
loaded_data = load('data.pkl')
loaded_data = load('data.pkl')

Joblib provides three core architectural components:
This design enables seamless integration into scientific computing workflows, providing performance optimizations while maintaining simple, readable code patterns.
Transparent disk-caching of function results using the memoize pattern. Provides automatic cache invalidation, configurable storage backends, and memory-mapped array support for handling large datasets efficiently.
class Memory(Logger):
    """Transparent disk-caching of function results (memoize pattern).

    `location` is the cache directory (None disables caching), `backend`
    selects the store implementation, `mmap_mode` controls memory-mapping
    of persisted NumPy arrays, `compress` enables compressed storage, and
    `backend_options` is forwarded to the store backend.
    (Stub bodies: this spec records the public interface only.)
    """

    def __init__(self, location=None, backend="local", mmap_mode=None,
                 compress=False, verbose=1, backend_options=None): ...

    # mmap_mode=False is a sentinel meaning "inherit from the Memory object";
    # cache() may be used bare (@mem.cache) or with keyword options.
    def cache(self, func=None, ignore=None, verbose=None, mmap_mode=False,
              cache_validation_callback=None): ...
class MemorizedResult:
    """Lazy handle on a cached function result stored by Memory.

    get() loads the stored value; clear() removes it from the store.
    (Stub bodies: this spec records the public interface only.)
    """

    def get(self): ...

    def clear(self): ...
def expires_after(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0): ...
def register_store_backend(backend_name, backend): ...

Embarrassingly parallel computing with readable list-comprehension syntax. Supports multiple backends (threading, multiprocessing, loky, dask) with automatic backend selection and comprehensive configuration options.
class Parallel(Logger):
    """Readable embarrassingly-parallel helper.

    Usage: Parallel(n_jobs=...)(delayed(f)(x) for x in xs).
    `pre_dispatch`, `batch_size`, `max_nbytes`, `mmap_mode` and `temp_folder`
    tune task dispatch and shared-memory handling; `prefer`/`require` guide
    automatic backend selection. (Stub bodies: spec records the interface only.)
    """

    def __init__(self, n_jobs=None, backend=None, return_as="list", verbose=0,
                 timeout=None, pre_dispatch="2 * n_jobs", batch_size="auto",
                 temp_folder=None, max_nbytes="1M", mmap_mode="r",
                 prefer=None, require=None, **backend_kwargs): ...

    def __call__(self, iterable): ...
def delayed(function): ...
def cpu_count(only_physical_cores=False): ...
def effective_n_jobs(n_jobs=-1): ...
class parallel_config: ...
class parallel_backend: ...
def register_parallel_backend(name, factory, make_default=False): ...

Fast compressed persistence optimized for Python objects containing large NumPy arrays. Provides memory-mapping support, multiple compression algorithms, and cross-platform compatibility as a replacement for pickle.
def dump(value, filename, compress=0, protocol=None): ...
def load(filename, mmap_mode=None, ensure_native_byte_order="auto"): ...

Core utilities including object hashing, logging with timing, backend infrastructure, and compression management for extending joblib's functionality.
def hash(obj, hash_name="md5", coerce_mmap=False): ...
def wrap_non_picklable_objects(obj, keep_wrapper=True): ...
class Logger:
    """Base class providing logging/verbosity helpers; Memory and Parallel
    inherit from it in this spec.

    `depth` presumably controls call-stack display depth — TODO confirm;
    `name` overrides the logger name. (Stub body.)
    """

    def __init__(self, depth=3, name=None): ...
class PrintTime:
    """Logger-like helper that reports timing, optionally to a log file
    (`logfile`) or directory (`logdir`). (Stub body.)"""

    def __init__(self, logfile=None, logdir=None): ...
class ParallelBackendBase: ...
class StoreBackendBase: ...
def register_compressor(compressor_name, compressor, force=False): ...

# Core backend types
class ParallelBackendBase:
    """Abstract base class for parallel execution backends.

    The class-level capability flags advertise what a concrete backend
    supports; subclasses override them and implement the methods below.
    (Stub bodies: this spec records the interface only.)
    """

    # Defaults for backends that do not override these capabilities.
    default_n_jobs = 1
    supports_inner_max_num_threads = False
    supports_retrieve_callback = False
    supports_return_generator = False
    supports_timeout = False

    def effective_n_jobs(self, n_jobs): ...

    def submit(self, func, callback=None): ...

    def retrieve_result(self, futures, timeout=None): ...
class StoreBackendBase:
    """Abstract base class for storage backends used by Memory.

    Underscore-prefixed methods are low-level item primitives for concrete
    backends; clear_item/clear_path/clear remove cached entries at
    increasing granularity. (Stub bodies: spec records the interface only.)
    """

    def _open_item(self, f, mode): ...

    def _item_exists(self, location): ...

    def _move_item(self, src, dst): ...

    def clear_item(self, call_id): ...

    def clear_path(self, path): ...

    def clear(self): ...
# Configuration types
from typing import Union, Optional

# Name of a Parallel backend: "threading", "multiprocessing", "loky", "sequential", "dask"
BackendName = str
# dump()'s compress argument: a level 0-9, a bool, or a (method, level) tuple
CompressionLevel = Union[int, bool, tuple]
# numpy.memmap mode: None (no mapping), "r+", "r", "w+", or "c"
MMapMode = Optional[str]