Disk Cache — disk and file backed persistent cache.

DiskCache provides flexible serialization engines that handle the conversion between Python objects and disk storage. The Disk class provides the base functionality with pickle-based serialization, while JSONDisk offers JSON serialization with compression for better compatibility and human-readable storage.
The base serialization class that handles conversion between Python objects and disk storage using pickle and multiple storage modes.
class Disk:
    """Base serialization engine.

    Converts Python objects to and from disk storage using pickle, and
    decides between database storage and file storage per value.
    """

    def __init__(self, directory, min_file_size=0, pickle_protocol=0):
        """
        Initialize disk serialization engine.

        Args:
            directory (str): Directory path for file storage
            min_file_size (int): Minimum size for file storage. Default 0.
                Values smaller than this are stored in database.
            pickle_protocol (int): Pickle protocol version. Default 0
                (most compatible).
        """

    @property
    def directory(self):
        """Directory path for file storage."""

    @property
    def min_file_size(self):
        """Minimum file size threshold for disk storage."""

    @property
    def pickle_protocol(self):
        """Pickle protocol version used for serialization."""

    # -- Methods for serializing and deserializing cache keys. --

    def hash(self, key):
        """
        Compute portable hash for cache key.

        Args:
            key: Cache key (must be hashable)

        Returns:
            int: Hash value for the key
        """

    def put(self, key):
        """
        Serialize key for database storage.

        Args:
            key: Cache key to serialize

        Returns:
            Tuple of (database_key, raw_flag) where:
            - database_key: Serialized key for database storage
            - raw_flag: Boolean indicating if key is stored raw
        """

    def get(self, key, raw):
        """
        Deserialize key from database storage.

        Args:
            key: Serialized key from database
            raw (bool): Whether key was stored raw

        Returns:
            Original Python key object
        """

    # -- Methods for serializing and deserializing cache values with
    # multiple storage modes. --

    def store(self, value, read, key=UNKNOWN):
        """
        Serialize value for storage.

        Determines the best storage mode and location (database vs file)
        based on value type and size.

        Args:
            value: Python value to serialize
            read (bool): Whether value should be stored for file reading
            key: Cache key (for filename generation)

        Returns:
            Tuple of (size, mode, filename, db_value) where:
            - size: Storage size in bytes
            - mode: Storage mode (0=none, 1=raw, 2=binary, 3=text, 4=pickle)
            - filename: File path if stored as file, else None
            - db_value: Serialized value for database storage
        """

    def fetch(self, mode, filename, value, read):
        """
        Deserialize value from storage.

        Args:
            mode (int): Storage mode used during store()
            filename (str): File path if value stored as file
            value: Database-stored value
            read (bool): Whether to return file handle instead of value

        Returns:
            Original Python value, or file handle if read=True
        """

    # -- Methods for managing file storage and cleanup. --

    def filename(self, key=UNKNOWN, value=UNKNOWN):
        """
        Generate filename and full path for storage.

        Args:
            key: Cache key (optional, for unique naming)
            value: Value to store (optional, for type-based naming)

        Returns:
            Tuple of (filename, full_path) where:
            - filename: Generated filename
            - full_path: Complete file path in directory
        """

    def remove(self, file_path):
        """
        Safely remove file from storage.

        Args:
            file_path (str): Path to file to remove

        Returns:
            bool: True if file was removed, False if it didn't exist
        """

# Enhanced serialization engine that uses JSON with optional compression,
# providing better compatibility and human-readable storage.
class JSONDisk(Disk):
    """Serialization engine using JSON with optional zlib compression."""

    def __init__(self, directory, compress_level=1, **kwargs):
        """
        Initialize JSON disk serialization engine.

        Args:
            directory (str): Directory path for file storage
            compress_level (int): zlib compression level (0-9). Default 1.
                0 = no compression, 9 = maximum compression
            **kwargs: Additional arguments passed to Disk constructor
        """

    @property
    def compress_level(self):
        """zlib compression level (0-9)."""

    @compress_level.setter
    def compress_level(self, value):
        """Set zlib compression level."""

    # -- JSON-specific key serialization with compression. --

    def put(self, key):
        """
        Serialize key using JSON and optional compression.

        Args:
            key: Cache key to serialize (must be JSON-serializable)

        Returns:
            Tuple of (compressed_json_key, raw_flag)

        Raises:
            TypeError: If key is not JSON-serializable
        """

    def get(self, key, raw):
        """
        Deserialize key from compressed JSON.

        Args:
            key: Compressed JSON key from database
            raw (bool): Whether key was stored raw

        Returns:
            Original Python key object
        """

    # -- JSON-specific value serialization with compression. --

    def store(self, value, read, key=UNKNOWN):
        """
        Serialize value using JSON and optional compression.

        Args:
            value: Python value to serialize (must be JSON-serializable)
            read (bool): Whether value should be stored for file reading
            key: Cache key (for filename generation)

        Returns:
            Tuple of (size, mode, filename, compressed_json_value)

        Raises:
            TypeError: If value is not JSON-serializable
        """

    def fetch(self, mode, filename, value, read):
        """
        Deserialize value from compressed JSON.

        Args:
            mode (int): Storage mode used during store()
            filename (str): File path if value stored as file
            value: Compressed JSON value from database
            read (bool): Whether to return file handle instead of value

        Returns:
            Original Python value, or file handle if read=True
        """

# DiskCache uses different storage modes based on value type and size:
# Example: basic usage with the default pickle-based Disk serialization.
import diskcache
# Create cache with default Disk serialization
cache = diskcache.Cache('/tmp/pickle_cache')
# Store various Python objects
cache.set('string', 'Hello, World!')
cache.set('number', 42)
cache.set('list', [1, 2, 3, 4, 5])
cache.set('dict', {'key': 'value', 'nested': {'a': 1}})
# Custom objects work with pickle
class Person:
    """Example custom object; pickle-serializable so the default Disk can store it."""

    def __init__(self, name, age):
        # Plain attributes; pickle captures the instance state.
        self.name = name
        self.age = age

    def __repr__(self):
        return f"Person('{self.name}', {self.age})"
cache.set('person', Person('Alice', 30))

# Retrieve objects
print(cache.get('string'))  # 'Hello, World!'
print(cache.get('person'))  # Person('Alice', 30)

# --- Example: custom Disk configuration ---
import diskcache
import pickle

# Custom Disk with specific settings
custom_disk = diskcache.Disk(
    directory='/tmp/custom_serialization',
    min_file_size=1024,  # Store values >= 1KB as files
    pickle_protocol=pickle.HIGHEST_PROTOCOL,  # Use latest pickle protocol
)
cache = diskcache.Cache('/tmp/custom_cache', disk=custom_disk)

# Small values stored in database
cache.set('small', 'small value')

# Large values stored as files
large_data = 'x' * 2000  # 2KB string
cache.set('large', large_data)

print(f"Small value: {cache.get('small')}")
print(f"Large value length: {len(cache.get('large'))}")

# --- Example: JSON serialization with JSONDisk ---
import diskcache
# Create cache with JSON serialization
json_disk = diskcache.JSONDisk('/tmp/json_serialization', compress_level=6)
cache = diskcache.Cache('/tmp/json_cache', disk=json_disk)

# Store JSON-compatible data
cache.set('config', {
    'debug': True,
    'max_connections': 100,
    'allowed_ips': ['192.168.1.1', '10.0.0.1'],
    'settings': {
        'timeout': 30,
        'retries': 3
    }
})
cache.set('metrics', [
    {'timestamp': 1609459200, 'value': 42.5},
    {'timestamp': 1609459260, 'value': 38.2},
    {'timestamp': 1609459320, 'value': 45.1}
])

# Retrieve and use data
config = cache.get('config')
print(f"Debug mode: {config['debug']}")
print(f"Max connections: {config['max_connections']}")
metrics = cache.get('metrics')
print(f"Latest metric: {metrics[-1]}")

# --- Example: comparing compression levels ---
import diskcache
import json

# Test different compression levels
test_data = {
    'users': [{'id': i, 'name': f'user_{i}', 'data': 'x' * 100} for i in range(100)]
}

# No compression
disk_no_compress = diskcache.JSONDisk('/tmp/no_compress', compress_level=0)
cache_no_compress = diskcache.Cache('/tmp/cache_no_compress', disk=disk_no_compress)

# Maximum compression
disk_max_compress = diskcache.JSONDisk('/tmp/max_compress', compress_level=9)
cache_max_compress = diskcache.Cache('/tmp/cache_max_compress', disk=disk_max_compress)

# Store same data in both caches
cache_no_compress.set('data', test_data)
cache_max_compress.set('data', test_data)

# Compare storage sizes
size_no_compress = cache_no_compress.volume()
size_max_compress = cache_max_compress.volume()
print(f"No compression: {size_no_compress} bytes")
print(f"Max compression: {size_max_compress} bytes")
print(f"Compression ratio: {size_no_compress / size_max_compress:.2f}x")

# --- Example: file-based storage of large items ---
import diskcache
# Configure for file-based storage of large items
disk = diskcache.Disk('/tmp/file_storage', min_file_size=100)  # Store items >= 100 bytes as files
cache = diskcache.Cache('/tmp/file_cache', disk=disk)

# Small item - stored in database
cache.set('small', 'tiny')

# Large item - stored as file
large_content = 'This is a large content string. ' * 10  # > 100 bytes
cache.set('large', large_content)

# Read mode - store as file for direct file access
with open('/tmp/sample.txt', 'w') as f:
    f.write('Sample file content for reading')
with open('/tmp/sample.txt', 'rb') as f:
    file_content = f.read()
cache.set('file_data', file_content, read=True)

# Get file handle instead of content
file_handle = cache.get('file_data', read=True)
if file_handle:
    content = file_handle.read()
    print(f"File content: {content.decode()}")
    file_handle.close()

# --- Example: using the Disk API directly ---
import diskcache
# Create disk instance directly
disk = diskcache.Disk('/tmp/direct_disk')

# Manual serialization operations
test_key = 'my_key'
test_value = {'data': [1, 2, 3], 'timestamp': 1609459200}

# Serialize key
db_key, raw_flag = disk.put(test_key)
print(f"Serialized key: {db_key}, raw: {raw_flag}")

# Serialize value
size, mode, filename, db_value = disk.store(test_value, read=False)
# NOTE(review): original text was garbled here ("filename: (unknown)");
# print the filename variable, matching the surrounding print statements.
print(f"Value size: {size}, mode: {mode}, filename: {filename}")

# Deserialize key
original_key = disk.get(db_key, raw_flag)
print(f"Deserialized key: {original_key}")

# Deserialize value
original_value = disk.fetch(mode, filename, db_value, read=False)
print(f"Deserialized value: {original_value}")

# Generate filename
fname, full_path = disk.filename(key=test_key, value=test_value)
print(f"Generated filename: {fname}")
print(f"Full path: {full_path}")

# --- Example: custom serialization strategy ---
import diskcache
import json
import pickle

class CustomDisk(diskcache.Disk):
    """Custom serialization that prefers JSON when possible, falls back to pickle."""

    def store(self, value, read, key=diskcache.UNKNOWN):
        # Try JSON first
        try:
            json_data = json.dumps(value, separators=(',', ':'))
            # Store as text mode with custom marker
            return len(json_data), 3, None, json_data.encode('utf-8')
        except (TypeError, ValueError):
            # Fall back to pickle for non-JSON-serializable objects
            return super().store(value, read, key)

    def fetch(self, mode, filename, value, read):
        if mode == 3 and filename is None:
            # Our custom JSON format
            try:
                json_str = value.decode('utf-8')
                return json.loads(json_str)
            except (UnicodeDecodeError, json.JSONDecodeError):
                pass
        # Fall back to parent implementation
        return super().fetch(mode, filename, value, read)
# Use custom disk
custom_disk = CustomDisk('/tmp/custom_disk')
cache = diskcache.Cache('/tmp/custom_cache', disk=custom_disk)
# JSON-serializable data uses JSON
cache.set('json_data', {'numbers': [1, 2, 3], 'text': 'hello'})
# Non-JSON data uses pickle
class CustomClass:
    """Example non-JSON-serializable object; CustomDisk pickles it."""

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        return f"CustomClass({self.value})"
cache.set('pickle_data', CustomClass(42))

# Retrieve both
json_result = cache.get('json_data')
pickle_result = cache.get('pickle_data')
print(f"JSON data: {json_result}")
print(f"Pickle data: {pickle_result}")

# Use Disk for maximum compatibility and Python object support
disk_cache = diskcache.Cache('/tmp/python_objects', disk=diskcache.Disk())

# Use JSONDisk for cross-language compatibility and human-readable storage
json_cache = diskcache.Cache('/tmp/json_data',
                             disk=diskcache.JSONDisk(compress_level=3))

# Use appropriate compression levels
# - Level 1: Fast compression, good for temporary data
# - Level 6: Balanced compression/speed, good for general use
# - Level 9: Maximum compression, good for long-term storage

# Configure file threshold based on your use case
# Small threshold: More items stored as files (faster access, more files)
small_file_disk = diskcache.Disk('/tmp/small_files', min_file_size=512)
# Large threshold: More items in database (fewer files, may be slower for large items)
large_file_disk = diskcache.Disk('/tmp/large_files', min_file_size=10240)

# --- Example: handling JSON serialization errors ---
import diskcache
try:
    # JSONDisk with data that can't be JSON-serialized
    json_cache = diskcache.Cache('/tmp/json_test',
                                 disk=diskcache.JSONDisk(compress_level=1))
    # This will work
    json_cache.set('good_data', {'key': 'value'})
    # This will raise TypeError
    json_cache.set('bad_data', set([1, 2, 3]))  # Sets aren't JSON-serializable
except TypeError as e:
    print(f"JSON serialization error: {e}")
    # Fall back to pickle-based cache
    pickle_cache = diskcache.Cache('/tmp/pickle_fallback')
    pickle_cache.set('bad_data', set([1, 2, 3]))  # This works with pickle

# Install with Tessl CLI
npx tessl i tessl/pypi-diskcache