A persistent cache for Python requests
76
Serialization in requests-cache handles the conversion of HTTP response objects to and from storage formats. The system supports multiple serializers including pickle variants, JSON with binary support, YAML, and BSON, each optimized for different use cases and storage backends.
The core serialization architecture uses a pipeline of stages to transform response data through multiple processing steps.
class SerializerPipeline:
    """Chain of serialization stages that process response data.

    Pipelines combine multiple stages such as base conversion (cattrs),
    format-specific preprocessing, and final serialization.
    """

    def __init__(
        self,
        stages: List[Stage],
        name: Optional[str] = None
    ):
        """Create a serializer pipeline.

        Parameters:
        - stages: List of processing stages, applied in order
        - name: Optional name for the pipeline
        """

    def dumps(self, obj: Any) -> bytes:
        """Serialize an object to bytes.

        Parameters:
        - obj: Object to serialize (typically CachedResponse)

        Returns:
        Serialized data as bytes
        """

    def loads(self, data: bytes) -> Any:
        """Deserialize bytes to an object.

        Parameters:
        - data: Serialized data as bytes

        Returns:
        Deserialized object
        """

    def set_decode_content(self, decode_content: bool) -> None:
        """Configure whether to decode response content during serialization.

        Parameters:
        - decode_content: If True, decode binary content to text when possible
        """
class Stage:
    """Individual processing stage within a serialization pipeline.

    Stages can perform data transformation, format conversion,
    or final serialization steps.
    """

    def dumps(self, obj: Any) -> Any:
        """Process an object for serialization."""

    def loads(self, data: Any) -> Any:
        """Process data during deserialization."""
def init_serializer(
    serializer: Optional[SerializerType],
    decode_content: bool
) -> Optional[SerializerPipeline]:
    """Initialize a serializer by name or instance.

    Parameters:
    - serializer: Serializer name, Stage, or SerializerPipeline
    - decode_content: Whether to decode response content

    Returns:
    Configured SerializerPipeline, or None for no serialization
    """


# Core stage that handles conversion between response objects and
# serializable data structures using the cattrs library.
class CattrStage(Stage):
    """Serialization stage using a cattrs converter.

    Handles the complex conversion between CachedResponse objects
    and dictionary representations that can be processed by
    format-specific serializers.
    """

    def __init__(
        self,
        factory: Optional[Callable] = None,
        decode_content: bool = True,
        **kwargs
    ):
        """Create a cattrs stage.

        Parameters:
        - factory: Optional factory function for converter creation
        - decode_content: Whether to decode binary response content
        - **kwargs: Additional converter configuration
        """

    def dumps(self, response: CachedResponse) -> Dict[str, Any]:
        """Convert a CachedResponse to a serializable dict."""

    def loads(self, data: Dict[str, Any]) -> CachedResponse:
        """Convert a dict back to a CachedResponse."""


# Pre-configured serializer pipelines for different formats and use cases.
# Pickle-based serializers
pickle_serializer: SerializerPipeline
"""
Standard pickle serialization for Python objects.
Fast and preserves all Python types but not cross-language compatible.
"""

safe_pickle_serializer: SerializerPipeline
"""
Pickle serialization with signature validation using itsdangerous.
Provides tamper detection for cached data.
"""

# JSON serializers
json_serializer: SerializerPipeline
"""
JSON serialization with binary content encoding.
Cross-language compatible, handles binary data via base64 encoding.
Uses ujson if available for better performance.
"""

# YAML serializer
yaml_serializer: SerializerPipeline
"""
YAML serialization for human-readable cached data.
Useful for debugging and manual cache inspection.
"""

# BSON serializers
bson_serializer: SerializerPipeline
"""
BSON (Binary JSON) serialization for MongoDB compatibility.
Efficient binary format with rich type support.
"""

bson_document_serializer: SerializerPipeline
"""
BSON serialization optimized for document storage.
Uses BSON-native types where possible.
"""

# Backend-specific serializers
dynamodb_document_serializer: SerializerPipeline
"""
Serialization optimized for DynamoDB document format.
Handles DynamoDB type constraints and limits.
"""

# Partial serializers
dict_serializer: SerializerPipeline
"""
Converts responses to dictionaries without final serialization.
Used internally and for debugging.
"""

utf8_encoder: Stage
"""
UTF-8 encoding stage for text-based formats.
Converts strings to bytes using UTF-8 encoding.
"""

# Basic serializer selection:
from requests_cache import CachedSession

# Default pickle serializer (fast, Python-only)
session = CachedSession('cache', serializer='pickle')

# JSON serializer (cross-language compatible)
session = CachedSession('cache', serializer='json')

# YAML serializer (human readable)
session = CachedSession('cache', serializer='yaml')

# BSON serializer (binary JSON)
session = CachedSession('cache', serializer='bson')

# Backend-optimized serializers:
import requests_cache  # needed: safe_pickle_serializer is referenced via the package namespace below
from requests_cache import CachedSession

# Optimized for DynamoDB backend
session = CachedSession(
    'cache',
    backend='dynamodb',
    serializer='json'  # DynamoDB works well with JSON
)

# Optimized for MongoDB/GridFS
session = CachedSession(
    'cache',
    backend='mongodb',
    serializer='bson'  # Native BSON support
)

# Safe pickle with signature validation
session = CachedSession(
    'cache',
    serializer=requests_cache.safe_pickle_serializer
)

# Creating custom serializers for specific requirements:
# Example: Custom pipeline creation
import json
from typing import Any, Dict

from requests_cache import CachedSession
# Stage must be imported too -- it is the base class of CustomJSONStage
from requests_cache.serializers import CattrStage, SerializerPipeline, Stage


class CustomJSONStage(Stage):
    """Custom JSON stage with specific formatting."""

    def dumps(self, obj: Dict[str, Any]) -> bytes:
        # Pretty-printed, key-sorted JSON gives reproducible, diff-friendly output
        return json.dumps(obj, indent=2, sort_keys=True).encode('utf-8')

    def loads(self, data: bytes) -> Dict[str, Any]:
        return json.loads(data.decode('utf-8'))


# Create custom pipeline: cattrs conversion first, then JSON encoding
custom_serializer = SerializerPipeline([
    CattrStage(decode_content=True),
    CustomJSONStage()
], name='custom_json')

# Use custom serializer
session = CachedSession(
    'cache',
    serializer=custom_serializer
)

# Global configuration for available serializers:
SERIALIZERS = {
    'bson': bson_serializer,
    'json': json_serializer,
    'pickle': pickle_serializer,
    'yaml': yaml_serializer,
}
"""Mapping of serializer names to pipeline instances."""

# Serializer feature comparison:
from requests_cache import CachedSession
import time

# Test different serializers
serializers = ['pickle', 'json', 'yaml', 'bson']
for serializer_name in serializers:
    session = CachedSession(f'{serializer_name}_cache', serializer=serializer_name)
    # perf_counter() is monotonic and high-resolution, making it the correct
    # clock for measuring elapsed intervals (time.time() can jump on clock sync)
    start_time = time.perf_counter()
    response = session.get('https://httpbin.org/json')
    # NOTE(review): this measures the whole request + cache round trip,
    # not serialization in isolation
    serialize_time = time.perf_counter() - start_time
    print(f"{serializer_name:6} - Time: {serialize_time:.3f}s, Size: {response.size} bytes")
    # All serializers preserve the same interface
    assert response.json()['slideshow']['title'] == 'Sample Slide Show'

# Content decoding configuration:
from requests_cache import CachedSession

# Decode binary content to text when possible (default)
session = CachedSession('cache', serializer='json')

# Keep binary content as-is (faster, but larger cache files)
from requests_cache.serializers import init_serializer
serializer = init_serializer('json', decode_content=False)
session = CachedSession('cache', serializer=serializer)

# Different serializers have different performance characteristics:
# Performance-optimized configuration
session = CachedSession(
    'cache',
    backend='sqlite',     # Fast local storage
    serializer='pickle',  # Fastest serialization
)

# Cross-platform compatibility
session = CachedSession(
    'cache',
    backend='redis',      # Shared storage
    serializer='json',    # Cross-language support
)

# Debugging-friendly setup
session = CachedSession(
    'cache',
    backend='filesystem',  # Easy file access
    serializer='yaml',     # Human-readable output
)

# Different serializers handle binary response content differently:
from requests_cache import CachedSession

# Download binary content
session = CachedSession('cache', serializer='json')
response = session.get('https://httpbin.org/image/png')

# JSON serializer base64-encodes binary data
print(f"Content type: {response.headers['content-type']}")
print(f"Content length: {len(response.content)} bytes")
print(f"From cache: {response.from_cache}")

# Binary data is preserved across cache storage
assert response.content[:8] == b'\x89PNG\r\n\x1a\n'  # PNG header

# Serialization errors and fallback behavior:
from requests_cache import CachedSession
import requests_cache.serializers

# Serializers handle various edge cases
session = CachedSession('cache', serializer='json')
try:
    # Some responses may have serialization challenges
    response = session.get('https://httpbin.org/encoding/utf8')
    print("Serialization successful")
except Exception as e:
    print(f"Serialization error: {e}")
    # Fallback to pickle for problematic responses
    session.serializer = requests_cache.serializers.pickle_serializer
    response = session.get('https://httpbin.org/encoding/utf8')
    print("Fallback serialization successful")

# Serializer type specifications
SerializerType = Union[str, SerializerPipeline, Stage]
"""Union type for serializer specifications (name, pipeline, or stage)."""

# Stage processing types
StageInput = Any   # Input to a serialization stage
StageOutput = Any  # Output from a serialization stage

# Install with Tessl CLI
npx tessl i tessl/pypi-requests-cache

evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10