Modern high-performance serialization utilities for Python
Enhanced pickle serialization using cloudpickle for cloud computing compatibility, supporting complex objects including functions, lambdas, classes, and other Python constructs that standard pickle cannot handle.
Core pickle operations using cloudpickle for enhanced compatibility with cloud computing environments and complex Python objects.
def pickle_dumps(data, protocol=None):
    """
    Serialize a Python object with cloudpickle.

    Parameters:
    - data: Python object to serialize (supports functions, lambdas, classes)
    - protocol (int | None): Pickle protocol version (-1 for highest available)

    Returns:
    bytes: Serialized pickle data
    """
def pickle_loads(data):
    """
    Deserialize pickle bytes to a Python object.

    Parameters:
    - data (bytes): Pickle data to deserialize

    Returns:
    Any: Deserialized Python object
    """


# Direct access to cloudpickle functionality for custom serialization scenarios.
class CloudPickler:
    """
    Enhanced pickler class with cloud computing support.

    Handles serialization of functions, lambdas, classes, and other
    constructs that standard pickle cannot serialize.
    """

    def __init__(self, file, protocol=None, buffer_callback=None):
        """
        Initialize CloudPickler.

        Parameters:
        - file: File-like object to write to
        - protocol (int): Pickle protocol version
        - buffer_callback (callable): Buffer callback for pickle protocol 5
        """

    def dump(self, obj):
        """
        Serialize object to file.

        Parameters:
        - obj: Object to serialize
        """


# Compatibility alias
Pickler = CloudPickler
def dump(obj, file, protocol=None):
    """
    Serialize object to a file using cloudpickle.

    Parameters:
    - obj: Object to serialize
    - file: File-like object to write to
    - protocol (int): Pickle protocol version
    """
def dumps(obj, protocol=None):
    """
    Serialize object to bytes (alias for pickle_dumps).

    Parameters:
    - obj: Object to serialize
    - protocol (int): Pickle protocol version

    Returns:
    bytes: Serialized data
    """
def loads(data):
    """
    Deserialize from bytes (alias for pickle.loads from the standard library).

    Parameters:
    - data (bytes): Serialized data

    Returns:
    Any: Deserialized object
    """
def load(file):
    """
    Deserialize from a file-like object (alias for pickle.load from the
    standard library).

    Parameters:
    - file: File-like object containing pickled data

    Returns:
    Any: Deserialized object
    """
import srsly

# Standard data structures
data = {
    "name": "example",
    "values": [1, 2, 3, 4, 5],
    "config": {"debug": True, "timeout": 30},
}

# Serialize to bytes
pickled_data = srsly.pickle_dumps(data)
print(f"Pickled size: {len(pickled_data)} bytes")

# Deserialize from bytes
unpickled_data = srsly.pickle_loads(pickled_data)
print(f"Name: {unpickled_data['name']}")
print(f"Debug: {unpickled_data['config']['debug']}")
# Define functions and lambdas
def custom_function(x, y):
    """Custom function that can be pickled with cloudpickle."""
    return x * y + 10
import srsly

lambda_func = lambda x: x ** 2 + 1

# Serialize functions (not possible with standard pickle)
func_data = srsly.pickle_dumps(custom_function)
lambda_data = srsly.pickle_dumps(lambda_func)

# Deserialize and use functions
restored_func = srsly.pickle_loads(func_data)
restored_lambda = srsly.pickle_loads(lambda_data)
print(f"Function result: {restored_func(3, 4)}")  # 22
print(f"Lambda result: {restored_lambda(5)}")  # 26
# Define a custom class
class DataProcessor:
    """Stateful example class used to demonstrate pickling objects with state."""

    def __init__(self, multiplier=2):
        self.multiplier = multiplier
        # (input, output) pairs; carried along when the object is pickled
        self.history = []

    def process(self, value):
        """Multiply value by the configured multiplier and record the result."""
        result = value * self.multiplier
        self.history.append((value, result))
        return result

    def get_stats(self):
        """Return count and input/output averages over the processing history."""
        if not self.history:
            return {"count": 0, "avg_input": 0, "avg_output": 0}
        inputs = [h[0] for h in self.history]
        outputs = [h[1] for h in self.history]
        return {
            "count": len(self.history),
            "avg_input": sum(inputs) / len(inputs),
            "avg_output": sum(outputs) / len(outputs),
        }
import srsly

# Create and use processor
processor = DataProcessor(multiplier=3)
processor.process(10)
processor.process(20)
processor.process(15)

# Serialize the entire object with state
processor_data = srsly.pickle_dumps(processor)

# Deserialize and continue using
restored_processor = srsly.pickle_loads(processor_data)
print(f"Stats: {restored_processor.get_stats()}")
print(f"Processing 25: {restored_processor.process(25)}")
import srsly

# Complex nested data structure
complex_data = {
    "functions": {
        "add": lambda x, y: x + y,
        "multiply": lambda x, y: x * y,
    },
    "data": list(range(1000)),
    "metadata": {"version": "1.0", "author": "system"},
}

# Use highest protocol for best compression
pickled_high = srsly.pickle_dumps(complex_data, protocol=-1)
print(f"High protocol size: {len(pickled_high)} bytes")

# Use specific protocol
pickled_v4 = srsly.pickle_dumps(complex_data, protocol=4)
print(f"Protocol 4 size: {len(pickled_v4)} bytes")

# Deserialize and test functions
restored = srsly.pickle_loads(pickled_high)
add_func = restored["functions"]["add"]
multiply_func = restored["functions"]["multiply"]
print(f"Add function: {add_func(5, 3)}")  # 8
print(f"Multiply function: {multiply_func(4, 7)}")  # 28
print(f"Data length: {len(restored['data'])}")  # 1000
import io

import srsly
import srsly.cloudpickle as cloudpickle

# Serialize to a file-like object
data = {"message": "Hello, world!", "numbers": [1, 2, 3]}
buffer = io.BytesIO()

# Use cloudpickle directly for file operations
cloudpickle.dump(data, buffer)

# Read back from buffer
buffer.seek(0)
loaded_data = cloudpickle.load(buffer)
print(f"Message: {loaded_data['message']}")

# Alternative: use the high-level functions
buffer = io.BytesIO()
pickled = srsly.pickle_dumps(data)
buffer.write(pickled)
buffer.seek(0)
restored = srsly.pickle_loads(buffer.read())
print(f"Numbers: {restored['numbers']}")