# Extended Pickler Classes
dill provides enhanced `Pickler` and `Unpickler` classes that extend Python's standard pickle classes with support for complex objects, including functions, classes, lambdas, and other types that the standard library cannot pickle.
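For example, a lambda that the standard library refuses to serialize round-trips cleanly through dill:

```python
import pickle
import dill

square = lambda x: x ** 2

try:
    pickle.dumps(square)
except Exception as e:
    print(f"pickle failed: {e}")  # standard pickle cannot serialize lambdas

restored = dill.loads(dill.dumps(square))
print(restored(4))  # 16
```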
## Enhanced Pickler Class
```python { .api }
class Pickler:
    """
    Extended pickler with additional capabilities for complex objects.

    Provides enhanced serialization support beyond standard pickle.Pickler,
    including functions, classes, nested structures, and other complex types
    that standard pickle cannot handle.

    Attributes:
    - memo: dict, memoization cache for object references
    - bin: bool, binary mode flag
    - fast: bool, fast mode for performance optimization
    - dispatch_table: dict, custom type dispatch table

    Methods:
    - dump(obj): serialize object to file
    - save(obj): internal save method
    - persistent_id(obj): handle persistent object references
    """
```
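Before committing to a full dump, the module-level `dill.pickles` helper attempts a trial round trip and reports whether dill can handle an object:

```python
import dill

def gen():
    yield 1

print(dill.pickles(lambda x: x))  # True: dill handles lambdas
print(dill.pickles(gen()))        # False: generator objects still cannot be pickled
```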
## Enhanced Unpickler Class
```python { .api }
class Unpickler:
    """
    Extended unpickler with additional capabilities for complex objects.

    Provides enhanced deserialization support beyond standard pickle.Unpickler,
    with improved error handling, type restoration, and support for complex
    object reconstruction.

    Attributes:
    - memo: dict, memoization cache for object reconstruction
    - encoding: str, text encoding for string objects
    - errors: str, error handling mode

    Methods:
    - load(): deserialize object from file
    - persistent_load(pid): handle persistent object loading
    """
```
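The `persistent_id` and `persistent_load` hooks listed above follow the standard `pickle` protocol, so they can be overridden in subclasses. A minimal sketch, assuming a hypothetical in-memory registry of "external" objects that should travel by reference rather than by value:

```python
import io
import dill

# Hypothetical registry of external resources keyed by string ids
REGISTRY = {'db-conn': object()}

class RegistryPickler(dill.Pickler):
    def persistent_id(self, obj):
        # Store a persistent reference instead of the object itself
        for pid, registered in REGISTRY.items():
            if obj is registered:
                return pid
        return None  # fall back to normal pickling

class RegistryUnpickler(dill.Unpickler):
    def persistent_load(self, pid):
        # Resolve the reference against the live registry
        return REGISTRY[pid]

buffer = io.BytesIO()
RegistryPickler(buffer).dump({'conn': REGISTRY['db-conn'], 'n': 1})
buffer.seek(0)
data = RegistryUnpickler(buffer).load()
assert data['conn'] is REGISTRY['db-conn']
```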
## Usage Examples
### Basic Pickler Usage
```python
import dill
import io

# Create a function with closure
def create_counter(start=0):
    count = start
    def increment():
        nonlocal count
        count += 1
        return count
    return increment

counter = create_counter(10)

# Use Pickler class directly
buffer = io.BytesIO()
pickler = dill.Pickler(buffer)
pickler.dump(counter)

# Use Unpickler class directly
buffer.seek(0)
unpickler = dill.Unpickler(buffer)
restored_counter = unpickler.load()

print(restored_counter())  # 11
print(restored_counter())  # 12
```
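For one-off round trips, the module-level helpers wrap the same classes:

```python
data = dill.dumps(counter)   # Pickler + BytesIO in one call
restored = dill.loads(data)  # Unpickler + BytesIO in one call
```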
### Custom Protocol and Options
```python
import dill
import io

# Configure pickler options through settings *before* creating the
# pickler, so they take effect for it
original_settings = dill.settings.copy()
dill.settings['byref'] = True
dill.settings['recurse'] = True

# Advanced pickler configuration
buffer = io.BytesIO()
pickler = dill.Pickler(buffer, protocol=dill.HIGHEST_PROTOCOL)

complex_object = create_complex_nested_structure()  # any deeply nested object
pickler.dump(complex_object)

# Restore original settings
dill.settings.update(original_settings)

# Unpickle with custom error handling
buffer.seek(0)
unpickler = dill.Unpickler(buffer)
try:
    restored_object = unpickler.load()
except dill.UnpicklingError as e:
    print(f"Unpickling failed: {e}")
```
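Depending on the dill version, these options can also be passed per pickler rather than through global settings, which avoids mutating shared state. A sketch (check your version's `Pickler` signature):

```python
pickler = dill.Pickler(buffer, protocol=dill.HIGHEST_PROTOCOL,
                       byref=True, recurse=True)
```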
### Integration with File Objects
```python
import dill

# Direct file usage
with open('complex_data.pkl', 'wb') as f:
    pickler = dill.Pickler(f)
    pickler.dump(my_function)
    pickler.dump(my_class)
    pickler.dump(my_instance)

# Load multiple objects, in the order they were written
with open('complex_data.pkl', 'rb') as f:
    unpickler = dill.Unpickler(f)
    restored_function = unpickler.load()
    restored_class = unpickler.load()
    restored_instance = unpickler.load()
```
## Advanced Features
### Custom Dispatch Tables
```python
import copyreg
import dill
import io

class MyCustomType:
    """Example type with custom serialization."""
    def __init__(self, value=0):
        self.value = value

# Custom type handling: dispatch_table maps types to copyreg-style
# reduction functions that return a (callable, args) pair
def reduce_custom_type(obj):
    # On load, pickle calls MyCustomType(obj.value) to rebuild the object
    return (MyCustomType, (obj.value,))

# Create pickler with custom dispatch
buffer = io.BytesIO()
pickler = dill.Pickler(buffer)

# Add custom type handler
pickler.dispatch_table = copyreg.dispatch_table.copy()
pickler.dispatch_table[MyCustomType] = reduce_custom_type

# Use with custom type
custom_obj = MyCustomType(42)
pickler.dump(custom_obj)
```
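dill also ships its own registration hook for handlers that need full control of the pickling stream. A sketch using the `@dill.register` decorator, which installs a function taking the pickler and the object into the class-level `Pickler.dispatch` table:

```python
import dill

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

@dill.register(Point)
def save_point(pickler, obj):
    # Emit a reduce: on load, pickle calls Point(obj.x, obj.y)
    pickler.save_reduce(Point, (obj.x, obj.y), obj=obj)

restored = dill.loads(dill.dumps(Point(1, 2)))
print(restored.x, restored.y)  # 1 2
```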
### Memory-Efficient Streaming
```python
import dill

def serialize_large_dataset(data_iterator, filename):
    """Serialize large dataset in streaming fashion."""
    with open(filename, 'wb') as f:
        pickler = dill.Pickler(f)

        # Stream objects one by one
        for item in data_iterator:
            pickler.dump(item)

def deserialize_large_dataset(filename):
    """Deserialize large dataset in streaming fashion."""
    with open(filename, 'rb') as f:
        unpickler = dill.Unpickler(f)

        while True:
            try:
                item = unpickler.load()
                yield item
            except EOFError:
                break

# Usage
large_data = [complex_object(i) for i in range(10000)]
serialize_large_dataset(large_data, 'large_dataset.pkl')

# Process items one by one without loading all into memory
for item in deserialize_large_dataset('large_dataset.pkl'):
    process_item(item)
```
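Note that the usage above still materializes the entire list before serializing; passing a generator expression keeps the producer side streaming as well:

```python
serialize_large_dataset((complex_object(i) for i in range(10000)),
                        'large_dataset.pkl')
```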
## Error Handling and Debugging
```python
import dill
from dill import PicklingError, UnpicklingError

def safe_pickle_with_diagnostics(obj, buffer):
    """Pickle with comprehensive error handling."""
    try:
        pickler = dill.Pickler(buffer)
        pickler.dump(obj)
        return True
    except PicklingError as e:
        print(f"Pickling failed: {e}")

        # Use diagnostic tools
        bad_items = dill.detect.baditems(obj)
        if bad_items:
            print("Unpickleable items found:")
            for item in bad_items:
                print(f"  {item}")

        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False

def safe_unpickle_with_recovery(buffer):
    """Unpickle with error recovery."""
    try:
        buffer.seek(0)
        unpickler = dill.Unpickler(buffer)
        return unpickler.load()
    except UnpicklingError as e:
        print(f"Unpickling failed: {e}")
        return None
    except Exception as e:
        print(f"Unexpected error during unpickling: {e}")
        return None
```
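The `dill.detect` module can also log every object visited while pickling, which pinpoints exactly where serialization fails. A sketch using the documented `trace` toggle (`problem_object` is a placeholder for whatever object fails):

```python
import dill
import dill.detect

dill.detect.trace(True)   # log each object as it is pickled
dill.dumps(problem_object)
dill.detect.trace(False)
```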
## Performance Optimization
```python
import dill
import io

# Optimize for speed
def fast_pickle_config():
    """Configure dill for maximum speed."""
    # Use the highest protocol
    protocol = dill.HIGHEST_PROTOCOL

    # Configure for speed over size
    settings = {
        'protocol': protocol,
        'byref': False,   # Avoid reference resolution overhead
        'recurse': False  # Avoid deep recursion overhead
    }

    return settings

# Optimize for size
def compact_pickle_config():
    """Configure dill for minimum size."""
    settings = {
        'protocol': dill.HIGHEST_PROTOCOL,
        'byref': True,   # Use references to reduce duplication
        'recurse': True  # Ensure complete object graphs
    }

    return settings

# Apply configuration
fast_settings = fast_pickle_config()
buffer = io.BytesIO()

# Apply settings through global configuration *before* creating the
# pickler, so they take effect for it
original_settings = dill.settings.copy()
dill.settings.update(fast_settings)

try:
    # Create optimized pickler
    pickler = dill.Pickler(buffer, protocol=fast_settings['protocol'])
    pickler.dump(large_complex_object)
finally:
    # Restore original settings
    dill.settings.update(original_settings)
```
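A trade-off to keep in mind: `byref=True` produces smaller pickles by storing references to importable objects instead of their full definitions, but the resulting files can only be loaded in an environment where those modules and names still exist.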
## Thread Safety Considerations
```python
import dill
import threading

class ThreadSafePickler:
    """Thread-safe wrapper for dill pickler operations."""

    def __init__(self):
        self._lock = threading.Lock()

    def dump_to_bytes(self, obj, **kwargs):
        """Thread-safe serialization to bytes."""
        with self._lock:
            return dill.dumps(obj, **kwargs)

    def load_from_bytes(self, data, **kwargs):
        """Thread-safe deserialization from bytes."""
        with self._lock:
            return dill.loads(data, **kwargs)

    def dump_to_file(self, obj, filename, **kwargs):
        """Thread-safe serialization to file."""
        with self._lock:
            with open(filename, 'wb') as f:
                pickler = dill.Pickler(f, **kwargs)  # forward options to the pickler
                pickler.dump(obj)

    def load_from_file(self, filename, **kwargs):
        """Thread-safe deserialization from file."""
        with self._lock:
            with open(filename, 'rb') as f:
                unpickler = dill.Unpickler(f, **kwargs)  # forward options to the unpickler
                return unpickler.load()

# Usage in a multithreaded environment
safe_pickler = ThreadSafePickler()

def worker_thread(obj, thread_id):
    data = safe_pickler.dump_to_bytes(obj)
    restored = safe_pickler.load_from_bytes(data)
    print(f"Thread {thread_id}: object successfully serialized and restored")

# Start multiple threads
threads = []
for i in range(5):
    t = threading.Thread(target=worker_thread, args=(complex_object, i))
    threads.append(t)
    t.start()

# Wait for completion
for t in threads:
    t.join()
```