0
# Specialized Payload Tries
1
2
Advanced trie implementations that map unicode keys to lists of custom data payloads. BytesTrie handles arbitrary bytes objects while RecordTrie provides structured data support with automatic serialization using Python's struct module.
3
4
## Capabilities
5
6
### Bytes Payload Trie
7
8
Maps unicode string keys to lists of bytes objects, enabling storage of binary data, serialized objects, or any bytes-based payloads associated with string keys.
9
10
```python { .api }
11
class BytesTrie:
12
def __init__(self, arg=None, value_separator=b'\xff', **options):
13
"""
14
Create a trie mapping unicode keys to lists of bytes payloads.
15
16
Args:
17
arg (iterable, optional): Iterable of (unicode_key, bytes_payload) tuples
18
value_separator (bytes): Separator between keys and payloads (default: b'\xff')
19
**options: Same configuration options as Trie class
20
"""
21
22
def get(self, key, default=None) -> list:
23
"""
24
Return list of bytes payloads for key or default if not found.
25
26
Args:
27
key: Unicode key to look up (str or bytes)
28
default: Value to return if key not found
29
30
Returns:
31
list or default: List of bytes objects or default value
32
"""
33
34
def __getitem__(self, key) -> list:
35
"""
36
Return list of bytes payloads for key.
37
38
Args:
39
key: Unicode key to look up
40
41
Returns:
42
list: List of bytes objects
43
44
Raises:
45
KeyError: If key is not present
46
"""
47
48
def get_value(self, key: str) -> list:
49
"""
50
Return list of bytes payloads for unicode key.
51
52
Args:
53
key (str): Unicode key to look up
54
55
Returns:
56
list: List of bytes objects
57
"""
58
59
def b_get_value(self, key: bytes) -> list:
60
"""
61
Return list of bytes payloads for UTF-8 encoded key.
62
63
Args:
64
key (bytes): UTF-8 encoded key to look up
65
66
Returns:
67
list: List of bytes objects
68
"""
69
70
def prefixes(self, key: str) -> list:
71
"""
72
Return list of all prefixes of key that have values.
73
74
Args:
75
key (str): Unicode key to find prefixes for
76
77
Returns:
78
list: List of prefix strings that exist in trie
79
"""
80
81
def items(self, prefix="") -> list:
82
"""
83
Return list of (key, payload) pairs with optional prefix.
84
85
Args:
86
prefix (str): Unicode prefix to filter items
87
88
Returns:
89
list: List of (unicode_key, bytes_payload) tuples
90
"""
91
92
def iteritems(self, prefix=""):
93
"""
94
Return iterator over (key, payload) pairs with optional prefix.
95
96
Args:
97
prefix (str): Unicode prefix to filter items
98
99
Yields:
100
tuple: (unicode_key, bytes_payload) pairs
101
"""
102
103
def keys(self, prefix="") -> list:
104
"""
105
Return list of unicode keys with optional prefix.
106
107
Args:
108
prefix (str): Unicode prefix to filter keys
109
110
Returns:
111
list: List of unicode keys
112
"""
113
114
def iterkeys(self, prefix=""):
115
"""
116
Return iterator over unicode keys with optional prefix.
117
118
Args:
119
prefix (str): Unicode prefix to filter keys
120
121
Yields:
122
str: Unicode keys
123
"""
124
125
def _raw_key(self, key: str, payload: bytes) -> bytes:
126
"""
127
Combine unicode key with bytes payload using value separator.
128
129
Args:
130
key (str): Unicode key
131
payload (bytes): Bytes payload to combine with key
132
133
Returns:
134
bytes: Combined key and payload with separator
135
"""
136
```
137
138
### Record Payload Trie
139
140
Maps unicode string keys to lists of structured data tuples using Python's struct module for automatic serialization and deserialization.
141
142
```python { .api }
143
class RecordTrie:
144
def __init__(self, fmt: str, arg=None, **options):
145
"""
146
Create a trie mapping unicode keys to lists of structured data tuples.
147
148
Args:
149
fmt (str): Struct format string for data serialization
150
arg (iterable, optional): Iterable of (unicode_key, data_tuple) pairs
151
**options: Same configuration options as Trie class
152
"""
153
154
def get(self, key, default=None) -> list:
155
"""
156
Return list of data tuples for key or default if not found.
157
158
Args:
159
key: Unicode key to look up
160
default: Value to return if key not found
161
162
Returns:
163
list or default: List of unpacked data tuples or default value
164
"""
165
166
def __getitem__(self, key) -> list:
167
"""
168
Return list of data tuples for key.
169
170
Args:
171
key: Unicode key to look up
172
173
Returns:
174
list: List of unpacked data tuples
175
176
Raises:
177
KeyError: If key is not present
178
"""
179
180
def items(self, prefix="") -> list:
181
"""
182
Return list of (key, data_tuple) pairs with optional prefix.
183
184
Args:
185
prefix (str): Unicode prefix to filter items
186
187
Returns:
188
list: List of (unicode_key, data_tuple) pairs
189
"""
190
191
def iteritems(self, prefix=""):
192
"""
193
Return iterator over (key, data_tuple) pairs with optional prefix.
194
195
Args:
196
prefix (str): Unicode prefix to filter items
197
198
Yields:
199
tuple: (unicode_key, data_tuple) pairs
200
"""
201
```
202
203
### Common Inherited Operations
204
205
Both BytesTrie and RecordTrie inherit container and serialization operations:
206
207
```python { .api }
208
# Container operations
209
def __contains__(self, key) -> bool:
210
"""Check if key exists in trie."""
211
212
def __len__(self) -> int:
213
"""Return number of key-value pairs."""
214
215
def __iter__(self):
216
"""Iterate over all keys."""
217
218
# Serialization operations inherited from base trie
219
def save(self, path: str):
220
"""Save trie to file path."""
221
222
def load(self, path: str):
223
"""Load trie from file path."""
224
225
def tobytes(self) -> bytes:
226
"""Return raw trie content as bytes."""
227
228
def frombytes(self, data: bytes):
229
"""Load trie from raw bytes."""
230
231
def mmap(self, path: str):
232
"""Memory map trie file for efficient access."""
233
```
234
235
## Usage Examples
236
237
### BytesTrie for Binary Data Storage
238
239
```python
240
import marisa_trie
241
import json
242
243
# Store JSON data as bytes payloads
244
data = [
245
('user:john', json.dumps({'id': 1, 'name': 'John'}).encode('utf-8')),
246
('user:jane', json.dumps({'id': 2, 'name': 'Jane'}).encode('utf-8')),
247
('user:john', json.dumps({'role': 'admin'}).encode('utf-8')), # Multiple values per key
248
('config:db', b'host=localhost;port=5432'),
249
('config:cache', b'redis://localhost:6379')
250
]
251
252
bytes_trie = marisa_trie.BytesTrie(data)
253
254
# Retrieve all values for a key (returns list)
255
user_data = bytes_trie['user:john']
256
print(f"User john data: {[json.loads(d.decode()) for d in user_data]}")
257
# Output: [{'id': 1, 'name': 'John'}, {'role': 'admin'}]
258
259
# Get single value or default
260
cache_config = bytes_trie.get('config:cache', [b'default'])[0]
261
print(f"Cache config: {cache_config.decode()}")
262
263
# Find all keys with prefix
264
user_keys = bytes_trie.keys(prefix='user:')
265
print(f"User keys: {user_keys}")
266
```
267
268
### BytesTrie with Custom Separators
269
270
```python
271
# Use a custom separator byte; it must never occur in an encoded key (payloads may contain any bytes)
272
custom_trie = marisa_trie.BytesTrie(
273
[('key1', b'data\xff'), ('key2', b'more\xff')],
274
value_separator=b'\x00' # Use null byte as separator
275
)
276
277
values = custom_trie['key1']
278
print(f"Values: {values}") # [b'data\xff']
279
```
280
281
### RecordTrie for Structured Data
282
283
```python
284
import marisa_trie
285
286
# Store structured numeric data
287
# Struct format: '<H?' = little-endian unsigned short + boolean
288
record_data = [
289
('product:apple', (100, True)), # (price_cents, in_stock)
290
('product:apple', (95, True)), # Price history - multiple records per key
291
('product:banana', (50, False)),
292
('product:orange', (75, True))
293
]
294
295
record_trie = marisa_trie.RecordTrie('<H?', record_data)
296
297
# Retrieve structured data (automatically unpacked)
298
apple_records = record_trie['product:apple']
299
print(f"Apple records: {apple_records}")
300
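# Output: [(100, True), (95, True)] (the order of records for a key follows the trie's internal order and may differ from insertion order)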
301
302
for price, in_stock in apple_records:
303
print(f"Apple: ${price/100:.2f}, Available: {in_stock}")
304
305
# Iterate over all products
306
for key, (price, in_stock) in record_trie.iteritems():
307
product = key.split(':')[1]
308
print(f"{product}: ${price/100:.2f}, Available: {in_stock}")
309
```
310
311
### Complex Record Formats
312
313
```python
314
# More complex struct format for mixed data types
315
# Format: '<10sHf?' = 10-byte string (shorter values are padded with null bytes by struct) + unsigned short + float + boolean
316
complex_data = [
317
('server:web1', (b'nginx ', 80, 99.5, True)), # (name, port, uptime%, active)
318
('server:web2', (b'apache ', 8080, 95.2, True)),
319
('server:db1', (b'postgres ', 5432, 99.9, True)),
320
]
321
322
server_trie = marisa_trie.RecordTrie('<10sHf?', complex_data)
323
324
for server, (name, port, uptime, active) in server_trie.iteritems():
325
name_str = name.decode().strip()
326
status = "UP" if active else "DOWN"
327
print(f"{server}: {name_str}:{port} ({uptime:.1f}% uptime) - {status}")
328
```
329
330
### Prefix Search with Payloads
331
332
```python
333
# Find all configuration entries
334
config_items = bytes_trie.items(prefix='config:')
335
for key, payload in config_items:
336
setting = key.split(':')[1]
337
value = payload.decode()
338
print(f"{setting}: {value}")
339
340
# Find users and their data
341
for user_key in bytes_trie.keys(prefix='user:'):
342
user_payloads = bytes_trie[user_key]
343
user_name = user_key.split(':')[1]
344
print(f"User {user_name} has {len(user_payloads)} data entries")
345
```
346
347
### Serialization and Persistence
348
349
```python
350
# Save specialized tries
351
bytes_trie.save('data_store.trie')
352
record_trie.save('records.trie')
353
354
# Load with proper format specification for RecordTrie
355
loaded_records = marisa_trie.RecordTrie('<H?')
356
loaded_records.load('records.trie')
357
358
# Verify data integrity
359
assert loaded_records['product:apple'] == [(100, True), (95, True)]
360
```
361
362
### Performance Considerations
363
364
```python
365
# For large datasets, use appropriate configuration
366
large_bytes_trie = marisa_trie.BytesTrie(
367
large_data,
368
cache_size=marisa_trie.HUGE_CACHE,
369
order=marisa_trie.WEIGHT_ORDER, # Optimize for frequent lookups
370
binary=True # Use binary tail storage (handles null bytes in stored data; text tail is the more compact mode)
371
)
372
373
# Memory mapping for very large tries
374
large_bytes_trie.save('large_data.trie')
375
mapped_trie = marisa_trie.BytesTrie()
376
mapped_trie.mmap('large_data.trie') # Memory-efficient access
377
```