or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

bytes-record-tries.md configuration.md index.md trie-classes.md

docs/bytes-record-tries.md

0

# Specialized Payload Tries

1

2

This page describes two specialized trie classes that map Unicode keys to lists of custom data payloads. BytesTrie stores arbitrary bytes objects, while RecordTrie stores structured data tuples that are serialized and deserialized automatically using Python's struct module.

3

4

## Capabilities

5

6

### Bytes Payload Trie

7

8

Maps unicode string keys to lists of bytes objects, enabling storage of binary data, serialized objects, or any bytes-based payloads associated with string keys.

9

10

```python { .api }

11

class BytesTrie:

12

def __init__(self, arg=None, value_separator=b'\xff', **options):

13

"""

14

Create a trie mapping unicode keys to lists of bytes payloads.

15

16

Args:

17

arg (iterable, optional): Iterable of (unicode_key, bytes_payload) tuples

18

value_separator (bytes): Separator between keys and payloads (default: b'\xff')

19

**options: Same configuration options as Trie class

20

"""

21

22

def get(self, key, default=None) -> list:

23

"""

24

Return list of bytes payloads for key or default if not found.

25

26

Args:

27

key: Unicode key to look up (str or bytes)

28

default: Value to return if key not found

29

30

Returns:

31

list or default: List of bytes objects or default value

32

"""

33

34

def __getitem__(self, key) -> list:

35

"""

36

Return list of bytes payloads for key.

37

38

Args:

39

key: Unicode key to look up

40

41

Returns:

42

list: List of bytes objects

43

44

Raises:

45

KeyError: If key is not present

46

"""

47

48

def get_value(self, key: str) -> list:

49

"""

50

Return list of bytes payloads for unicode key.

51

52

Args:

53

key (str): Unicode key to look up

54

55

Returns:

56

list: List of bytes objects

57

"""

58

59

def b_get_value(self, key: bytes) -> list:

60

"""

61

Return list of bytes payloads for UTF-8 encoded key.

62

63

Args:

64

key (bytes): UTF-8 encoded key to look up

65

66

Returns:

67

list: List of bytes objects

68

"""

69

70

def prefixes(self, key: str) -> list:

71

"""

72

Return list of all prefixes of key that have values.

73

74

Args:

75

key (str): Unicode key to find prefixes for

76

77

Returns:

78

list: List of prefix strings that exist in trie

79

"""

80

81

def items(self, prefix="") -> list:

82

"""

83

Return list of (key, payload) pairs with optional prefix.

84

85

Args:

86

prefix (str): Unicode prefix to filter items

87

88

Returns:

89

list: List of (unicode_key, bytes_payload) tuples

90

"""

91

92

def iteritems(self, prefix=""):

93

"""

94

Return iterator over (key, payload) pairs with optional prefix.

95

96

Args:

97

prefix (str): Unicode prefix to filter items

98

99

Yields:

100

tuple: (unicode_key, bytes_payload) pairs

101

"""

102

103

def keys(self, prefix="") -> list:

104

"""

105

Return list of unicode keys with optional prefix.

106

107

Args:

108

prefix (str): Unicode prefix to filter keys

109

110

Returns:

111

list: List of unicode keys

112

"""

113

114

def iterkeys(self, prefix=""):

115

"""

116

Return iterator over unicode keys with optional prefix.

117

118

Args:

119

prefix (str): Unicode prefix to filter keys

120

121

Yields:

122

str: Unicode keys

123

"""

124

125

def _raw_key(self, key: str, payload: bytes) -> bytes:

126

"""

127

Combine unicode key with bytes payload using value separator.

128

129

Args:

130

key (str): Unicode key

131

payload (bytes): Bytes payload to combine with key

132

133

Returns:

134

bytes: Combined key and payload with separator

135

"""

136

```

137

138

### Record Payload Trie

139

140

Maps Unicode string keys to lists of structured data tuples, using Python's struct module to serialize and deserialize each tuple automatically.

141

142

```python { .api }

143

class RecordTrie:

144

def __init__(self, fmt: str, arg=None, **options):

145

"""

146

Create a trie mapping unicode keys to lists of structured data tuples.

147

148

Args:

149

fmt (str): Struct format string for data serialization

150

arg (iterable, optional): Iterable of (unicode_key, data_tuple) pairs

151

**options: Same configuration options as Trie class

152

"""

153

154

def get(self, key, default=None) -> list:

155

"""

156

Return list of data tuples for key or default if not found.

157

158

Args:

159

key: Unicode key to look up

160

default: Value to return if key not found

161

162

Returns:

163

list or default: List of unpacked data tuples or default value

164

"""

165

166

def __getitem__(self, key) -> list:

167

"""

168

Return list of data tuples for key.

169

170

Args:

171

key: Unicode key to look up

172

173

Returns:

174

list: List of unpacked data tuples

175

176

Raises:

177

KeyError: If key is not present

178

"""

179

180

def items(self, prefix="") -> list:

181

"""

182

Return list of (key, data_tuple) pairs with optional prefix.

183

184

Args:

185

prefix (str): Unicode prefix to filter items

186

187

Returns:

188

list: List of (unicode_key, data_tuple) pairs

189

"""

190

191

def iteritems(self, prefix=""):

192

"""

193

Return iterator over (key, data_tuple) pairs with optional prefix.

194

195

Args:

196

prefix (str): Unicode prefix to filter items

197

198

Yields:

199

tuple: (unicode_key, data_tuple) pairs

200

"""

201

```

202

203

### Common Inherited Operations

204

205

Both BytesTrie and RecordTrie inherit container and serialization operations:

206

207

```python { .api }

208

# Container operations

209

def __contains__(self, key) -> bool:

210

"""Check if key exists in trie."""

211

212

def __len__(self) -> int:

213

"""Return number of key-value pairs."""

214

215

def __iter__(self):

216

"""Iterate over all keys."""

217

218

# Serialization operations inherited from base trie

219

def save(self, path: str):

220

"""Save trie to file path."""

221

222

def load(self, path: str):

223

"""Load trie from file path."""

224

225

def tobytes(self) -> bytes:

226

"""Return raw trie content as bytes."""

227

228

def frombytes(self, data: bytes):

229

"""Load trie from raw bytes."""

230

231

def mmap(self, path: str):

232

"""Memory map trie file for efficient access."""

233

```

234

235

## Usage Examples

236

237

### BytesTrie for Binary Data Storage

238

239

```python

240

import marisa_trie

241

import json

242

243

# Store JSON data as bytes payloads

244

data = [

245

('user:john', json.dumps({'id': 1, 'name': 'John'}).encode('utf-8')),

246

('user:jane', json.dumps({'id': 2, 'name': 'Jane'}).encode('utf-8')),

247

('user:john', json.dumps({'role': 'admin'}).encode('utf-8')), # Multiple values per key

248

('config:db', b'host=localhost;port=5432'),

249

('config:cache', b'redis://localhost:6379')

250

]

251

252

bytes_trie = marisa_trie.BytesTrie(data)

253

254

# Retrieve all values for a key (returns list)

255

user_data = bytes_trie['user:john']

256

print(f"User john data: {[json.loads(d.decode()) for d in user_data]}")

257

# Output: [{'id': 1, 'name': 'John'}, {'role': 'admin'}]

258

259

# Get the first value stored for a key, falling back to a default list if the key is missing

260

cache_config = bytes_trie.get('config:cache', [b'default'])[0]

261

print(f"Cache config: {cache_config.decode()}")

262

263

# Find all keys with prefix

264

user_keys = bytes_trie.keys(prefix='user:')

265

print(f"User keys: {user_keys}")

266

```

267

268

### BytesTrie with Custom Separators

269

270

```python

271

# Use custom separator to avoid conflicts with data

272

custom_trie = marisa_trie.BytesTrie(

273

[('key1', b'data\xff'), ('key2', b'more\xff')],

274

value_separator=b'\x00' # Use null byte as separator

275

)

276

277

values = custom_trie['key1']

278

print(f"Values: {values}") # [b'data\xff']

279

```

280

281

### RecordTrie for Structured Data

282

283

```python

284

import marisa_trie

285

286

# Store structured numeric data

287

# Struct format: '<H?' = little-endian unsigned short + boolean

288

record_data = [

289

('product:apple', (100, True)), # (price_cents, in_stock)

290

('product:apple', (95, True)), # Price history - multiple records per key

291

('product:banana', (50, False)),

292

('product:orange', (75, True))

293

]

294

295

record_trie = marisa_trie.RecordTrie('<H?', record_data)

296

297

# Retrieve structured data (automatically unpacked)

298

apple_records = record_trie['product:apple']

299

print(f"Apple records: {apple_records}")

300

# Output: [(100, True), (95, True)]

301

302

for price, in_stock in apple_records:

303

print(f"Apple: ${price/100:.2f}, Available: {in_stock}")

304

305

# Iterate over all products

306

for key, (price, in_stock) in record_trie.iteritems():

307

product = key.split(':')[1]

308

print(f"{product}: ${price/100:.2f}, Available: {in_stock}")

309

```

310

311

### Complex Record Formats

312

313

```python

314

# More complex struct format for mixed data types

315

# Format: '<10sHf?' = little-endian: 10-byte bytes field + unsigned short + float + boolean

316

complex_data = [

317

('server:web1', (b'nginx ', 80, 99.5, True)), # (name, port, uptime%, active)

318

('server:web2', (b'apache ', 8080, 95.2, True)),

319

('server:db1', (b'postgres ', 5432, 99.9, True)),

320

]

321

322

server_trie = marisa_trie.RecordTrie('<10sHf?', complex_data)

323

324

for server, (name, port, uptime, active) in server_trie.iteritems():

325

name_str = name.decode().strip()

326

status = "UP" if active else "DOWN"

327

print(f"{server}: {name_str}:{port} ({uptime:.1f}% uptime) - {status}")

328

```

329

330

### Prefix Search with Payloads

331

332

```python

333

# Find all configuration entries

334

config_items = bytes_trie.items(prefix='config:')

335

for key, payload in config_items:

336

setting = key.split(':')[1]

337

value = payload.decode()

338

print(f"{setting}: {value}")

339

340

# Find users and their data

341

for user_key in bytes_trie.keys(prefix='user:'):

342

user_payloads = bytes_trie[user_key]

343

user_name = user_key.split(':')[1]

344

print(f"User {user_name} has {len(user_payloads)} data entries")

345

```

346

347

### Serialization and Persistence

348

349

```python

350

# Save specialized tries

351

bytes_trie.save('data_store.trie')

352

record_trie.save('records.trie')

353

354

# Load with proper format specification for RecordTrie

355

loaded_records = marisa_trie.RecordTrie('<H?')

356

loaded_records.load('records.trie')

357

358

# Verify data integrity

359

assert loaded_records['product:apple'] == [(100, True), (95, True)]

360

```

361

362

### Performance Considerations

363

364

```python

365

# For large datasets, use appropriate configuration

366

large_bytes_trie = marisa_trie.BytesTrie(

367

large_data,

368

cache_size=marisa_trie.HUGE_CACHE,

369

order=marisa_trie.WEIGHT_ORDER, # Optimize for frequent lookups

370

binary=True # Use binary tail storage (safe for data containing null bytes; may need more space than the default text tail)

371

)

372

373

# Memory mapping for very large tries

374

large_bytes_trie.save('large_data.trie')

375

mapped_trie = marisa_trie.BytesTrie()

376

mapped_trie.mmap('large_data.trie') # Memory-efficient access

377

```