or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

bytes-record-tries.md · configuration.md · index.md · trie-classes.md

docs/configuration.md

0

# Configuration and Constants

1

2

Configuration constants for optimizing trie performance, memory usage, and behavior. These constants control cache sizes, node ordering, tail storage methods, and trie count limits to fine-tune performance for specific use cases.

3

4

## Capabilities

5

6

### Cache Size Configuration

7

8

Controls the cache size used during trie construction and queries, affecting both performance and memory usage during operations.

9

10

```python { .api }

11

# Cache size constants (default first, then from largest to smallest)

12

DEFAULT_CACHE: int # Default cache size for balanced performance

13

HUGE_CACHE: int # Largest cache for maximum performance

14

LARGE_CACHE: int # Large cache for high-performance scenarios

15

NORMAL_CACHE: int # Normal cache size

16

SMALL_CACHE: int # Small cache for memory-constrained environments

17

TINY_CACHE: int # Smallest cache for minimal memory usage

18

```

19

20

### Node Ordering Configuration

21

22

Determines how nodes are arranged within the trie structure, affecting both lookup performance and the order of iteration results.

23

24

```python { .api }

25

# Node ordering constants

26

LABEL_ORDER: int # Arrange nodes in ascending label order (predictable iteration)

27

WEIGHT_ORDER: int # Arrange nodes in descending weight order (faster matching)

28

DEFAULT_ORDER: int # Default node ordering strategy

29

```

30

31

### Tail Storage Configuration

32

33

Controls how the trie stores the tail portions of keys, affecting memory usage and compatibility with different data types.

34

35

```python { .api }

36

# Tail storage method constants

37

TEXT_TAIL: int # Store tails as null-terminated strings (text data)

38

BINARY_TAIL: int # Store tails as byte sequences with bit vectors (binary data)

39

DEFAULT_TAIL: int # Default tail storage method

40

```

41

42

### Trie Count Limits

43

44

Defines the valid range for the number of tries used in the underlying MARISA-trie structure.

45

46

```python { .api }

47

# Trie count constants

48

MIN_NUM_TRIES: int # Minimum number of tries allowed

49

MAX_NUM_TRIES: int # Maximum number of tries allowed

50

DEFAULT_NUM_TRIES: int # Default number of tries

51

```

52

53

## Usage Examples

54

55

### Performance Optimization

56

57

```python

58

import marisa_trie

59

60

# High-performance configuration for frequent lookups

61

fast_trie = marisa_trie.Trie(

62

['frequent', 'lookups', 'data'],

63

cache_size=marisa_trie.HUGE_CACHE, # Maximum cache for speed

64

order=marisa_trie.WEIGHT_ORDER, # Optimize for lookup performance

65

num_tries=7 # NOTE: more tries generally make the trie smaller but lookups slower; use fewer tries when speed is the priority

66

)

67

68

# Memory-efficient configuration

69

compact_trie = marisa_trie.Trie(

70

['memory', 'efficient', 'storage'],

71

cache_size=marisa_trie.TINY_CACHE, # Minimal cache for memory savings

72

binary=True, # Use binary tail storage

73

num_tries=marisa_trie.MIN_NUM_TRIES # NOTE: fewer tries generally favor lookup speed; a higher trie count yields a smaller trie

74

)

75

```

76

77

### Data Type Optimization

78

79

```python

80

# Optimize for text data with predictable ordering

81

text_trie = marisa_trie.Trie(

82

sorted_word_list,

83

order=marisa_trie.LABEL_ORDER, # Maintain alphabetical order

84

binary=False # Use text tail storage (default)

85

)

86

87

# Optimize for binary data or data with null bytes

88

binary_trie = marisa_trie.BinaryTrie(

89

binary_keys,

90

binary=True, # Force binary tail storage

91

cache_size=marisa_trie.LARGE_CACHE

92

)

93

```

94

95

### Weighted Optimization

96

97

```python

98

# Use weights with appropriate ordering for best performance

99

high_freq_words = ['the', 'and', 'or', 'but']

100

low_freq_words = ['sesquipedalian', 'antidisestablishmentarianism']

101

all_words = high_freq_words + low_freq_words

102

103

# Assign higher weights to frequently accessed words

104

weights = [100] * len(high_freq_words) + [1] * len(low_freq_words)

105

106

optimized_trie = marisa_trie.Trie(

107

all_words,

108

weights=weights,

109

order=marisa_trie.WEIGHT_ORDER, # Essential for weight optimization

110

cache_size=marisa_trie.LARGE_CACHE

111

)

112

113

# Look up a high-frequency word (key_id returns the key's ID; it does not measure lookup speed)

114

print(f"'the' lookup speed optimized: {optimized_trie.key_id('the')}")

115

```

116

117

### Custom Configuration Validation

118

119

```python

120

def create_optimized_trie(keys, performance_level='balanced'):

121

"""Create trie with performance-appropriate settings."""

122

123

configs = {

124

'minimal': {

125

'cache_size': marisa_trie.TINY_CACHE,

126

'num_tries': marisa_trie.MIN_NUM_TRIES,

127

'binary': True

128

},

129

'balanced': {

130

'cache_size': marisa_trie.DEFAULT_CACHE,

131

'num_tries': marisa_trie.DEFAULT_NUM_TRIES,

132

'order': marisa_trie.DEFAULT_ORDER

133

},

134

'maximum': {

135

'cache_size': marisa_trie.HUGE_CACHE,

136

'num_tries': marisa_trie.MAX_NUM_TRIES,

137

'order': marisa_trie.WEIGHT_ORDER

138

}

139

}

140

141

config = configs.get(performance_level, configs['balanced'])

142

return marisa_trie.Trie(keys, **config)

143

144

# Usage

145

words = ['apple', 'banana', 'cherry']

146

fast_trie = create_optimized_trie(words, 'maximum')

147

compact_trie = create_optimized_trie(words, 'minimal')

148

```

149

150

### Configuration Impact Examples

151

152

```python

153

import time

154

import marisa_trie

155

156

large_keys = [f"key_{i:06d}" for i in range(10000)]

157

158

# Measure performance with different cache sizes

159

def benchmark_cache_size(keys, cache_size, name):

160

start = time.time()

161

trie = marisa_trie.Trie(keys, cache_size=cache_size)

162

build_time = time.time() - start

163

164

start = time.time()

165

for i in range(1000):

166

_ = trie.key_id(keys[i % len(keys)])

167

lookup_time = time.time() - start

168

169

print(f"{name}: Build={build_time:.3f}s, Lookup={lookup_time:.3f}s")

170

171

# Compare cache configurations

172

benchmark_cache_size(large_keys, marisa_trie.TINY_CACHE, "Tiny Cache")

173

benchmark_cache_size(large_keys, marisa_trie.DEFAULT_CACHE, "Default Cache")

174

benchmark_cache_size(large_keys, marisa_trie.HUGE_CACHE, "Huge Cache")

175

```

176

177

### Error Handling for Configuration

178

179

```python

180

try:

181

# Invalid num_tries value

182

invalid_trie = marisa_trie.Trie(

183

['test'],

184

num_tries=999 # Exceeds MAX_NUM_TRIES

185

)

186

except ValueError as e:

187

print(f"Configuration error: {e}")

188

189

# Check valid ranges

190

print(f"Valid trie count range: {marisa_trie.MIN_NUM_TRIES} to {marisa_trie.MAX_NUM_TRIES}")

191

print(f"Default configuration: {marisa_trie.DEFAULT_NUM_TRIES} tries")

192

193

# Validate configuration before use

194

def validate_trie_config(num_tries):

195

if not (marisa_trie.MIN_NUM_TRIES <= num_tries <= marisa_trie.MAX_NUM_TRIES):

196

raise ValueError(

197

f"num_tries must be between {marisa_trie.MIN_NUM_TRIES} "

198

f"and {marisa_trie.MAX_NUM_TRIES}, got {num_tries}"

199

)

200

return True

201

202

# Safe configuration

203

try:

204

validate_trie_config(5)

205

safe_trie = marisa_trie.Trie(['safe', 'config'], num_tries=5)

206

print("Configuration validated and trie created successfully")

207

except ValueError as e:

208

print(f"Invalid configuration: {e}")

209

```

210

211

### Advanced Configuration Patterns

212

213

```python

214

# Configuration for different use cases

215

def get_trie_config(use_case):

216

"""Return optimal configuration for specific use cases."""

217

218

configs = {

219

'autocomplete': {

220

'cache_size': marisa_trie.LARGE_CACHE,

221

'order': marisa_trie.LABEL_ORDER, # Predictable prefix order

222

'binary': False

223

},

224

'dictionary_lookup': {

225

'cache_size': marisa_trie.DEFAULT_CACHE,

226

'order': marisa_trie.WEIGHT_ORDER, # Optimize for frequent words

227

'binary': False

228

},

229

'binary_data': {

230

'cache_size': marisa_trie.NORMAL_CACHE,

231

'binary': True, # Handle null bytes properly

232

'order': marisa_trie.DEFAULT_ORDER

233

},

234

'memory_constrained': {

235

'cache_size': marisa_trie.TINY_CACHE,

236

'num_tries': marisa_trie.MIN_NUM_TRIES,

237

'binary': True # NOTE: binary tails can need more space than text tails for long labels; measure before relying on this

238

}

239

}

240

241

return configs.get(use_case, configs['dictionary_lookup'])

242

243

# Usage examples

244

autocomplete_trie = marisa_trie.Trie(

245

search_suggestions,

246

**get_trie_config('autocomplete')

247

)

248

249

binary_lookup = marisa_trie.BinaryTrie(

250

binary_patterns,

251

**get_trie_config('binary_data')

252

)

253

```