0
# Dictionary Interface
1
2
Dict-like operations for accessing stored patterns and values, including existence checking, value retrieval, and iteration over keys, values, and items with optional filtering.
3
4
## Capabilities
5
6
### Value Access
7
8
Retrieve values associated with keys using dict-like methods.
9
10
```python { .api }
11
def get(self, key, default=None):
12
"""
13
Return the value associated with the key string.
14
15
Parameters:
16
- key: Key to look up
17
- default: Value to return if key not found
18
19
Returns:
20
The value associated with key, or default if key not found
21
22
Raises:
23
- KeyError: If key not found and the default argument is omitted (an explicitly passed default, including None, is returned instead)
24
"""
25
26
def __getitem__(self, key):
27
"""
28
Get value for key using bracket notation.
29
30
Parameters:
31
- key: Key to look up
32
33
Returns:
34
The value associated with key
35
36
Raises:
37
- KeyError: If key not found
38
"""
39
```
40
41
#### Usage Examples
42
43
```python
44
import ahocorasick
45
46
automaton = ahocorasick.Automaton()
47
automaton.add_word('hello', 'greeting')
48
automaton.add_word('world', 'place')
49
automaton.add_word('python', {'type': 'language', 'year': 1991})
50
51
# Using get() method
52
greeting = automaton.get('hello') # 'greeting'
53
missing = automaton.get('missing', 'not found') # 'not found'
54
55
# Using bracket notation
56
place = automaton['world'] # 'place'
57
lang_info = automaton['python'] # {'type': 'language', 'year': 1991}
58
59
# KeyError when key doesn't exist
60
try:
61
value = automaton['missing']
62
except KeyError:
63
print("Key not found")
64
```
65
66
### Existence Checking
67
68
Check if keys exist in the automaton.
69
70
```python { .api }
71
def exists(self, key):
72
"""
73
Return True if the key is present in the trie.
74
75
Parameters:
76
- key: Key to check
77
78
Returns:
79
bool: True if key exists, False otherwise
80
"""
81
82
def __contains__(self, key):
83
"""
84
Support for 'in' operator.
85
86
Parameters:
87
- key: Key to check
88
89
Returns:
90
bool: True if key exists, False otherwise
91
"""
92
```
93
94
#### Usage Examples
95
96
```python
97
automaton = ahocorasick.Automaton()
98
automaton.add_word('cat', 'animal')
99
automaton.add_word('car', 'vehicle')
100
101
# Using exists() method
102
has_cat = automaton.exists('cat') # True
103
has_dog = automaton.exists('dog') # False
104
105
# Using 'in' operator
106
if 'car' in automaton:
107
print("Found car!")
108
109
if 'bike' not in automaton:
110
print("Bike not found")
111
```
112
113
### Prefix Matching
114
115
Check if a key is a prefix of any stored pattern.
116
117
```python { .api }
118
def match(self, key):
119
"""
120
Return True if there is a prefix (or key) equal to key present in the trie.
121
122
Parameters:
123
- key: Key to check as prefix
124
125
Returns:
126
bool: True if key is a prefix of any stored pattern
127
128
Examples:
129
If 'example' is in the trie, then match('e'), match('ex'),
130
match('exa'), ..., match('example') all return True.
131
"""
132
133
def longest_prefix(self, string):
134
"""
135
Return the length of the longest prefix of string that exists in the trie.
136
137
Parameters:
138
- string: String to check
139
140
Returns:
141
int: Length of longest matching prefix
142
"""
143
```
144
145
#### Usage Examples
146
147
```python
148
automaton = ahocorasick.Automaton()
149
automaton.add_word('example', 'demo')
150
automaton.add_word('explain', 'clarify')
151
152
# Prefix matching
153
print(automaton.match('e')) # True - 'e' is prefix of 'example'
154
print(automaton.match('ex')) # True - 'ex' is prefix of 'example'
155
print(automaton.match('exam')) # True - 'exam' is prefix of 'example'
156
print(automaton.match('example')) # True - exact match
157
print(automaton.match('test')) # False - no pattern starts with 'test'
158
159
# Longest prefix
160
length = automaton.longest_prefix('examples') # 7 (length of 'example')
161
length = automaton.longest_prefix('expla') # 5 (length of 'expla')
162
length = automaton.longest_prefix('xyz') # 0 (no matching prefix)
163
```
164
165
### Key Iteration
166
167
Iterate over stored keys with optional filtering.
168
169
```python { .api }
170
def keys(self, prefix=None, wildcard=None, how=ahocorasick.MATCH_AT_LEAST_PREFIX):
171
"""
172
Return an iterator on keys.
173
174
Parameters:
175
- prefix: Optional prefix string to filter keys
176
- wildcard: Optional single character for pattern matching
177
- how: How to match patterns (MATCH_EXACT_LENGTH, MATCH_AT_LEAST_PREFIX,
178
MATCH_AT_MOST_PREFIX)
179
180
Returns:
181
Iterator yielding keys that match the criteria
182
"""
183
184
def __iter__(self):
185
"""
186
Default iteration over all keys.
187
188
Returns:
189
Iterator over all keys in the automaton
190
"""
191
```
192
193
#### Usage Examples
194
195
```python
196
automaton = ahocorasick.Automaton()
197
words = ['cat', 'car', 'card', 'care', 'careful', 'dog', 'door']
198
for word in words:
199
automaton.add_word(word, len(word))
200
201
# Iterate over all keys
202
all_keys = list(automaton.keys())
203
print("All keys:", all_keys)
204
205
# Alternative using __iter__
206
all_keys_iter = list(automaton)
207
print("All keys (iter):", all_keys_iter)
208
209
# Filter by prefix
210
car_words = list(automaton.keys(prefix='car'))
211
print("Keys starting with 'car':", car_words) # ['car', 'card', 'care', 'careful']
212
213
# Wildcard matching
214
pattern_keys = list(automaton.keys(prefix='ca.', wildcard='.'))
215
print("Keys matching 'ca.':", pattern_keys) # ['cat', 'car']
216
217
# Different matching modes with wildcards
218
exact_match = list(automaton.keys(prefix='ca.', wildcard='.',
219
how=ahocorasick.MATCH_EXACT_LENGTH))
220
print("Exact length match:", exact_match) # ['cat', 'car'] (exactly 3 chars)
221
222
at_least_match = list(automaton.keys(prefix='ca.', wildcard='.',
223
how=ahocorasick.MATCH_AT_LEAST_PREFIX))
224
print("At least prefix:", at_least_match) # ['cat', 'car', 'card', 'care', 'careful']
225
```
226
227
### Value Iteration
228
229
Iterate over stored values with same filtering options as keys.
230
231
```python { .api }
232
def values(self, prefix=None, wildcard=None, how=ahocorasick.MATCH_AT_LEAST_PREFIX):
233
"""
234
Return an iterator on values associated with keys.
235
236
Parameters:
237
- prefix: Optional prefix string to filter keys
238
- wildcard: Optional single character for pattern matching
239
- how: How to match patterns
240
241
Returns:
242
Iterator yielding values for keys that match the criteria
243
"""
244
```
245
246
#### Usage Example
247
248
```python
249
automaton = ahocorasick.Automaton()
250
words = {'cat': 'animal', 'car': 'vehicle', 'card': 'object', 'dog': 'animal'}
251
for word, category in words.items():
252
automaton.add_word(word, category)
253
254
# All values
255
all_values = list(automaton.values())
256
print("All values:", all_values)
257
258
# Values for keys starting with 'car'
259
car_values = list(automaton.values(prefix='car'))
260
print("Values for 'car' prefix:", car_values) # ['vehicle', 'object']
261
```
262
263
### Item Iteration
264
265
Iterate over key-value pairs with filtering options.
266
267
```python { .api }
268
def items(self, prefix=None, wildcard=None, how=ahocorasick.MATCH_AT_LEAST_PREFIX):
269
"""
270
Return an iterator on tuples of (key, value).
271
272
Parameters:
273
- prefix: Optional prefix string to filter keys
274
- wildcard: Optional single character for pattern matching
275
- how: How to match patterns
276
277
Returns:
278
Iterator yielding (key, value) tuples for keys that match criteria
279
"""
280
```
281
282
#### Usage Example
283
284
```python
285
automaton = ahocorasick.Automaton()
286
animals = {'cat': 'feline', 'car': 'vehicle', 'care': 'concern', 'dog': 'canine'}
287
for word, meaning in animals.items():
288
automaton.add_word(word, meaning)
289
290
# All items
291
all_items = list(automaton.items())
292
print("All items:", all_items)
293
294
# Items with prefix
295
car_items = list(automaton.items(prefix='car'))
296
print("Items with 'car' prefix:", car_items) # [('car', 'vehicle'), ('care', 'concern')]
297
298
# Items matching wildcard pattern
299
three_char_items = list(automaton.items(prefix='...', wildcard='.',
300
how=ahocorasick.MATCH_EXACT_LENGTH))
301
print("3-character items:", three_char_items) # [('cat', 'feline'), ('car', 'vehicle'), ('dog', 'canine')]
302
```
303
304
### Length Operation
305
306
Get the number of stored patterns.
307
308
```python { .api }
309
def __len__(self):
310
"""
311
Return the number of distinct keys added to the trie.
312
313
Returns:
314
int: Number of keys in the automaton
315
"""
316
```
317
318
#### Usage Example
319
320
```python
321
automaton = ahocorasick.Automaton()
322
print(len(automaton)) # 0
323
324
automaton.add_word('hello', 1)
325
automaton.add_word('world', 2)
326
print(len(automaton)) # 2
327
328
automaton.add_word('hello', 3) # Updating existing key
329
print(len(automaton)) # Still 2 (no new key added)
330
```
331
332
## Pattern Matching Modes
333
334
When using wildcard patterns, you can control how matches are found:
335
336
### MATCH_EXACT_LENGTH
337
Match keys that have exactly the same length as the pattern.
338
339
```python
340
# Pattern: 'c.t' (3 characters)
341
# Matches: 'cat', 'cut', 'cot'
342
# Doesn't match: 'cart', 'c', 'cute'
343
```
344
345
### MATCH_AT_LEAST_PREFIX (Default)
346
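Match keys whose leading characters match the pattern; a matching key is at least as long as the pattern. This is how plain prefix filtering (no wildcard) behaves. Note that the upstream pyahocorasick documentation lists MATCH_EXACT_LENGTH as the default once a wildcard is supplied, which is why `keys(prefix='ca.', wildcard='.')` without `how` yields only three-character keys; pass `how` explicitly when a wildcard is used.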
347
348
```python
349
# Pattern: 'c.t' (3 characters)
350
# Matches: 'cat', 'cats', 'cute', 'cattle'
351
# Doesn't match: 'c', 'ca'
352
```
353
354
### MATCH_AT_MOST_PREFIX
355
Match keys that are at most as long as the pattern.
356
357
```python
358
# Pattern: 'c.t' (3 characters)
359
# Matches: 'cat', 'c', 'ca'
360
# Doesn't match: 'cart', 'cute'
361
```
362
363
## Advanced Usage Patterns
364
365
### Batch Operations
366
367
```python
368
def batch_check_existence(automaton, keys_to_check):
    """Report, for each requested key, whether the automaton contains it.

    Parameters:
    - automaton: Container supporting the ``in`` operator for keys
    - keys_to_check: Iterable of keys to test

    Returns:
    dict mapping each key from ``keys_to_check`` to True or False.
    Duplicate keys in the input collapse to a single entry.
    """
    return {key: key in automaton for key in keys_to_check}
374
375
def batch_get_values(automaton, keys_to_get, default=None):
    """Look up several keys at once, substituting a default for misses.

    Parameters:
    - automaton: Container exposing ``get(key, default)``
    - keys_to_get: Iterable of keys to look up
    - default: Value stored in the result for keys that are not found

    Returns:
    dict mapping each key from ``keys_to_get`` to the result of
    ``automaton.get(key, default)``.
    """
    return {key: automaton.get(key, default) for key in keys_to_get}
381
```
382
383
### Pattern Statistics
384
385
```python
386
def analyze_patterns(automaton):
    """Summarize the keys stored in an automaton.

    Parameters:
    - automaton: Container supporting ``len()`` and ``keys()``

    Returns:
    dict with four entries:
    - 'total_patterns': ``len(automaton)``
    - 'avg_length': total key length divided by 'total_patterns',
      or 0 when there are no patterns
    - 'length_distribution': dict mapping key length to number of keys
    - 'prefix_groups': dict mapping each key's first element to the
      list of keys that start with it, in iteration order
    """
    total_patterns = len(automaton)
    length_distribution = {}
    prefix_groups = {}
    total_length = 0

    for key in automaton.keys():
        length = len(key)
        total_length += length

        # Tally how many keys share this length.
        length_distribution[length] = length_distribution.get(length, 0) + 1

        # Group by first element; an empty key has nothing to group on.
        if length > 0:
            prefix_groups.setdefault(key[0], []).append(key)

    return {
        'total_patterns': total_patterns,
        # Guard against division by zero for an empty automaton.
        'avg_length': total_length / total_patterns if total_patterns > 0 else 0,
        'length_distribution': length_distribution,
        'prefix_groups': prefix_groups,
    }
415
```
416
417
### Custom Filtering
418
419
```python
420
def filter_by_value_type(automaton, value_type):
    """Collect the keys whose stored value is an instance of a given type.

    Parameters:
    - automaton: Container exposing ``items()`` yielding (key, value) pairs
    - value_type: Type (or tuple of types) passed to ``isinstance``

    Returns:
    list of matching keys, in the order ``items()`` yields them
    """
    return [
        key
        for key, value in automaton.items()
        if isinstance(value, value_type)
    ]
427
428
def filter_by_value_condition(automaton, condition_func):
    """Collect the (key, value) pairs whose value satisfies a predicate.

    Parameters:
    - automaton: Container exposing ``items()`` yielding (key, value) pairs
    - condition_func: Callable taking a value; a truthy result keeps the pair

    Returns:
    list of (key, value) tuples, in the order ``items()`` yields them
    """
    return [
        (key, value)
        for key, value in automaton.items()
        if condition_func(value)
    ]
435
```