or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

automaton-construction.md · dictionary-interface.md · index.md · pattern-search.md · serialization.md

dictionary-interface.mddocs/

0

# Dictionary Interface

1

2

Dict-like operations for accessing stored patterns and values, including existence checking, value retrieval, and iteration over keys, values, and items with optional filtering.

3

4

## Capabilities

5

6

### Value Access

7

8

Retrieve values associated with keys using dict-like methods.

9

10

```python { .api }

11

def get(self, key, default=None):

12

"""

13

Return the value associated with the key string.

14

15

Parameters:

16

- key: Key to look up

17

- default: Optional value to return if key is not found; when this argument is omitted, a missing key raises KeyError instead

18

19

Returns:

20

The value associated with key, or default if key not found

21

22

Raises:

23

- KeyError: If key not found and no default provided

24

"""

25

26

def __getitem__(self, key):

27

"""

28

Get value for key using bracket notation.

29

30

Parameters:

31

- key: Key to look up

32

33

Returns:

34

The value associated with key

35

36

Raises:

37

- KeyError: If key not found

38

"""

39

```

40

41

#### Usage Examples

42

43

```python

44

import ahocorasick

45

46

automaton = ahocorasick.Automaton()

47

automaton.add_word('hello', 'greeting')

48

automaton.add_word('world', 'place')

49

automaton.add_word('python', {'type': 'language', 'year': 1991})

50

51

# Using get() method

52

greeting = automaton.get('hello') # 'greeting'

53

missing = automaton.get('missing', 'not found') # 'not found'

54

55

# Using bracket notation

56

place = automaton['world'] # 'place'

57

lang_info = automaton['python'] # {'type': 'language', 'year': 1991}

58

59

# KeyError when key doesn't exist

60

try:

61

value = automaton['missing']

62

except KeyError:

63

print("Key not found")

64

```

65

66

### Existence Checking

67

68

Check if keys exist in the automaton.

69

70

```python { .api }

71

def exists(self, key):

72

"""

73

Return True if the key is present in the trie.

74

75

Parameters:

76

- key: Key to check

77

78

Returns:

79

bool: True if key exists, False otherwise

80

"""

81

82

def __contains__(self, key):

83

"""

84

Support for 'in' operator.

85

86

Parameters:

87

- key: Key to check

88

89

Returns:

90

bool: True if key exists, False otherwise

91

"""

92

```

93

94

#### Usage Examples

95

96

```python

97

automaton = ahocorasick.Automaton()

98

automaton.add_word('cat', 'animal')

99

automaton.add_word('car', 'vehicle')

100

101

# Using exists() method

102

has_cat = automaton.exists('cat') # True

103

has_dog = automaton.exists('dog') # False

104

105

# Using 'in' operator

106

if 'car' in automaton:

107

print("Found car!")

108

109

if 'bike' not in automaton:

110

print("Bike not found")

111

```

112

113

### Prefix Matching

114

115

Check if a key is a prefix of any stored pattern.

116

117

```python { .api }

118

def match(self, key):

119

"""

120

Return True if key is a prefix of (or is equal to) at least one key stored in the trie.

121

122

Parameters:

123

- key: Key to check as prefix

124

125

Returns:

126

bool: True if key is a prefix of any stored pattern

127

128

Examples:

129

If 'example' is in the trie, then match('e'), match('ex'),

130

match('exa'), ..., match('example') all return True.

131

"""

132

133

def longest_prefix(self, string):

134

"""

135

Return the length of the longest prefix of string that exists in the trie.

136

137

Parameters:

138

- string: String to check

139

140

Returns:

141

int: Length of longest matching prefix

142

"""

143

```

144

145

#### Usage Examples

146

147

```python

148

automaton = ahocorasick.Automaton()

149

automaton.add_word('example', 'demo')

150

automaton.add_word('explain', 'clarify')

151

152

# Prefix matching

153

print(automaton.match('e')) # True - 'e' is prefix of 'example'

154

print(automaton.match('ex')) # True - 'ex' is prefix of 'example'

155

print(automaton.match('exam')) # True - 'exam' is prefix of 'example'

156

print(automaton.match('example')) # True - exact match

157

print(automaton.match('test')) # False - no pattern starts with 'test'

158

159

# Longest prefix

160

length = automaton.longest_prefix('examples') # 7 (length of 'example')

161

length = automaton.longest_prefix('expla') # 5 (length of 'expla')

162

length = automaton.longest_prefix('xyz') # 0 (no matching prefix)

163

```

164

165

### Key Iteration

166

167

Iterate over stored keys with optional filtering.

168

169

```python { .api }

170

def keys(self, prefix=None, wildcard=None, how=ahocorasick.MATCH_AT_LEAST_PREFIX):

171

"""

172

Return an iterator on keys.

173

174

Parameters:

175

- prefix: Optional prefix string to filter keys

176

- wildcard: Optional single character; each occurrence of it in prefix matches any one character

177

- how: How to match patterns (MATCH_EXACT_LENGTH, MATCH_AT_LEAST_PREFIX,

178

MATCH_AT_MOST_PREFIX)

179

180

Returns:

181

Iterator yielding keys that match the criteria

182

"""

183

184

def __iter__(self):

185

"""

186

Default iteration over all keys.

187

188

Returns:

189

Iterator over all keys in the automaton

190

"""

191

```

192

193

#### Usage Examples

194

195

```python

196

automaton = ahocorasick.Automaton()

197

words = ['cat', 'car', 'card', 'care', 'careful', 'dog', 'door']

198

for word in words:

199

automaton.add_word(word, len(word))

200

201

# Iterate over all keys

202

all_keys = list(automaton.keys())

203

print("All keys:", all_keys)

204

205

# Alternative using __iter__

206

all_keys_iter = list(automaton)

207

print("All keys (iter):", all_keys_iter)

208

209

# Filter by prefix

210

car_words = list(automaton.keys(prefix='car'))

211

print("Keys starting with 'car':", car_words) # ['car', 'card', 'care', 'careful']

212

213

# Wildcard matching

214

pattern_keys = list(automaton.keys(prefix='ca.', wildcard='.'))

215

print("Keys matching 'ca.':", pattern_keys) # ['cat', 'car']

216

217

# Different matching modes with wildcards

218

exact_match = list(automaton.keys(prefix='ca.', wildcard='.',

219

how=ahocorasick.MATCH_EXACT_LENGTH))

220

print("Exact length match:", exact_match) # ['cat', 'car'] (exactly 3 chars)

221

222

at_least_match = list(automaton.keys(prefix='ca.', wildcard='.',

223

how=ahocorasick.MATCH_AT_LEAST_PREFIX))

224

print("At least prefix:", at_least_match) # ['cat', 'car', 'card', 'care', 'careful']

225

```

226

227

### Value Iteration

228

229

Iterate over stored values with same filtering options as keys.

230

231

```python { .api }

232

def values(self, prefix=None, wildcard=None, how=ahocorasick.MATCH_AT_LEAST_PREFIX):

233

"""

234

Return an iterator on values associated with keys.

235

236

Parameters:

237

- prefix: Optional prefix string to filter keys

238

- wildcard: Optional single character for pattern matching

239

- how: How to match patterns

240

241

Returns:

242

Iterator yielding values for keys that match the criteria

243

"""

244

```

245

246

#### Usage Example

247

248

```python

249

automaton = ahocorasick.Automaton()

250

words = {'cat': 'animal', 'car': 'vehicle', 'card': 'object', 'dog': 'animal'}

251

for word, category in words.items():

252

automaton.add_word(word, category)

253

254

# All values

255

all_values = list(automaton.values())

256

print("All values:", all_values)

257

258

# Values for keys starting with 'car'

259

car_values = list(automaton.values(prefix='car'))

260

print("Values for 'car' prefix:", car_values) # ['vehicle', 'object']

261

```

262

263

### Item Iteration

264

265

Iterate over key-value pairs with filtering options.

266

267

```python { .api }

268

def items(self, prefix=None, wildcard=None, how=ahocorasick.MATCH_AT_LEAST_PREFIX):

269

"""

270

Return an iterator on tuples of (key, value).

271

272

Parameters:

273

- prefix: Optional prefix string to filter keys

274

- wildcard: Optional single character for pattern matching

275

- how: How to match patterns

276

277

Returns:

278

Iterator yielding (key, value) tuples for keys that match criteria

279

"""

280

```

281

282

#### Usage Example

283

284

```python

285

automaton = ahocorasick.Automaton()

286

animals = {'cat': 'feline', 'car': 'vehicle', 'care': 'concern', 'dog': 'canine'}

287

for word, meaning in animals.items():

288

automaton.add_word(word, meaning)

289

290

# All items

291

all_items = list(automaton.items())

292

print("All items:", all_items)

293

294

# Items with prefix

295

car_items = list(automaton.items(prefix='car'))

296

print("Items with 'car' prefix:", car_items) # [('car', 'vehicle'), ('care', 'concern')]

297

298

# Items matching wildcard pattern

299

three_char_items = list(automaton.items(prefix='...', wildcard='.',

300

how=ahocorasick.MATCH_EXACT_LENGTH))

301

print("3-character items:", three_char_items) # [('cat', 'feline'), ('car', 'vehicle'), ('dog', 'canine')]

302

```

303

304

### Length Operation

305

306

Get the number of stored patterns.

307

308

```python { .api }

309

def __len__(self):

310

"""

311

Return the number of distinct keys added to the trie.

312

313

Returns:

314

int: Number of keys in the automaton

315

"""

316

```

317

318

#### Usage Example

319

320

```python

321

automaton = ahocorasick.Automaton()

322

print(len(automaton)) # 0

323

324

automaton.add_word('hello', 1)

325

automaton.add_word('world', 2)

326

print(len(automaton)) # 2

327

328

automaton.add_word('hello', 3) # Updating existing key

329

print(len(automaton)) # Still 2 (no new key added)

330

```

331

332

## Pattern Matching Modes

333

334

When using wildcard patterns, you can control how matches are found:

335

336

### MATCH_EXACT_LENGTH

337

Match keys that have exactly the same length as the pattern.

338

339

```python

340

# Pattern: 'c.t' (3 characters)

341

# Matches: 'cat', 'cut', 'cot'

342

# Doesn't match: 'cart', 'c', 'cute'

343

```

344

345

### MATCH_AT_LEAST_PREFIX (default when no wildcard is given; with a wildcard the default is MATCH_EXACT_LENGTH)

346

Match keys that are at least as long as the pattern.

347

348

```python

349

# Pattern: 'c.t' (3 characters)

350

# Matches: 'cat', 'cats', 'cute', 'cattle'

351

# Doesn't match: 'c', 'ca'

352

```

353

354

### MATCH_AT_MOST_PREFIX

355

Match keys that are at most as long as the pattern.

356

357

```python

358

# Pattern: 'c.t' (3 characters)

359

# Matches: 'cat', 'c', 'ca'

360

# Doesn't match: 'cart', 'cute'

361

```

362

363

## Advanced Usage Patterns

364

365

### Batch Operations

366

367

```python

368

def batch_check_existence(automaton, keys_to_check):

369

"""Check existence of multiple keys efficiently."""

370

results = {}

371

for key in keys_to_check:

372

results[key] = key in automaton

373

return results

374

375

def batch_get_values(automaton, keys_to_get, default=None):

376

"""Get values for multiple keys with default."""

377

results = {}

378

for key in keys_to_get:

379

results[key] = automaton.get(key, default)

380

return results

381

```

382

383

### Pattern Statistics

384

385

```python

386

def analyze_patterns(automaton):

387

"""Analyze stored patterns."""

388

stats = {

389

'total_patterns': len(automaton),

390

'avg_length': 0,

391

'length_distribution': {},

392

'prefix_groups': {}

393

}

394

395

total_length = 0

396

for key in automaton.keys():

397

length = len(key)

398

total_length += length

399

400

# Length distribution

401

stats['length_distribution'][length] = \

402

stats['length_distribution'].get(length, 0) + 1

403

404

# Prefix grouping

405

if length > 0:

406

prefix = key[0]

407

if prefix not in stats['prefix_groups']:

408

stats['prefix_groups'][prefix] = []

409

stats['prefix_groups'][prefix].append(key)

410

411

if stats['total_patterns'] > 0:

412

stats['avg_length'] = total_length / stats['total_patterns']

413

414

return stats

415

```

416

417

### Custom Filtering

418

419

```python

420

def filter_by_value_type(automaton, value_type):

421

"""Get keys whose values match a specific type."""

422

matching_keys = []

423

for key, value in automaton.items():

424

if isinstance(value, value_type):

425

matching_keys.append(key)

426

return matching_keys

427

428

def filter_by_value_condition(automaton, condition_func):

429

"""Get keys whose values satisfy a condition."""

430

matching_items = []

431

for key, value in automaton.items():

432

if condition_func(value):

433

matching_items.append((key, value))

434

return matching_items

435

```