or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-operations.md geographic-services.md index.md short-numbers.md text-processing.md utilities.md

docs/text-processing.md

0

# Text Processing

1

2

Phone number discovery and extraction from unstructured text, plus as-you-type formatting for user interfaces. These tools enable applications to find phone numbers in documents, messages, and user input while providing real-time formatting feedback.

3

4

## Capabilities

5

6

### Phone Number Discovery in Text

7

8

Extract and identify phone numbers from unstructured text using configurable matching algorithms with different levels of strictness.

9

10

```python { .api }

11

class PhoneNumberMatcher:

12

"""Finds phone numbers in text strings."""

13

14

def __init__(self, text: str, region: str | None, leniency: int = Leniency.VALID, max_tries: int = 65535):

15

"""

16

Initialize matcher for finding phone numbers in text.

17

18

Parameters:

19

- text: Text to search for phone numbers

20

- region: Default region for parsing numbers without country codes

21

- leniency: Matching strictness level (Leniency enum value)

22

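- max_tries: Maximum number of invalid candidate numbers to try before giving up on the text; this bounds wasted work on false positives, it is not a cap on the number of matches returned (default: 65535)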

23

"""

24

25

def has_next(self) -> bool:

26

"""Check if more matches exist."""

27

28

def next(self) -> PhoneNumberMatch:

29

"""Get next phone number match."""

30

31

def __iter__(self):

32

"""Iterator support for for-loops."""

33

34

class PhoneNumberMatch:

35

"""Represents a phone number found in text."""

36

37

start: int # Start position in text

38

end: int # End position in text (exclusive; equals start + len(raw_string))

39

raw_string: str # Raw matched string from text

40

number: PhoneNumber # Parsed phone number object

41

42

def __init__(self, start: int, raw_string: str, numobj: PhoneNumber):

43

"""

44

Initialize a phone number match.

45

46

Parameters:

47

- start: Start position in original text

48

- raw_string: Raw string that was matched

49

- numobj: Parsed PhoneNumber object

50

"""

51

```

52

53

**Leniency Levels:**

54

55

```python { .api }

56

class Leniency:

57

"""Leniency levels for phone number matching in text."""

58

POSSIBLE = 0 # Most permissive, matches possible numbers

59

VALID = 1 # Matches valid numbers only (default)

60

STRICT_GROUPING = 2 # Valid + proper digit grouping

61

EXACT_GROUPING = 3 # Most restrictive, exact formatting match

62

```

63

64

**Usage Examples:**

65

66

```python

67

# Basic phone number extraction

68

text = "Call me at 510-748-8230 if it's before 9:30, or on 703-4800500 after 10am."

69

70

# Find all phone numbers with default settings

71

matches = list(phonenumbers.PhoneNumberMatcher(text, "US"))

72

73

for match in matches:

74

print(f"Found: {match.raw_string}")

75

print(f"Position: {match.start}-{match.end}")

76

print(f"Formatted: {phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.E164)}")

77

print()

78

79

# Output:

80

# Found: 510-748-8230

81

# Position: 11-23

82

# Formatted: +15107488230

83

#

84

# Found: 703-4800500

85

# Position: 51-62

86

# Formatted: +17034800500

87

88

# Using different leniency levels

89

strict_matcher = phonenumbers.PhoneNumberMatcher(text, "US", phonenumbers.Leniency.EXACT_GROUPING)

90

lenient_matcher = phonenumbers.PhoneNumberMatcher(text, "US", phonenumbers.Leniency.POSSIBLE)

91

92

strict_matches = list(strict_matcher)

93

lenient_matches = list(lenient_matcher)

94

95

print(f"Strict matching found: {len(strict_matches)} numbers")

96

print(f"Lenient matching found: {len(lenient_matches)} numbers")

97

```

98

99

### International Phone Number Detection

100

101

Extract phone numbers from international text with various country contexts.

102

103

**Usage Examples:**

104

105

```python

106

# International text with mixed formats

107

international_text = """

108

Contact our offices:

109

US Office: +1 (650) 253-2222

110

UK Office: +44 20 8366 1177

111

Local UK: 020 8366 1177

112

France: +33 1 42 68 53 00

113

Germany: 030 12345678

114

"""

115

116

# Parse with no default region (requires country codes)

117

international_matches = list(phonenumbers.PhoneNumberMatcher(international_text, None))

118

119

for match in international_matches:

120

region = phonenumbers.region_code_for_number(match.number)

121

formatted = phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.INTERNATIONAL)

122

print(f"{match.raw_string} -> {formatted} ({region})")

123

124

# Parse with specific region context

125

uk_context_matches = list(phonenumbers.PhoneNumberMatcher(international_text, "GB"))

126

127

# This will also match "020 8366 1177" as a valid UK number

128

for match in uk_context_matches:

129

if match.raw_string == "020 8366 1177":

130

print(f"Local UK number detected: {match.raw_string}")

131

132

# Multi-region extraction with context switching

133

def extract_numbers_by_region(text, regions):
    """Scan ``text`` once per region and collect the matches for each.

    Parameters:
    - text: Text to search for phone numbers
    - regions: Iterable of region codes, each used in turn as the default region

    Returns:
    Dict mapping each region code to the list of matches found with that
    region as the parsing context.
    """
    return {
        region_code: list(phonenumbers.PhoneNumberMatcher(text, region_code))
        for region_code in regions
    }

143

144

regions_to_try = ["US", "GB", "DE", "FR"]

145

regional_results = extract_numbers_by_region(international_text, regions_to_try)

146

147

for region, matches in regional_results.items():

148

print(f"\nWith {region} context: {len(matches)} matches")

149

for match in matches:

150

print(f" {match.raw_string}")

151

```

152

153

### Text Processing with Match Filtering

154

155

Advanced filtering and processing of found phone numbers.

156

157

**Usage Examples:**

158

159

```python

160

# Complex text with various number formats

161

complex_text = """

162

Customer service: 1-800-555-0123 (toll-free)

163

Emergency: 911

164

International: +44 20 7946 0958

165

Fax: (555) 123-4567 ext. 789

166

Invalid: 123-45 (too short)

167

Another: +1.555.987.6543

168

Website: Call 555-HELP (555-4357) for assistance

169

"""

170

171

# Extract with different leniency levels

172

def compare_leniency_levels(text, region):
    """Print the matches found in ``text`` at every leniency level.

    Levels are reported in the order Possible, Valid, Strict Grouping,
    Exact Grouping, each with a match count followed by one line per match.
    """
    labelled_levels = {
        "Possible": phonenumbers.Leniency.POSSIBLE,
        "Valid": phonenumbers.Leniency.VALID,
        "Strict Grouping": phonenumbers.Leniency.STRICT_GROUPING,
        "Exact Grouping": phonenumbers.Leniency.EXACT_GROUPING,
    }

    for label, level in labelled_levels.items():
        found = list(phonenumbers.PhoneNumberMatcher(text, region, level))
        print(f"\n{label} ({len(found)} matches):")

        for hit in found:
            print(f" '{hit.raw_string}' at position {hit.start}-{hit.end}")

188

189

compare_leniency_levels(complex_text, "US")

190

191

# Filter matches by criteria

192

def filter_matches(text, region, filter_func):
    """Return the matches in ``text`` that ``filter_func`` accepts.

    ``filter_func`` is called with each match in the order the matcher
    yields them; matches for which it returns a truthy value are kept.
    """
    kept = []
    for candidate in phonenumbers.PhoneNumberMatcher(text, region):
        if filter_func(candidate):
            kept.append(candidate)
    return kept

196

197

# Filter for toll-free numbers only

198

def is_toll_free(match):
    """Return True when the matched number's type is TOLL_FREE."""
    return phonenumbers.number_type(match.number) == phonenumbers.PhoneNumberType.TOLL_FREE

201

202

toll_free_matches = filter_matches(complex_text, "US", is_toll_free)

203

print(f"\nToll-free numbers found: {len(toll_free_matches)}")

204

205

# Filter for specific regions

206

def is_uk_number(match):
    """Return True when the matched number's region code is "GB"."""
    return phonenumbers.region_code_for_number(match.number) == "GB"

209

210

uk_matches = filter_matches(complex_text, None, is_uk_number)

211

print(f"UK numbers found: {len(uk_matches)}")

212

213

# Extract and categorize numbers

214

def categorize_numbers(text, region):
    """Group the phone numbers found in ``text`` by number type.

    Parameters:
    - text: Text to search for phone numbers
    - region: Default region for numbers written without a country code

    Returns:
    Dict with the keys "Mobile", "Fixed Line", "Toll Free", "Emergency" and
    "Other", each mapping to a list of the matches placed in that category.
    """
    categories = {
        "Mobile": [],
        "Fixed Line": [],
        "Toll Free": [],
        "Emergency": [],
        "Other": [],
    }
    category_for_type = {
        phonenumbers.PhoneNumberType.MOBILE: "Mobile",
        phonenumbers.PhoneNumberType.FIXED_LINE: "Fixed Line",
        phonenumbers.PhoneNumberType.TOLL_FREE: "Toll Free",
    }

    for match in phonenumbers.PhoneNumberMatcher(text, region):
        category = category_for_type.get(phonenumbers.number_type(match.number))

        if category is None:
            # is_emergency_number() compares against the digits as dialled.
            # Passing E.164 digits (country code prepended, e.g. "1911...")
            # can never equal a short code, so test the string as written.
            # NOTE(review): the matcher is unlikely to yield bare short codes
            # such as "911" at the default leniency, so this bucket may stay
            # empty in practice - confirm against the library.
            is_emergency = region is not None and phonenumbers.is_emergency_number(
                match.raw_string, region
            )
            category = "Emergency" if is_emergency else "Other"

        categories[category].append(match)

    return categories

243

244

categorized = categorize_numbers(complex_text, "US")

245

for category, matches in categorized.items():

246

if matches:

247

print(f"\n{category} ({len(matches)}):")

248

for match in matches:

249

print(f" {match.raw_string}")

250

```

251

252

### As-You-Type Formatting

253

254

Real-time phone number formatting for user input interfaces, providing immediate feedback as users type.

255

256

```python { .api }

257

class AsYouTypeFormatter:

258

"""Formats phone numbers as digits are entered."""

259

260

def __init__(self, region_code: str):

261

"""

262

Initialize formatter for specified region.

263

264

Parameters:

265

- region_code: Region code for formatting context (e.g., "US", "GB")

266

"""

267

268

def input_digit(self, next_char: str, remember_position: bool = False) -> str:

269

"""

270

Add next digit and get formatted result.

271

272

Parameters:

273

- next_char: Next character entered by user

274

- remember_position: Whether to remember cursor position for get_remembered_position()

275

276

Returns:

277

Formatted phone number string with current input

278

"""

279

280

def clear(self):

281

"""Clear all input and reset formatter."""

282

283

def get_remembered_position(self) -> int:

284

"""Get position of remembered cursor location."""

285

```

286

287

**Usage Examples:**

288

289

```python

290

# Basic as-you-type formatting

291

formatter = phonenumbers.AsYouTypeFormatter("US")

292

293

# Simulate user typing digits one by one

294

digits = "6502532222"

295

print("User input -> Formatted output")

296

print("-" * 30)

297

298

for digit in digits:

299

result = formatter.input_digit(digit)

300

print(f"'{digit}' -> '{result}'")

301

302

# Output:

303

# '6' -> '6'

304

# '5' -> '65'

305

# '0' -> '650'

306

# '2' -> '650 2'

307

# '5' -> '650 25'

308

# '3' -> '650 253'

309

# '2' -> '650-2532'

310

# '2' -> '(650) 253-22'

311

# '2' -> '(650) 253-222'

312

# '2' -> '(650) 253-2222'

313

314

# International number formatting

315

international_formatter = phonenumbers.AsYouTypeFormatter("US")

316

international_digits = "+442083661177"

317

318

print("\nInternational formatting:")

319

for char in international_digits:

320

result = international_formatter.input_digit(char)

321

print(f"'{char}' -> '{result}'")

322

323

# Position remembering for cursor tracking

324

formatter_with_cursor = phonenumbers.AsYouTypeFormatter("US")

325

digits_with_cursor = "6502532222"

326

327

# Remember position after 7th digit

328

for i, digit in enumerate(digits_with_cursor):

329

remember = (i == 6) # Remember position after "6502532"

330

result = formatter_with_cursor.input_digit(digit, remember)

331

332

if remember:

333

remembered_pos = formatter_with_cursor.get_remembered_position()

334

print(f"Cursor position remembered: {remembered_pos} in '{result}'")

335

336

# Clear and restart

337

formatter.clear()

338

new_result = formatter.input_digit("4")

339

print(f"After clear: '{new_result}'") # "4"

340

```

341

342

### Real-World Text Processing Applications

343

344

Practical examples for common use cases.

345

346

**Usage Examples:**

347

348

```python

349

# Email/document phone number extraction

350

def extract_contact_info(document_text, default_region="US"):
    """Describe every phone number found in ``document_text``.

    Parameters:
    - document_text: Text to search for phone numbers
    - default_region: Region assumed for numbers without a country code

    Returns:
    List of dicts, one per match, holding the raw text, its position, the
    national and international formats, region, number type and validity.
    """

    def describe(hit):
        # Build the summary record for a single match.
        number = hit.number
        return {
            "raw_text": hit.raw_string,
            "position": f"{hit.start}-{hit.end}",
            "formatted_national": phonenumbers.format_number(
                number, phonenumbers.PhoneNumberFormat.NATIONAL
            ),
            "formatted_international": phonenumbers.format_number(
                number, phonenumbers.PhoneNumberFormat.INTERNATIONAL
            ),
            "region": phonenumbers.region_code_for_number(number),
            "type": phonenumbers.number_type(number),
            "is_valid": phonenumbers.is_valid_number(number),
        }

    return [
        describe(hit)
        for hit in phonenumbers.PhoneNumberMatcher(document_text, default_region)
    ]

372

373

# Example document

374

business_card = """

375

John Smith, CEO

376

Acme Corporation

377

Phone: (555) 123-4567

378

Mobile: 555.987.6543

379

International: +1-555-246-8101

380

Fax: (555) 123-4568

381

"""

382

383

contacts = extract_contact_info(business_card)

384

for contact in contacts:

385

print(f"Found: {contact['raw_text']} -> {contact['formatted_national']}")

386

387

# Live input field formatting simulation

388

class PhoneInputField:
    """Simulates a phone input field with real-time formatting."""

    # Non-digit characters that are forwarded to the formatter.
    _FORMATTING_CHARS = frozenset("+()-. ")

    def __init__(self, region_code="US"):
        """Create an empty field that formats input for ``region_code``."""
        self.formatter = phonenumbers.AsYouTypeFormatter(region_code)
        self.value = ""

    def on_key_press(self, key):
        """Handle one key press and return the text the field should display.

        Parameters:
        - key: A single typed character, or the literal string "BACKSPACE"

        Returns:
        The current display value. Keys that are neither a digit, one of
        "+()-. ", nor "BACKSPACE" leave the value unchanged.
        """
        if key == "BACKSPACE":
            # In a real implementation backspace would remove one character;
            # this simulation resets the field. Going through clear() keeps
            # self.value in sync with the formatter, so a later ignored key
            # does not resurrect the old text.
            self.clear()
            return self.value

        # Require exactly one character: a substring test against
        # "+()-. " would also accept "" and multi-character keys.
        if len(key) == 1 and (key.isdigit() or key in self._FORMATTING_CHARS):
            self.value = self.formatter.input_digit(key)

        return self.value

    def clear(self):
        """Clear the field and reset the underlying formatter."""
        self.formatter.clear()
        self.value = ""

410

411

# Simulate user input

412

phone_field = PhoneInputField("US")

413

user_input = "6502532222"

414

415

print("Phone input field simulation:")

416

for char in user_input:

417

display_value = phone_field.on_key_press(char)

418

print(f"User typed '{char}' -> Display: '{display_value}'")

419

420

# Multi-format phone number search

421

def find_phone_variations(text, phone_to_find, region):
    """Find every way a specific phone number is written in ``text``.

    Parameters:
    - text: Text to search
    - phone_to_find: The number to look for, in any parseable format
    - region: Default region used to parse both the target and the text

    Returns:
    List of the raw strings in ``text`` whose E.164 form equals that of
    ``phone_to_find``; an empty list when ``phone_to_find`` cannot be parsed.
    """
    # Only parsing the target is expected to fail, so keep the try body
    # limited to that call instead of wrapping the whole search.
    try:
        target_number = phonenumbers.parse(phone_to_find, region)
    except phonenumbers.NumberParseException:
        return []

    e164 = phonenumbers.PhoneNumberFormat.E164
    target_e164 = phonenumbers.format_number(target_number, e164)

    # POSSIBLE leniency so that loosely formatted occurrences are considered.
    matcher = phonenumbers.PhoneNumberMatcher(text, region, phonenumbers.Leniency.POSSIBLE)
    return [
        match.raw_string
        for match in matcher
        if phonenumbers.format_number(match.number, e164) == target_e164
    ]

438

439

# Find all ways a number appears in text

440

text_with_variations = """

441

Contact us at (650) 253-2222 or 650-253-2222.

442

International callers: +1 650 253 2222

443

Text: 6502532222

444

"""

445

446

variations = find_phone_variations(text_with_variations, "6502532222", "US")

447

print(f"\nVariations found: {variations}")

448

# Output: ['(650) 253-2222', '650-253-2222', '+1 650 253 2222', '6502532222']

449

```

450

451

### Advanced Text Processing Features

452

453

Sophisticated text processing capabilities for complex scenarios.

454

455

**Usage Examples:**

456

457

```python

458

# Batch processing with performance optimization

459

def batch_extract_numbers(documents, default_region="US", max_matches_per_doc=100):
    """Extract phone numbers from multiple documents.

    Parameters:
    - documents: Dict mapping a document id to its text
    - default_region: Region assumed for numbers without a country code
    - max_matches_per_doc: Upper bound on the matches returned per document

    Returns:
    Dict mapping each document id to a list of match dicts (keys "text",
    "start", "end", "e164", "region"), or to {"error": message} when
    processing that document raised.
    """
    from itertools import islice  # local import keeps the snippet self-contained

    e164 = phonenumbers.PhoneNumberFormat.E164
    results = {}

    for doc_id, text in documents.items():
        try:
            matcher = phonenumbers.PhoneNumberMatcher(
                text, default_region, phonenumbers.Leniency.VALID
            )
            # The matcher's max_tries argument limits how many invalid
            # candidates are tried, not how many matches come back, so the
            # per-document cap is applied to the iteration itself.
            results[doc_id] = [
                {
                    "text": match.raw_string,
                    "start": match.start,
                    "end": match.end,
                    "e164": phonenumbers.format_number(match.number, e164),
                    "region": phonenumbers.region_code_for_number(match.number),
                }
                for match in islice(matcher, max_matches_per_doc)
            ]
        except Exception as e:
            # Deliberate best-effort: one bad document must not abort the
            # batch. Callers tell the cases apart with isinstance(..., list).
            results[doc_id] = {"error": str(e)}

    return results

485

486

# Example batch processing

487

sample_documents = {

488

"email_1": "Please call me at (555) 123-4567 or email john@example.com",

489

"email_2": "UK office: +44 20 7946 0958, US office: 1-800-555-0199",

490

"invoice": "Questions? Contact us at 650.253.2222"

491

}

492

493

batch_results = batch_extract_numbers(sample_documents)

494

for doc_id, matches in batch_results.items():

495

if isinstance(matches, list):

496

print(f"\n{doc_id}: {len(matches)} phone numbers")

497

for match in matches:

498

print(f" {match['text']} -> {match['e164']}")

499

500

# Smart region detection

501

def smart_extract_with_region_detection(text, candidate_regions=None):
    """Extract numbers using whichever candidate region yields the most matches.

    Parameters:
    - text: Text to search for phone numbers
    - candidate_regions: Region codes to try; defaults to
      ["US", "GB", "CA", "AU", "DE", "FR"] when None

    Returns:
    Tuple (matches, best_region). On a tie the earliest candidate wins.
    With an empty candidate list the text is scanned with "US".
    """
    if candidate_regions is None:
        candidate_regions = ["US", "GB", "CA", "AU", "DE", "FR"]

    # Keep the matches themselves, not just the counts, so the winning
    # region does not have to be scanned a second time.
    matches_by_region = {
        region: list(
            phonenumbers.PhoneNumberMatcher(text, region, phonenumbers.Leniency.VALID)
        )
        for region in candidate_regions
    }

    if not matches_by_region:
        return list(phonenumbers.PhoneNumberMatcher(text, "US")), "US"

    # max() returns the first key with the highest count, preserving the
    # candidate order as the tie-breaker.
    best_region = max(matches_by_region, key=lambda region: len(matches_by_region[region]))
    return matches_by_region[best_region], best_region

520

521

mixed_text = """

522

Call our London office at 020 7946 0958 or

523

our New York office at (212) 555-0123.

524

Emergency UK: 999, Emergency US: 911

525

"""

526

527

matches, detected_region = smart_extract_with_region_detection(mixed_text)

528

print(f"\nDetected best region: {detected_region}")

529

print(f"Found {len(matches)} matches")

530

```