or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

as-you-type-formatting.md
core-parsing-formatting.md
index.md
number-validation.md
phone-number-matching.md
region-metadata.md
short-numbers.md
utility-functions.md

docs/phone-number-matching.md

0

# Phone Number Matching

1

2

Advanced pattern matching to find and extract phone numbers from text, with configurable leniency levels and comprehensive match information. This capability enables extraction of phone numbers from unstructured text like documents, emails, and web pages.

3

4

## Capabilities

5

6

### PhoneNumberMatcher Class

7

8

Iterator class that finds phone number matches in text with various leniency options.

9

10

```python { .api }

11

class PhoneNumberMatcher:
    """
    Iterator for finding phone numbers in text.

    Scans through text and yields PhoneNumberMatch objects for
    each phone number found, with configurable leniency levels.
    """

    def __init__(self, text: str, region: str, leniency: Leniency = Leniency.VALID,
                 max_tries: int = 65535):
        """
        Initialize matcher for finding phone numbers in text.

        Parameters:
        - text: Text to search for phone numbers
        - region: Two-letter region code used to interpret numbers that are
          not written in international format
        - leniency: Matching strictness level (defaults to Leniency.VALID)
        - max_tries: Maximum number of invalid candidate numbers to try
          before giving up on the text
        """

    def __iter__(self):
        """Return iterator interface."""

    def __next__(self):
        """Get next phone number match."""

36

```

37

38

### PhoneNumberMatch Class

39

40

Represents a phone number found in text with position and metadata information.

41

42

```python { .api }

43

class PhoneNumberMatch:
    """
    Represents a phone number match found in text.

    Contains the matched phone number, its position in the text,
    and the raw text that was matched.

    These values are plain attributes, not methods: read them as
    ``match.start`` / ``match.number``, without parentheses.
    """

    # Zero-based index of the match start position in the original text
    start: int

    # Zero-based index of the match end position (exclusive)
    end: int

    # PhoneNumber object representing the matched number
    number: "PhoneNumber"

    # Original text substring that contained the phone number
    raw_string: str

82

```

83

84

### Leniency Levels

85

86

Control how strict the matching algorithm should be when finding phone numbers.

87

88

```python { .api }

89

class Leniency:
    """
    Leniency levels for phone number matching.

    Each constant selects how strict the matcher is when deciding
    whether a candidate in the text counts as a phone number.
    """

    # Match numbers that are possible (basic length checks).
    POSSIBLE = 0

    # Match only valid phone numbers (default level).
    VALID = 1

    # Match only numbers with correct punctuation grouping.
    STRICT_GROUPING = 2

    # Match only numbers with exact formatting patterns.
    EXACT_GROUPING = 3

108

```

109

110

## Usage Examples

111

112

### Basic Phone Number Extraction

113

114

```python

115

import phonenumbers

# Text containing various phone numbers
text = """
Contact us at 650-253-2222 or call our international line at +44 20 8366 1177.
You can also reach support at (800) 555-1234 or send a fax to 650.253.2223.
Our office number is 1-650-253-2222 extension 1234.
"""

print("Phone numbers found in text:")
for match in phonenumbers.PhoneNumberMatcher(text, "US"):
    # start, end, number and raw_string are plain attributes on the match
    # object; calling them (e.g. match.number()) raises TypeError.
    number = match.number
    formatted = phonenumbers.format_number(number, phonenumbers.PhoneNumberFormat.INTERNATIONAL)
    print(f" Position {match.start}-{match.end}: '{match.raw_string}' -> {formatted}")

129

```

130

131

### Leniency Level Comparison

132

133

```python

134

import phonenumbers
from phonenumbers import Leniency

text = "Call me at 555-1234 or 1-800-FLOWERS today!"

# Each level is paired with the label printed for it.
leniency_levels = [
    (Leniency.POSSIBLE, "POSSIBLE"),
    (Leniency.VALID, "VALID"),
    (Leniency.STRICT_GROUPING, "STRICT_GROUPING"),
    (Leniency.EXACT_GROUPING, "EXACT_GROUPING"),
]

for leniency, name in leniency_levels:
    print(f"\n{name} leniency:")
    matches = list(phonenumbers.PhoneNumberMatcher(text, "US", leniency))
    print(f" Found {len(matches)} matches")

    for match in matches:
        # number and raw_string are attributes of the match, not methods.
        formatted = phonenumbers.format_number(
            match.number,
            phonenumbers.PhoneNumberFormat.INTERNATIONAL
        )
        print(f" '{match.raw_string}' -> {formatted}")

157

```

158

159

### Document Processing Pipeline

160

161

```python

162

import phonenumbers

163

import re

164

165

class PhoneNumberExtractor:
    """Extract and normalize phone numbers from documents."""

    def __init__(self, default_region="US", leniency=None):
        """
        Store the settings shared by every extraction call.

        Parameters:
        - default_region: Region code used when a call does not supply one
        - leniency: Matching strictness level; None selects
          phonenumbers.Leniency.VALID
        """
        self.default_region = default_region
        # The default is resolved here instead of in the signature: this
        # example only imports `phonenumbers`, so a bare `Leniency.VALID`
        # default raised NameError as soon as the class was defined.
        self.leniency = phonenumbers.Leniency.VALID if leniency is None else leniency

    def extract_from_text(self, text, region=None):
        """
        Extract all phone numbers from text.

        Parameters:
        - text: Text to scan
        - region: Region code for this call; falls back to default_region

        Returns:
        List of dicts, one per match, holding the raw text, positions,
        parsed number, formatted variants, validity, type and region
        """
        search_region = region or self.default_region
        formats = phonenumbers.PhoneNumberFormat
        matches = []

        for match in phonenumbers.PhoneNumberMatcher(text, search_region, self.leniency):
            # Match fields are attributes; calling them raises TypeError.
            number = match.number

            matches.append({
                'raw_text': match.raw_string,
                'start_pos': match.start,
                'end_pos': match.end,
                'parsed_number': number,
                'formatted': {
                    'e164': phonenumbers.format_number(number, formats.E164),
                    'international': phonenumbers.format_number(number, formats.INTERNATIONAL),
                    'national': phonenumbers.format_number(number, formats.NATIONAL),
                },
                'is_valid': phonenumbers.is_valid_number(number),
                'number_type': phonenumbers.number_type(number),
                'region': phonenumbers.region_code_for_number(number),
            })

        return matches

    def extract_unique_numbers(self, text, region=None):
        """
        Extract unique phone numbers, removing duplicates.

        Matches are keyed by their E.164 form. When the same number occurs
        more than once, the occurrence with the longest raw text is kept.

        Returns:
        List of match dicts in order of first appearance
        """
        unique_numbers = {}

        for match in self.extract_from_text(text, region):
            e164 = match['formatted']['e164']
            existing = unique_numbers.get(e164)
            # Keep the match with better formatting or more context
            if existing is None or len(match['raw_text']) > len(existing['raw_text']):
                unique_numbers[e164] = match

        return list(unique_numbers.values())

    def anonymize_text(self, text, replacement="[PHONE]", region=None):
        """
        Replace phone numbers in text with anonymized placeholders.

        Parameters:
        - text: Text to anonymize
        - replacement: Placeholder written in place of each match
        - region: Region code for this call; falls back to default_region

        Returns:
        Copy of text with every matched span replaced
        """
        search_region = region or self.default_region

        # Collect (start, end) spans, then process them from the end of the
        # text backwards so earlier offsets stay valid after each splice.
        spans = sorted(
            (
                (match.start, match.end)
                for match in phonenumbers.PhoneNumberMatcher(text, search_region, self.leniency)
            ),
            reverse=True,
        )

        anonymized_text = text
        for start, end in spans:
            anonymized_text = anonymized_text[:start] + replacement + anonymized_text[end:]

        return anonymized_text

231

232

# Example usage: run the extractor over one sample document
extractor = PhoneNumberExtractor("US")

sample_document = """
Please contact our sales team at 1-800-555-SALE (1-800-555-7253) or
our technical support at +1 (650) 253-2222. International customers
can reach us at +44 20 8366 1177 or +33 1 42 68 53 00.

For urgent matters, call our emergency line: 911
For billing questions: 650.253.2223 ext. 100
"""

# Every match, with its position and derived details
print("=== Phone Number Extraction ===")
matches = extractor.extract_from_text(sample_document)
for i, match in enumerate(matches):
    print(f"{i+1}. '{match['raw_text']}' (pos {match['start_pos']}-{match['end_pos']})")
    print(f" -> {match['formatted']['international']}")
    print(f" -> Type: {match['number_type']}, Region: {match['region']}")
    print()

# One entry per distinct number
print("=== Unique Numbers ===")
unique = extractor.extract_unique_numbers(sample_document)
for match in unique:
    print(f"- {match['formatted']['international']} ({match['region']})")

# The same document with the numbers masked
print("=== Anonymized Text ===")
anonymized = extractor.anonymize_text(sample_document)
print(anonymized)

260

```

261

262

### Contact Information Extraction

263

264

```python

265

import phonenumbers

266

import re

267

268

class ContactExtractor:
    """Extract structured contact information from text."""

    def __init__(self, default_region="US"):
        """
        Parameters:
        - default_region: Region code passed to the phone number matcher
        """
        self.default_region = default_region
        # The TLD class is [A-Za-z]; the earlier [A-Z|a-z] also accepted a
        # literal '|' because '|' is not alternation inside a character class.
        self.email_pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

    def extract_contacts(self, text):
        """
        Extract phone numbers, emails, and other contact info.

        Returns:
        Dict with 'phone_numbers' and 'emails' (lists of dicts carrying a
        'position' tuple) and 'text_segments' (the non-empty stripped text
        found between and after those positions)
        """
        contacts = {
            'phone_numbers': [],
            'emails': [],
            'text_segments': []
        }

        # Extract phone numbers
        for match in phonenumbers.PhoneNumberMatcher(text, self.default_region):
            # Match fields are attributes; calling them raises TypeError.
            number = match.number
            contacts['phone_numbers'].append({
                'raw': match.raw_string,
                'formatted': phonenumbers.format_number(
                    number,
                    phonenumbers.PhoneNumberFormat.INTERNATIONAL
                ),
                'type': phonenumbers.number_type(number),
                'position': (match.start, match.end)
            })

        # Extract email addresses (these are re.Match objects, so start()
        # and end() really are methods here)
        for match in self.email_pattern.finditer(text):
            contacts['emails'].append({
                'email': match.group(),
                'position': (match.start(), match.end())
            })

        # Extract text segments between contact info
        all_positions = sorted(
            [phone['position'] for phone in contacts['phone_numbers']]
            + [email['position'] for email in contacts['emails']]
        )

        last_end = 0
        for start, end in all_positions:
            if start > last_end:
                segment = text[last_end:start].strip()
                if segment:
                    contacts['text_segments'].append(segment)
            # max() stops the cursor moving backwards when two spans overlap,
            # which would otherwise re-emit text that was already covered.
            last_end = max(last_end, end)

        # Final segment
        segment = text[last_end:].strip()
        if segment:
            contacts['text_segments'].append(segment)

        return contacts

    def format_contact_card(self, text):
        """
        Format extracted contact information as a structured card.

        Returns:
        Newline-joined string with one line per phone type, then an
        'Email:' line and a 'Notes:' line when those are present
        """
        contacts = self.extract_contacts(text)

        card = []

        # Group phone numbers by type
        phones_by_type = {}
        for phone in contacts['phone_numbers']:
            phones_by_type.setdefault(phone['type'], []).append(phone['formatted'])

        # Format phone numbers
        # NOTE(review): number_type() appears to return a plain integer
        # constant, so this label may render as a digit - confirm.
        for phone_type, numbers in phones_by_type.items():
            type_name = str(phone_type).replace('PhoneNumberType.', '').title()
            card.append(f"{type_name}: {', '.join(numbers)}")

        # Add emails
        if contacts['emails']:
            emails = [email['email'] for email in contacts['emails']]
            card.append(f"Email: {', '.join(emails)}")

        # Add other text
        if contacts['text_segments']:
            card.append(f"Notes: {' | '.join(contacts['text_segments'])}")

        return '\n'.join(card)

357

358

# Example usage: pull contact details out of a business card
extractor = ContactExtractor("US")

business_card_text = """
John Smith - Sales Manager
Acme Corporation
Phone: (650) 253-2222
Mobile: 650.555.1234
Email: john.smith@acme.com
Alternative: jsmith@gmail.com

Call anytime between 9 AM - 5 PM PST
Emergency contact: +1-800-555-HELP
"""

print("=== Contact Extraction ===")
contacts = extractor.extract_contacts(business_card_text)

# Phone numbers, raw and formatted
print(f"Phone numbers found: {len(contacts['phone_numbers'])}")
for phone in contacts['phone_numbers']:
    print(f" - {phone['raw']} -> {phone['formatted']} ({phone['type']})")

# Email addresses
print(f"\nEmails found: {len(contacts['emails'])}")
for email in contacts['emails']:
    print(f" - {email['email']}")

# Whatever text was left around the contact details
print(f"\nText segments: {len(contacts['text_segments'])}")
for segment in contacts['text_segments']:
    print(f" - {segment}")

# The same information rendered as a single card
print("\n=== Formatted Contact Card ===")
card = extractor.format_contact_card(business_card_text)
print(card)

391

```

392

393

### Bulk Text Processing

394

395

```python

396

import phonenumbers

397

from concurrent.futures import ThreadPoolExecutor

398

import json

399

400

class BulkPhoneExtractor:
    """Process multiple documents for phone number extraction."""

    def __init__(self, default_region="US", max_workers=4):
        """
        Parameters:
        - default_region: Region code used when a document does not give one
        - max_workers: Worker thread count passed to ThreadPoolExecutor
        """
        self.default_region = default_region
        self.max_workers = max_workers

    def process_document(self, doc_id, text, region=None):
        """
        Process a single document.

        Parameters:
        - doc_id: Identifier copied into the result
        - text: Document text to scan
        - region: Region code for this document; falls back to default_region

        Returns:
        Dict with 'doc_id', a 'phone_numbers' list (one dict per match) and
        'stats' counting total matches, valid numbers and unique numbers
        """
        search_region = region or self.default_region
        formats = phonenumbers.PhoneNumberFormat

        result = {
            'doc_id': doc_id,
            'phone_numbers': [],
            'stats': {
                'total_matches': 0,
                'valid_numbers': 0,
                'unique_numbers': 0
            }
        }

        seen_numbers = set()

        for match in phonenumbers.PhoneNumberMatcher(text, search_region):
            # Match fields are attributes; calling them raises TypeError.
            number = match.number
            e164 = phonenumbers.format_number(number, formats.E164)

            is_valid = phonenumbers.is_valid_number(number)

            result['phone_numbers'].append({
                'raw_text': match.raw_string,
                'e164': e164,
                'international': phonenumbers.format_number(number, formats.INTERNATIONAL),
                'is_valid': is_valid,
                'type': str(phonenumbers.number_type(number)),
                'region': phonenumbers.region_code_for_number(number),
                # A list rather than a tuple so the result is JSON-friendly
                'position': [match.start, match.end]
            })

            result['stats']['total_matches'] += 1
            if is_valid:
                result['stats']['valid_numbers'] += 1

            seen_numbers.add(e164)

        result['stats']['unique_numbers'] = len(seen_numbers)
        return result

    def process_documents(self, documents):
        """
        Process multiple documents in parallel.

        Parameters:
        - documents: Iterable of (doc_id, text, region) tuples

        Returns:
        List of per-document results in submission order. A document whose
        processing raised is reported on stdout and left out of the list.
        """
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [
                executor.submit(self.process_document, doc_id, text, region)
                for doc_id, text, region in documents
            ]

            results = []
            for future in futures:
                try:
                    results.append(future.result())
                except Exception as e:
                    # Best effort: one bad document must not abort the batch.
                    print(f"Error processing document: {e}")

        return results

    def generate_summary_report(self, results):
        """
        Generate summary statistics across all documents.

        Parameters:
        - results: List of dicts as returned by process_document

        Returns:
        Dict with a 'summary' section plus 'regions' and 'types' counts;
        only valid numbers contribute to the unique, region and type figures
        """
        total_docs = len(results)
        total_matches = sum(r['stats']['total_matches'] for r in results)
        total_valid = sum(r['stats']['valid_numbers'] for r in results)

        # Collect all unique numbers across documents
        all_numbers = set()
        regions = {}
        types = {}

        for result in results:
            for phone in result['phone_numbers']:
                if not phone['is_valid']:
                    continue

                all_numbers.add(phone['e164'])

                region = phone['region']
                regions[region] = regions.get(region, 0) + 1

                phone_type = phone['type']
                types[phone_type] = types.get(phone_type, 0) + 1

        return {
            'summary': {
                'total_documents': total_docs,
                'total_matches': total_matches,
                'valid_numbers': total_valid,
                'unique_numbers_global': len(all_numbers),
                # Guard against division by zero for an empty batch
                'average_matches_per_doc': total_matches / total_docs if total_docs > 0 else 0
            },
            'regions': regions,
            'types': types
        }

500

501

# Example usage: process a small batch with two worker threads
extractor = BulkPhoneExtractor("US", max_workers=2)

# Sample documents to process, as (doc_id, text, region) tuples
documents = [
    ("doc1", "Call us at 650-253-2222 or +44 20 8366 1177", "US"),
    ("doc2", "Support: 1-800-555-1234, International: +33 1 42 68 53 00", "US"),
    ("doc3", "Office: (555) 123-4567, Mobile: 555.987.6543", "US"),
    ("doc4", "Invalid phone: 123-456, Valid: +1-650-253-2222", "US"),
]

print("=== Bulk Processing Results ===")
results = extractor.process_documents(documents)

# Per-document statistics followed by a short preview of the matches
for result in results:
    print(f"\nDocument {result['doc_id']}:")
    print(f" Total matches: {result['stats']['total_matches']}")
    print(f" Valid numbers: {result['stats']['valid_numbers']}")
    print(f" Unique numbers: {result['stats']['unique_numbers']}")

    for phone in result['phone_numbers'][:3]:  # Show first 3
        status = "✓" if phone['is_valid'] else "✗"
        print(f" {status} {phone['raw_text']} -> {phone['international']}")

# Aggregate figures across the whole batch
print("\n=== Summary Report ===")
summary = extractor.generate_summary_report(results)
print(json.dumps(summary, indent=2))

528

```