Tessl Tile for pypi/phonenumberslite@9.0.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

as-you-type-formatting.md core-parsing-formatting.md index.md number-validation.md phone-number-matching.md region-metadata.md short-numbers.md utility-functions.md

phone-number-matching.mddocs/

0
# Phone Number Matching
1

2
Advanced pattern matching to find and extract phone numbers from text, with configurable leniency levels and comprehensive match information. This capability enables extraction of phone numbers from unstructured text like documents, emails, and web pages.
3

4
## Capabilities
5

6
### PhoneNumberMatcher Class
7

8
Iterator class that finds phone number matches in text with various leniency options.
9

10
```python { .api }
11
class PhoneNumberMatcher:
    """
    Iterator for finding phone numbers in text.

    Scans through text and yields PhoneNumberMatch objects for
    each phone number found, with configurable leniency levels.
    """

    def __init__(self, text: str, region: str, leniency: Leniency = Leniency.VALID,
                 max_tries: int = 65535):
        """
        Initialize matcher for finding phone numbers in text.

        Parameters:
        - text: Text to search for phone numbers
        - region: Two-letter region code used to interpret numbers that are
          not written in international ("+" prefixed) format
        - leniency: Matching strictness level (defaults to Leniency.VALID)
        - max_tries: Maximum number of invalid candidate numbers to try
          before giving up on the text (defaults to 65535); this bounds the
          work done on text that is full of false positives
        """

    def __iter__(self):
        """Return iterator interface."""

    def __next__(self):
        """Get next phone number match; raises StopIteration when exhausted."""
36
```
37

38
### PhoneNumberMatch Class
39

40
Represents a phone number found in text with position and metadata information.
41

42
```python { .api }
43
class PhoneNumberMatch:
    """
    Represents a phone number match found in text.

    Contains the matched phone number, its position in the text,
    and the raw text that was matched.

    All four values are plain instance attributes, not methods:
    read them as ``match.start`` / ``match.number``, never
    ``match.start()`` / ``match.number()``.
    """

    # Zero-based index of the match start position in the original text
    start: int

    # Zero-based index of the match end position (exclusive)
    end: int

    # PhoneNumber object representing the matched number
    number: "PhoneNumber"

    # Original text substring that contained the phone number
    raw_string: str
82
```
83

84
### Leniency Levels
85

86
Control how strict the matching algorithm should be when finding phone numbers.
87

88
```python { .api }
89
class Leniency:
    """
    Strictness levels for phone number matching.

    Selects how strict the matcher is when deciding whether a
    candidate in the text is reported as a phone number.
    """

    # POSSIBLE (0):        match numbers that are possible (basic length checks).
    # VALID (1):           match only valid phone numbers (default level).
    # STRICT_GROUPING (2): match only numbers with correct punctuation grouping.
    # EXACT_GROUPING (3):  match only numbers with exact formatting patterns.
    POSSIBLE, VALID, STRICT_GROUPING, EXACT_GROUPING = range(4)
108
```
109

110
## Usage Examples
111

112
### Basic Phone Number Extraction
113

114
```python
115
import phonenumbers
116

117
# Text containing various phone numbers
text = """
Contact us at 650-253-2222 or call our international line at +44 20 8366 1177.
You can also reach support at (800) 555-1234 or send a fax to 650.253.2223.
Our office number is 1-650-253-2222 extension 1234.
"""

print("Phone numbers found in text:")
for match in phonenumbers.PhoneNumberMatcher(text, "US"):
    # PhoneNumberMatch exposes number/start/end/raw_string as plain
    # attributes; calling them (match.number()) raises TypeError.
    number = match.number
    formatted = phonenumbers.format_number(number, phonenumbers.PhoneNumberFormat.INTERNATIONAL)
    print(f"  Position {match.start}-{match.end}: '{match.raw_string}' -> {formatted}")
129
```
130

131
### Leniency Level Comparison
132

133
```python
134
import phonenumbers
135
from phonenumbers import Leniency
136

137
text = "Call me at 555-1234 or 1-800-FLOWERS today!"

# Each level paired with the label printed for it, loosest first.
leniency_levels = [
    (Leniency.POSSIBLE, "POSSIBLE"),
    (Leniency.VALID, "VALID"),
    (Leniency.STRICT_GROUPING, "STRICT_GROUPING"),
    (Leniency.EXACT_GROUPING, "EXACT_GROUPING")
]

for leniency, name in leniency_levels:
    print(f"\n{name} leniency:")
    matches = list(phonenumbers.PhoneNumberMatcher(text, "US", leniency))
    print(f"  Found {len(matches)} matches")

    for match in matches:
        # number and raw_string are attributes of PhoneNumberMatch, not methods.
        formatted = phonenumbers.format_number(
            match.number,
            phonenumbers.PhoneNumberFormat.INTERNATIONAL
        )
        print(f"    '{match.raw_string}' -> {formatted}")
157
```
158

159
### Document Processing Pipeline
160

161
```python
162
import phonenumbers
163
import re
164

165
class PhoneNumberExtractor:
    """Extract and normalize phone numbers from documents."""

    def __init__(self, default_region="US", leniency=None):
        """
        Create an extractor.

        Parameters:
        - default_region: region code used when a call does not pass one
        - leniency: a phonenumbers.Leniency level; None selects
          phonenumbers.Leniency.VALID
        """
        self.default_region = default_region
        # Resolve the default here, through the phonenumbers module: this
        # example only does `import phonenumbers`, so a bare `Leniency`
        # name in the signature would raise NameError at class definition.
        if leniency is None:
            leniency = phonenumbers.Leniency.VALID
        self.leniency = leniency

    def extract_from_text(self, text, region=None):
        """
        Extract all phone numbers from text.

        Returns a list of dicts, one per match, holding the raw text, its
        start/end positions, the parsed number, E.164 / international /
        national renderings, validity, number type and region.
        """
        search_region = region or self.default_region
        matches = []

        for match in phonenumbers.PhoneNumberMatcher(text, search_region, self.leniency):
            # PhoneNumberMatch fields are attributes, not methods.
            number = match.number

            matches.append({
                'raw_text': match.raw_string,
                'start_pos': match.start,
                'end_pos': match.end,
                'parsed_number': number,
                'formatted': {
                    'e164': phonenumbers.format_number(number, phonenumbers.PhoneNumberFormat.E164),
                    'international': phonenumbers.format_number(number, phonenumbers.PhoneNumberFormat.INTERNATIONAL),
                    'national': phonenumbers.format_number(number, phonenumbers.PhoneNumberFormat.NATIONAL)
                },
                'is_valid': phonenumbers.is_valid_number(number),
                'number_type': phonenumbers.number_type(number),
                'region': phonenumbers.region_code_for_number(number)
            })

        return matches

    def extract_unique_numbers(self, text, region=None):
        """
        Extract unique phone numbers, removing duplicates.

        Matches are keyed by their E.164 form; when the same number occurs
        more than once, the occurrence with the longest raw text is kept.
        """
        unique_numbers = {}

        for match in self.extract_from_text(text, region):
            e164 = match['formatted']['e164']
            existing = unique_numbers.get(e164)
            # Keep the first occurrence unless a later one has more raw text.
            if existing is None or len(match['raw_text']) > len(existing['raw_text']):
                unique_numbers[e164] = match

        return list(unique_numbers.values())

    def anonymize_text(self, text, replacement="[PHONE]", region=None):
        """Replace phone numbers in text with anonymized placeholders."""
        search_region = region or self.default_region

        # Collect (start, end) spans and process them right-to-left so that
        # earlier offsets stay valid while later parts of the text change.
        spans = sorted(
            (
                (match.start, match.end)
                for match in phonenumbers.PhoneNumberMatcher(text, search_region, self.leniency)
            ),
            reverse=True,
        )

        anonymized_text = text
        for start, end in spans:
            anonymized_text = anonymized_text[:start] + replacement + anonymized_text[end:]

        return anonymized_text
231

232
# Demonstration: run the extractor over a small sample document
extractor = PhoneNumberExtractor("US")

sample_document = """
Please contact our sales team at 1-800-555-SALE (1-800-555-7253) or 
our technical support at +1 (650) 253-2222. International customers 
can reach us at +44 20 8366 1177 or +33 1 42 68 53 00.

For urgent matters, call our emergency line: 911
For billing questions: 650.253.2223 ext. 100
"""

# 1) Every match, numbered from one, with position and formatting details
print("=== Phone Number Extraction ===")
matches = extractor.extract_from_text(sample_document)
for ordinal, found in enumerate(matches, start=1):
    span = f"pos {found['start_pos']}-{found['end_pos']}"
    print(f"{ordinal}. '{found['raw_text']}' ({span})")
    print(f"   -> {found['formatted']['international']}")
    print(f"   -> Type: {found['number_type']}, Region: {found['region']}")
    print()

# 2) De-duplicated numbers
print("=== Unique Numbers ===")
unique = extractor.extract_unique_numbers(sample_document)
for found in unique:
    international = found['formatted']['international']
    print(f"- {international} ({found['region']})")

# 3) The document with every number replaced by a placeholder
print("=== Anonymized Text ===")
anonymized = extractor.anonymize_text(sample_document)
print(anonymized)
260
```
261

262
### Contact Information Extraction
263

264
```python
265
import phonenumbers
266
import re
267

268
class ContactExtractor:
    """Extract structured contact information from text."""

    def __init__(self, default_region="US"):
        """
        Parameters:
        - default_region: region code passed to the phone number matcher
        """
        self.default_region = default_region
        # The TLD class is [A-Za-z]; writing it as [A-Z|a-z] would also
        # accept a literal "|" because "|" is not alternation inside [...].
        self.email_pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

    @staticmethod
    def _type_label(phone_type):
        """Return a readable label for a phonenumbers.PhoneNumberType value."""
        # number_type() returns one of the integer constants defined on
        # PhoneNumberType, so find the constant whose value matches.
        for name, value in vars(phonenumbers.PhoneNumberType).items():
            if name.isupper() and value == phone_type:
                return name.replace('_', ' ').title()
        return str(phone_type)

    def extract_contacts(self, text):
        """
        Extract phone numbers, emails, and other contact info.

        Returns a dict with three lists: 'phone_numbers' (raw text,
        international format, type, position), 'emails' (address,
        position) and 'text_segments' (the stripped, non-empty text
        between and after the matched spans).
        """
        contacts = {
            'phone_numbers': [],
            'emails': [],
            'text_segments': []
        }

        # Extract phone numbers (PhoneNumberMatch fields are attributes)
        for match in phonenumbers.PhoneNumberMatcher(text, self.default_region):
            contacts['phone_numbers'].append({
                'raw': match.raw_string,
                'formatted': phonenumbers.format_number(
                    match.number,
                    phonenumbers.PhoneNumberFormat.INTERNATIONAL
                ),
                'type': phonenumbers.number_type(match.number),
                'position': (match.start, match.end)
            })

        # Extract email addresses (these are re.Match objects, so
        # group()/start()/end() really are methods here)
        for match in self.email_pattern.finditer(text):
            contacts['emails'].append({
                'email': match.group(),
                'position': (match.start(), match.end())
            })

        # Extract text segments between contact info
        all_positions = sorted(
            [phone['position'] for phone in contacts['phone_numbers']]
            + [email['position'] for email in contacts['emails']]
        )

        last_end = 0
        for start, end in all_positions:
            if start > last_end:
                segment = text[last_end:start].strip()
                if segment:
                    contacts['text_segments'].append(segment)
            # A phone match and an email match may overlap; never let the
            # cursor move backwards or already-covered text is emitted again.
            last_end = max(last_end, end)

        # Final segment
        if last_end < len(text):
            segment = text[last_end:].strip()
            if segment:
                contacts['text_segments'].append(segment)

        return contacts

    def format_contact_card(self, text):
        """Format extracted contact information as a structured card."""
        contacts = self.extract_contacts(text)

        card = []

        # Group phone numbers by type
        phones_by_type = {}
        for phone in contacts['phone_numbers']:
            phones_by_type.setdefault(phone['type'], []).append(phone['formatted'])

        # Format phone numbers
        for phone_type, numbers in phones_by_type.items():
            type_name = self._type_label(phone_type)
            card.append(f"{type_name}: {', '.join(numbers)}")

        # Add emails
        if contacts['emails']:
            emails = [email['email'] for email in contacts['emails']]
            card.append(f"Email: {', '.join(emails)}")

        # Add other text
        if contacts['text_segments']:
            card.append(f"Notes: {' | '.join(contacts['text_segments'])}")

        return '\n'.join(card)
357

358
# Demonstration: pull contact details out of a business card
extractor = ContactExtractor("US")

business_card_text = """
John Smith - Sales Manager
Acme Corporation
Phone: (650) 253-2222
Mobile: 650.555.1234
Email: john.smith@acme.com
Alternative: jsmith@gmail.com

Call anytime between 9 AM - 5 PM PST
Emergency contact: +1-800-555-HELP
"""

print("=== Contact Extraction ===")
contacts = extractor.extract_contacts(business_card_text)

# Phone numbers: raw text, formatted form and number type
phone_entries = contacts['phone_numbers']
print(f"Phone numbers found: {len(phone_entries)}")
for entry in phone_entries:
    print(f"  - {entry['raw']} -> {entry['formatted']} ({entry['type']})")

# Email addresses
email_entries = contacts['emails']
print(f"\nEmails found: {len(email_entries)}")
for entry in email_entries:
    print(f"  - {entry['email']}")

# Whatever text is left between the matched spans
leftover = contacts['text_segments']
print(f"\nText segments: {len(leftover)}")
for piece in leftover:
    print(f"  - {piece}")

print("\n=== Formatted Contact Card ===")
card = extractor.format_contact_card(business_card_text)
print(card)
391
```
392

393
### Bulk Text Processing
394

395
```python
396
import phonenumbers
397
from concurrent.futures import ThreadPoolExecutor
398
import json
399

400
class BulkPhoneExtractor:
    """Process multiple documents for phone number extraction."""

    def __init__(self, default_region="US", max_workers=4):
        """
        Parameters:
        - default_region: region code used for documents that give none
        - max_workers: size of the thread pool used by process_documents
        """
        self.default_region = default_region
        self.max_workers = max_workers

    def process_document(self, doc_id, text, region=None):
        """
        Process a single document.

        Returns a dict with the 'doc_id', a 'phone_numbers' list (one
        entry per match) and 'stats' counting total matches, valid
        numbers and distinct E.164 numbers.
        """
        search_region = region or self.default_region

        result = {
            'doc_id': doc_id,
            'phone_numbers': [],
            'stats': {
                'total_matches': 0,
                'valid_numbers': 0,
                'unique_numbers': 0
            }
        }

        seen_numbers = set()

        for match in phonenumbers.PhoneNumberMatcher(text, search_region):
            # PhoneNumberMatch exposes number/raw_string/start/end as plain
            # attributes; calling them as methods raises TypeError.
            number = match.number
            e164 = phonenumbers.format_number(number, phonenumbers.PhoneNumberFormat.E164)

            is_valid = phonenumbers.is_valid_number(number)

            result['phone_numbers'].append({
                'raw_text': match.raw_string,
                'e164': e164,
                'international': phonenumbers.format_number(number, phonenumbers.PhoneNumberFormat.INTERNATIONAL),
                'is_valid': is_valid,
                # Stringified so the result stays JSON-friendly; number_type()
                # yields a PhoneNumberType constant.
                'type': str(phonenumbers.number_type(number)),
                'region': phonenumbers.region_code_for_number(number),
                'position': [match.start, match.end]
            })

            result['stats']['total_matches'] += 1
            if is_valid:
                result['stats']['valid_numbers'] += 1

            seen_numbers.add(e164)

        result['stats']['unique_numbers'] = len(seen_numbers)
        return result

    def process_documents(self, documents):
        """
        Process multiple documents in parallel.

        Parameters:
        - documents: iterable of (doc_id, text, region) tuples

        Returns the per-document results in submission order. A document
        whose processing raises is reported on stdout and left out of the
        returned list.
        """
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = [
                executor.submit(self.process_document, doc_id, text, region)
                for doc_id, text, region in documents
            ]

            results = []
            for future in futures:
                try:
                    results.append(future.result())
                except Exception as e:
                    # Best-effort batch: one bad document must not abort the rest.
                    print(f"Error processing document: {e}")

            return results

    def generate_summary_report(self, results):
        """
        Generate summary statistics across all documents.

        Parameters:
        - results: list of dicts as returned by process_document

        Returns a dict with a 'summary' section (document count, match
        counts, globally distinct valid numbers, average matches per
        document) plus per-region and per-type counts of valid numbers.
        """
        total_docs = len(results)
        total_matches = sum(r['stats']['total_matches'] for r in results)
        total_valid = sum(r['stats']['valid_numbers'] for r in results)

        # Collect all unique numbers across documents; only valid numbers
        # contribute to the distinct set and to the region/type tallies.
        all_numbers = set()
        regions = {}
        types = {}

        for result in results:
            for phone in result['phone_numbers']:
                if not phone['is_valid']:
                    continue

                all_numbers.add(phone['e164'])

                region = phone['region']
                regions[region] = regions.get(region, 0) + 1

                phone_type = phone['type']
                types[phone_type] = types.get(phone_type, 0) + 1

        return {
            'summary': {
                'total_documents': total_docs,
                'total_matches': total_matches,
                'valid_numbers': total_valid,
                'unique_numbers_global': len(all_numbers),
                # Guard the empty batch so the average is 0, not a ZeroDivisionError.
                'average_matches_per_doc': total_matches / total_docs if total_docs > 0 else 0
            },
            'regions': regions,
            'types': types
        }
500

501
# Demonstration: process a small batch with two worker threads
extractor = BulkPhoneExtractor("US", max_workers=2)

# Each entry is (doc_id, text, region)
documents = [
    ("doc1", "Call us at 650-253-2222 or +44 20 8366 1177", "US"),
    ("doc2", "Support: 1-800-555-1234, International: +33 1 42 68 53 00", "US"),
    ("doc3", "Office: (555) 123-4567, Mobile: 555.987.6543", "US"),
    ("doc4", "Invalid phone: 123-456, Valid: +1-650-253-2222", "US"),
]

print("=== Bulk Processing Results ===")
results = extractor.process_documents(documents)

for result in results:
    stats = result['stats']
    print(f"\nDocument {result['doc_id']}:")
    print(f"  Total matches: {stats['total_matches']}")
    print(f"  Valid numbers: {stats['valid_numbers']}")
    print(f"  Unique numbers: {stats['unique_numbers']}")

    # Only the first three numbers of each document are listed
    preview = result['phone_numbers'][:3]
    for phone in preview:
        if phone['is_valid']:
            status = "✓"
        else:
            status = "✗"
        print(f"    {status} {phone['raw_text']} -> {phone['international']}")

print("\n=== Summary Report ===")
summary = extractor.generate_summary_report(results)
print(json.dumps(summary, indent=2))
528
```

Version

Tile

Files

phone-number-matching.md docs/

phone-number-matching.mddocs/