Python port of Google's libphonenumber library for parsing, formatting, storing and validating international phone numbers
—
Phone number discovery and extraction from unstructured text, plus as-you-type formatting for user interfaces. These tools enable applications to find phone numbers in documents, messages, and user input while providing real-time formatting feedback.
Extract and identify phone numbers from unstructured text using configurable matching algorithms with different levels of strictness.
class PhoneNumberMatcher:
    """Finds phone numbers in text strings.

    API signature stub: the methods carry documentation only. Instances are
    iterable, so the usage examples consume them with list(...) or a for-loop.
    """

    def __init__(self, text: str, region: str | None, leniency: int = Leniency.VALID, max_tries: int = 65535):
        """
        Initialize matcher for finding phone numbers in text.

        Parameters:
        - text: Text to search for phone numbers
        - region: Default region for parsing numbers without country codes;
          pass None to require country codes in the text
        - leniency: Matching strictness level (Leniency value, default Leniency.VALID)
        - max_tries: Search-attempt budget (default: 65535).
          NOTE(review): the upstream library documents this as the maximum
          number of invalid candidates to try before giving up on the text,
          not as a cap on the number of matches returned -- confirm.
        """

    def has_next(self) -> bool:
        """Check if more matches exist."""

    def next(self) -> PhoneNumberMatch:
        """Get next phone number match."""

    def __iter__(self):
        """Iterator support for for-loops."""
class PhoneNumberMatch:
    """Represents a phone number found in text."""

    start: int  # Start position in text
    end: int  # End position in text (start + len(raw_string) in the example output)
    raw_string: str  # Raw matched string from text
    number: PhoneNumber  # Parsed phone number object

    def __init__(self, start: int, raw_string: str, numobj: PhoneNumber):
        """
        Initialize a phone number match.

        Parameters:
        - start: Start position in original text
        - raw_string: Raw string that was matched
        - numobj: Parsed PhoneNumber object (presumably exposed as `number`)
        """

Leniency Levels:
class Leniency:
    """Leniency levels for phone number matching in text.

    Values are ordered from most permissive (0) to most restrictive (3).
    """

    POSSIBLE = 0  # Most permissive, matches possible numbers
    VALID = 1  # Matches valid numbers only (default)
    STRICT_GROUPING = 2  # Valid + proper digit grouping
    EXACT_GROUPING = 3  # Most restrictive, exact formatting match

Usage Examples:
# Basic phone number extraction
text = "Call me at 510-748-8230 if it's before 9:30, or on 703-4800500 after 10am."
# Find all phone numbers with default settings
matches = list(phonenumbers.PhoneNumberMatcher(text, "US"))
for match in matches:
print(f"Found: {match.raw_string}")
print(f"Position: {match.start}-{match.end}")
print(f"Formatted: {phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.E164)}")
print()
# Output:
# Found: 510-748-8230
# Position: 11-23
# Formatted: +15107488230
#
# Found: 703-4800500
# Position: 51-62
# Formatted: +17034800500
# Using different leniency levels
strict_matcher = phonenumbers.PhoneNumberMatcher(text, "US", phonenumbers.Leniency.EXACT_GROUPING)
lenient_matcher = phonenumbers.PhoneNumberMatcher(text, "US", phonenumbers.Leniency.POSSIBLE)
strict_matches = list(strict_matcher)
lenient_matches = list(lenient_matcher)
print(f"Strict matching found: {len(strict_matches)} numbers")
print(f"Lenient matching found: {len(lenient_matches)} numbers")

Extract phone numbers from international text with various country contexts.
Usage Examples:
# International text with mixed formats
international_text = """
Contact our offices:
US Office: +1 (650) 253-2222
UK Office: +44 20 8366 1177
Local UK: 020 8366 1177
France: +33 1 42 68 53 00
Germany: 030 12345678
"""
# Parse with no default region (requires country codes)
international_matches = list(phonenumbers.PhoneNumberMatcher(international_text, None))
for match in international_matches:
region = phonenumbers.region_code_for_number(match.number)
formatted = phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.INTERNATIONAL)
print(f"{match.raw_string} -> {formatted} ({region})")
# Parse with specific region context
uk_context_matches = list(phonenumbers.PhoneNumberMatcher(international_text, "GB"))
# This will also match "020 8366 1177" as a valid UK number
for match in uk_context_matches:
if match.raw_string == "020 8366 1177":
print(f"Local UK number detected: {match.raw_string}")
# Multi-region extraction with context switching
def extract_numbers_by_region(text, regions):
    """Scan ``text`` once per region and collect the matches for each.

    Parameters:
    - text: Text to search for phone numbers
    - regions: Iterable of region codes, each used in turn as the default region

    Returns:
    Dict mapping every region code to the list of matches found with it.
    """
    return {
        region_code: list(phonenumbers.PhoneNumberMatcher(text, region_code))
        for region_code in regions
    }
regions_to_try = ["US", "GB", "DE", "FR"]
regional_results = extract_numbers_by_region(international_text, regions_to_try)
for region, matches in regional_results.items():
print(f"\nWith {region} context: {len(matches)} matches")
for match in matches:
print(f" {match.raw_string}")

Advanced filtering and processing of found phone numbers.
Usage Examples:
# Complex text with various number formats
complex_text = """
Customer service: 1-800-555-0123 (toll-free)
Emergency: 911
International: +44 20 7946 0958
Fax: (555) 123-4567 ext. 789
Invalid: 123-45 (too short)
Another: +1.555.987.6543
Website: Call 555-HELP (555-4357) for assistance
"""
# Extract with different leniency levels
def compare_leniency_levels(text, region):
    """Print the matches found in ``text`` at each leniency level.

    Parameters:
    - text: Text to search for phone numbers
    - region: Default region passed to the matcher

    Output goes to stdout; nothing is returned.
    """
    labelled_levels = (
        ("Possible", phonenumbers.Leniency.POSSIBLE),
        ("Valid", phonenumbers.Leniency.VALID),
        ("Strict Grouping", phonenumbers.Leniency.STRICT_GROUPING),
        ("Exact Grouping", phonenumbers.Leniency.EXACT_GROUPING),
    )
    for label, level in labelled_levels:
        found = list(phonenumbers.PhoneNumberMatcher(text, region, level))
        print(f"\n{label} ({len(found)} matches):")
        for hit in found:
            print(f" '{hit.raw_string}' at position {hit.start}-{hit.end}")
compare_leniency_levels(complex_text, "US")
# Filter matches by criteria
def filter_matches(text, region, filter_func):
    """Return the matches in ``text`` for which ``filter_func`` is truthy.

    Parameters:
    - text: Text to search for phone numbers
    - region: Default region passed to the matcher (may be None)
    - filter_func: Callable taking a match and returning a truthy/falsy value
    """
    candidates = phonenumbers.PhoneNumberMatcher(text, region)
    return list(filter(filter_func, candidates))
# Filter for toll-free numbers only
def is_toll_free(match):
    """Return True when the matched number's type is TOLL_FREE."""
    return phonenumbers.number_type(match.number) == phonenumbers.PhoneNumberType.TOLL_FREE
toll_free_matches = filter_matches(complex_text, "US", is_toll_free)
print(f"\nToll-free numbers found: {len(toll_free_matches)}")
# Filter for specific regions
def is_uk_number(match):
    """Return True when the matched number resolves to region code "GB"."""
    return phonenumbers.region_code_for_number(match.number) == "GB"
uk_matches = filter_matches(complex_text, None, is_uk_number)
print(f"UK numbers found: {len(uk_matches)}")
# Extract and categorize numbers
def categorize_numbers(text, region):
    """Categorize found phone numbers by type.

    Parameters:
    - text: Text to search for phone numbers
    - region: Default region for matching and for the emergency-number check

    Returns:
    Dict with the keys "Mobile", "Fixed Line", "Toll Free", "Emergency" and
    "Other", each mapping to the list of matches placed in that category.
    """
    type_labels = {
        phonenumbers.PhoneNumberType.MOBILE: "Mobile",
        phonenumbers.PhoneNumberType.FIXED_LINE: "Fixed Line",
        phonenumbers.PhoneNumberType.TOLL_FREE: "Toll Free",
    }
    categories = {
        "Mobile": [],
        "Fixed Line": [],
        "Toll Free": [],
        "Emergency": [],
        "Other": [],
    }
    for match in phonenumbers.PhoneNumberMatcher(text, region):
        label = type_labels.get(phonenumbers.number_type(match.number))
        if label is None:
            # Compare the national digits only: the E.164 form carries the
            # country calling code (e.g. "1911"), which can never equal an
            # emergency number such as "911".
            national = phonenumbers.national_significant_number(match.number)
            if phonenumbers.is_emergency_number(national, region):
                label = "Emergency"
            else:
                label = "Other"
        categories[label].append(match)
    return categories
categorized = categorize_numbers(complex_text, "US")
for category, matches in categorized.items():
if matches:
print(f"\n{category} ({len(matches)}):")
for match in matches:
print(f" {match.raw_string}")

Real-time phone number formatting for user input interfaces, providing immediate feedback as users type.
class AsYouTypeFormatter:
    """Formats phone numbers as digits are entered.

    API signature stub: the methods carry documentation only.
    """

    def __init__(self, region_code: str):
        """
        Initialize formatter for specified region.

        Parameters:
        - region_code: Region code for formatting context (e.g., "US", "GB")
        """

    def input_digit(self, next_char: str, remember_position: bool = False) -> str:
        """
        Add next digit and get formatted result.

        Parameters:
        - next_char: Next character entered by user (the examples feed one
          character per call, including a leading "+")
        - remember_position: Whether to remember cursor position for get_remembered_position()

        Returns:
        Formatted phone number string with current input
        """

    def clear(self):
        """Clear all input and reset formatter."""

    def get_remembered_position(self) -> int:
        """Get position of remembered cursor location."""

Usage Examples:
# Basic as-you-type formatting
formatter = phonenumbers.AsYouTypeFormatter("US")
# Simulate user typing digits one by one
digits = "6502532222"
print("User input -> Formatted output")
print("-" * 30)
for digit in digits:
result = formatter.input_digit(digit)
print(f"'{digit}' -> '{result}'")
# Output:
# '6' -> '6'
# '5' -> '65'
# '0' -> '650'
# '2' -> '650 2'
# '5' -> '650 25'
# '3' -> '650 253'
# '2' -> '650-2532'
# '2' -> '(650) 253-22'
# '2' -> '(650) 253-222'
# '2' -> '(650) 253-2222'
# International number formatting
international_formatter = phonenumbers.AsYouTypeFormatter("US")
international_digits = "+442083661177"
print("\nInternational formatting:")
for char in international_digits:
result = international_formatter.input_digit(char)
print(f"'{char}' -> '{result}'")
# Position remembering for cursor tracking
formatter_with_cursor = phonenumbers.AsYouTypeFormatter("US")
digits_with_cursor = "6502532222"
# Remember position after 7th digit
for i, digit in enumerate(digits_with_cursor):
remember = (i == 6) # Remember position after "6502532"
result = formatter_with_cursor.input_digit(digit, remember)
if remember:
remembered_pos = formatter_with_cursor.get_remembered_position()
print(f"Cursor position remembered: {remembered_pos} in '{result}'")
# Clear and restart
formatter.clear()
new_result = formatter.input_digit("4")
print(f"After clear: '{new_result}'") # "4"

Practical examples for common use cases.
Usage Examples:
# Email/document phone number extraction
def extract_contact_info(document_text, default_region="US"):
    """Build one summary dict per phone number found in a document.

    Parameters:
    - document_text: Text to search for phone numbers
    - default_region: Default region for numbers without a country code

    Returns:
    List of dicts with the keys "raw_text", "position",
    "formatted_national", "formatted_international", "region", "type" and
    "is_valid", in the order the numbers appear in the text.
    """
    national = phonenumbers.PhoneNumberFormat.NATIONAL
    international = phonenumbers.PhoneNumberFormat.INTERNATIONAL

    def describe(found):
        # Summarize a single match as a plain dict.
        numobj = found.number
        return {
            "raw_text": found.raw_string,
            "position": f"{found.start}-{found.end}",
            "formatted_national": phonenumbers.format_number(numobj, national),
            "formatted_international": phonenumbers.format_number(numobj, international),
            "region": phonenumbers.region_code_for_number(numobj),
            "type": phonenumbers.number_type(numobj),
            "is_valid": phonenumbers.is_valid_number(numobj),
        }

    return [
        describe(found)
        for found in phonenumbers.PhoneNumberMatcher(document_text, default_region)
    ]
# Example document
business_card = """
John Smith, CEO
Acme Corporation
Phone: (555) 123-4567
Mobile: 555.987.6543
International: +1-555-246-8101
Fax: (555) 123-4568
"""
contacts = extract_contact_info(business_card)
for contact in contacts:
print(f"Found: {contact['raw_text']} -> {contact['formatted_national']}")
# Live input field formatting simulation
class PhoneInputField:
    """Simulates a phone input field with real-time formatting.

    Attributes:
    - formatter: AsYouTypeFormatter that receives each accepted key
    - value: Text currently displayed in the field
    """

    # Punctuation accepted in addition to digits.
    _ACCEPTED_PUNCTUATION = "+()-. "

    def __init__(self, region_code="US"):
        """Create an empty field that formats for ``region_code``."""
        self.formatter = phonenumbers.AsYouTypeFormatter(region_code)
        self.value = ""

    def on_key_press(self, key):
        """Handle user key press.

        Parameters:
        - key: A single typed character, or the string "BACKSPACE"

        Returns:
        The text to display after the key press. Keys that are not accepted
        leave the display unchanged.
        """
        if key == "BACKSPACE":
            # In real implementation, would need to handle backspace properly.
            # Reset the displayed value together with the formatter so the
            # two cannot drift apart (a later ignored key must not bring the
            # old text back).
            self.clear()
            return self.value
        # Require exactly one character: "" and multi-character strings such
        # as "12" or "()" would otherwise pass the isdigit/substring checks.
        if len(key) == 1 and (key.isdigit() or key in self._ACCEPTED_PUNCTUATION):
            self.value = self.formatter.input_digit(key)
        return self.value

    def clear(self):
        """Clear the field."""
        self.formatter.clear()
        self.value = ""
# Simulate user input
phone_field = PhoneInputField("US")
user_input = "6502532222"
print("Phone input field simulation:")
for char in user_input:
display_value = phone_field.on_key_press(char)
print(f"User typed '{char}' -> Display: '{display_value}'")
# Multi-format phone number search
def find_phone_variations(text, phone_to_find, region):
    """List every way a given phone number is written in ``text``.

    Parameters:
    - text: Text to search
    - phone_to_find: The number to look for, in any parseable form
    - region: Default region for parsing and matching

    Returns:
    Raw matched strings whose E.164 form equals that of ``phone_to_find``;
    an empty list when a NumberParseException is raised.
    """
    e164 = phonenumbers.PhoneNumberFormat.E164
    try:
        wanted = phonenumbers.format_number(phonenumbers.parse(phone_to_find, region), e164)
        candidates = phonenumbers.PhoneNumberMatcher(text, region, phonenumbers.Leniency.POSSIBLE)
        return [
            candidate.raw_string
            for candidate in candidates
            if phonenumbers.format_number(candidate.number, e164) == wanted
        ]
    except phonenumbers.NumberParseException:
        return []
# Find all ways a number appears in text
text_with_variations = """
Contact us at (650) 253-2222 or 650-253-2222.
International callers: +1 650 253 2222
Text: 6502532222
"""
variations = find_phone_variations(text_with_variations, "6502532222", "US")
print(f"\nVariations found: {variations}")
# Output: ['(650) 253-2222', '650-253-2222', '+1 650 253 2222', '6502532222']

Sophisticated text processing capabilities for complex scenarios.
Usage Examples:
# Batch processing with performance optimization
def batch_extract_numbers(documents, default_region="US", max_matches_per_doc=100):
    """Extract phone numbers from multiple documents efficiently.

    Parameters:
    - documents: Dict mapping a document id to its text
    - default_region: Default region for numbers without a country code
    - max_matches_per_doc: Upper bound on the matches kept per document
      (None keeps every match)

    Returns:
    Dict mapping each document id to a list of match dicts (keys "text",
    "start", "end", "e164", "region"), or to {"error": message} when
    processing that document raised.
    """
    from itertools import islice

    results = {}
    for doc_id, text in documents.items():
        try:
            matcher = phonenumbers.PhoneNumberMatcher(
                text, default_region, phonenumbers.Leniency.VALID
            )
            # Cap the number of matches by slicing the iterator. The matcher's
            # max_tries argument is not a match limit (upstream documents it
            # as the number of invalid candidates tolerated), so passing the
            # cap there did not bound the result.
            results[doc_id] = [
                {
                    "text": match.raw_string,
                    "start": match.start,
                    "end": match.end,
                    "e164": phonenumbers.format_number(
                        match.number, phonenumbers.PhoneNumberFormat.E164
                    ),
                    "region": phonenumbers.region_code_for_number(match.number),
                }
                for match in islice(matcher, max_matches_per_doc)
            ]
        except Exception as e:
            # Deliberate best-effort: one bad document must not abort the
            # batch; the failure is recorded in place of its matches.
            results[doc_id] = {"error": str(e)}
    return results
# Example batch processing
sample_documents = {
"email_1": "Please call me at (555) 123-4567 or email john@example.com",
"email_2": "UK office: +44 20 7946 0958, US office: 1-800-555-0199",
"invoice": "Questions? Contact us at 650.253.2222"
}
batch_results = batch_extract_numbers(sample_documents)
for doc_id, matches in batch_results.items():
if isinstance(matches, list):
print(f"\n{doc_id}: {len(matches)} phone numbers")
for match in matches:
print(f" {match['text']} -> {match['e164']}")
# Smart region detection
def smart_extract_with_region_detection(text, candidate_regions=None):
    """Extract numbers trying to detect the most likely region.

    Each candidate region is tried as the default region, and the one that
    yields the most matches wins (the first such region on a tie).

    Parameters:
    - text: Text to search for phone numbers
    - candidate_regions: Region codes to try; defaults to
      ["US", "GB", "CA", "AU", "DE", "FR"]

    Returns:
    Tuple (matches, best_region). With no candidate regions, "US" is used.
    """
    if candidate_regions is None:
        candidate_regions = ["US", "GB", "CA", "AU", "DE", "FR"]

    # Keep the matches from the scoring pass so the winning region does not
    # have to be scanned a second time with the same (default) leniency.
    matches_by_region = {
        region: list(
            phonenumbers.PhoneNumberMatcher(text, region, phonenumbers.Leniency.VALID)
        )
        for region in candidate_regions
    }
    if not matches_by_region:
        fallback_region = "US"
        return list(phonenumbers.PhoneNumberMatcher(text, fallback_region)), fallback_region

    best_region = max(matches_by_region, key=lambda code: len(matches_by_region[code]))
    return matches_by_region[best_region], best_region
mixed_text = """
Call our London office at 020 7946 0958 or
our New York office at (212) 555-0123.
Emergency UK: 999, Emergency US: 911
"""
matches, detected_region = smart_extract_with_region_detection(mixed_text)
print(f"\nDetected best region: {detected_region}")
print(f"Found {len(matches)} matches")

Install with Tessl CLI
npx tessl i tessl/pypi-phonenumbers