Accurately separates a URL's subdomain, domain, and public suffix using the Public Suffix List.
Comprehensive result handling is provided through the ExtractResult dataclass, which offers properties and methods for reconstructing domains, handling IP addresses, accessing metadata, and working with parsed URL components in various formats.
ExtractResult is the core data structure returned by all extraction operations; it contains the parsed URL components and metadata.
from dataclasses import dataclass, field
@dataclass(order=True)
class ExtractResult:
subdomain: str
"""All subdomains beneath the domain, empty string if none"""
domain: str
"""The topmost domain name, or hostname-like content if no valid domain"""
suffix: str
"""The public suffix (TLD), empty string if none or invalid"""
is_private: bool
"""Whether the suffix belongs to PSL private domains"""
registry_suffix: str = field(repr=False)
"""The registry suffix, unaffected by include_psl_private_domains setting"""

Basic Usage:
import tldextract
result = tldextract.extract('http://forums.news.cnn.com/')
print(f"Subdomain: '{result.subdomain}'") # 'forums.news'
print(f"Domain: '{result.domain}'") # 'cnn'
print(f"Suffix: '{result.suffix}'") # 'com'
print(f"Is Private: {result.is_private}") # False

Properties for reconstructing various forms of the original domain name from the parsed components.
@property
def fqdn(self) -> str:
"""
Fully Qualified Domain Name if there is a proper domain and suffix.
Returns:
Complete domain name or empty string if invalid
"""
@property
def top_domain_under_public_suffix(self) -> str:
"""
Domain and suffix joined with a dot if both are present.
Returns:
Registered domain name or empty string if invalid
"""
@property
def top_domain_under_registry_suffix(self) -> str:
"""
Top domain under registry suffix, handling PSL private domains.
Returns:
Registry domain name or empty string if invalid
"""
@property
def registered_domain(self) -> str:
"""
DEPRECATED: Use top_domain_under_public_suffix instead.
Returns:
Same as top_domain_under_public_suffix
"""

Usage Examples:
import tldextract
# Standard domain reconstruction
result = tldextract.extract('http://forums.bbc.co.uk/path')
print(result.fqdn) # 'forums.bbc.co.uk'
print(result.top_domain_under_public_suffix) # 'bbc.co.uk'
# No subdomain
result = tldextract.extract('google.com')
print(result.fqdn) # 'google.com'
print(result.top_domain_under_public_suffix) # 'google.com'
# Invalid domain (IP address)
result = tldextract.extract('http://127.0.0.1:8080')
print(result.fqdn) # '' (empty string)
print(result.top_domain_under_public_suffix) # '' (empty string)
# Private domain handling
result = tldextract.extract('waiterrant.blogspot.com', include_psl_private_domains=True)
print(result.top_domain_under_public_suffix) # 'waiterrant.blogspot.com'
print(result.top_domain_under_registry_suffix) # 'blogspot.com'

Properties for detecting and extracting IP addresses from the parsed results.
@property
def ipv4(self) -> str:
"""
IPv4 address if input was a valid IPv4, empty string otherwise.
Returns:
IPv4 address string or empty string
"""
@property
def ipv6(self) -> str:
"""
IPv6 address if input was a valid IPv6, empty string otherwise.
Returns:
IPv6 address string or empty string
"""

Usage Examples:
import tldextract
# IPv4 detection
result = tldextract.extract('http://192.168.1.1:8080/path')
print(result.ipv4) # '192.168.1.1'
print(result.ipv6) # ''
print(result.domain) # '192.168.1.1'
print(result.suffix) # ''
# IPv6 detection
result = tldextract.extract('http://[2001:db8::1]/path')
print(result.ipv4) # ''
print(result.ipv6) # '2001:db8::1'
print(result.domain) # '[2001:db8::1]'
# Invalid IP addresses
result = tldextract.extract('http://256.1.1.1/') # Invalid IPv4
print(result.ipv4) # ''
print(result.domain) # '256.1.1.1'
result = tldextract.extract('http://127.0.0.1.1/') # Invalid format
print(result.ipv4) # ''
print(result.domain) # '127.0.0.1.1'

Property for converting domain names to reverse DNS notation, commonly used in package naming and namespace organization.
@property
def reverse_domain_name(self) -> str:
"""
Domain name in reverse DNS notation.
Joins components as: suffix.domain.reversed_subdomain_parts
Returns:
Reverse domain name string
"""

Usage Examples:
import tldextract
# Simple domain
result = tldextract.extract('login.example.com')
print(result.reverse_domain_name) # 'com.example.login'
# Complex subdomain
result = tldextract.extract('api.v2.auth.example.com')
print(result.reverse_domain_name) # 'com.example.auth.v2.api'
# Country code TLD
result = tldextract.extract('login.example.co.uk')
print(result.reverse_domain_name) # 'co.uk.example.login'
# No subdomain
result = tldextract.extract('example.com')
print(result.reverse_domain_name) # 'com.example'

Understanding how PSL private domains affect the result structure and property values.
import tldextract
# Default: private domains treated as regular domains
result = tldextract.extract('waiterrant.blogspot.com')
print(result.subdomain) # 'waiterrant'
print(result.domain) # 'blogspot'
print(result.suffix) # 'com'
print(result.is_private) # False
print(result.registry_suffix) # 'com'
print(result.top_domain_under_public_suffix) # 'blogspot.com'
print(result.top_domain_under_registry_suffix) # 'blogspot.com'

import tldextract
# Private domains included in suffix
result = tldextract.extract('waiterrant.blogspot.com', include_psl_private_domains=True)
print(result.subdomain) # ''
print(result.domain) # 'waiterrant'
print(result.suffix) # 'blogspot.com'
print(result.is_private) # True
print(result.registry_suffix) # 'com'
print(result.top_domain_under_public_suffix) # 'waiterrant.blogspot.com'
print(result.top_domain_under_registry_suffix) # 'blogspot.com'

When the input domain doesn't have a recognized public suffix:
import tldextract
result = tldextract.extract('google.notavalidsuffix')
print(result.subdomain) # 'google'
print(result.domain) # 'notavalidsuffix'
print(result.suffix) # ''
print(result.fqdn) # ''

import tldextract
result = tldextract.extract('http://localhost:8080')
print(result.subdomain) # ''
print(result.domain) # 'localhost'
print(result.suffix) # ''
print(result.fqdn) # ''
result = tldextract.extract('http://intranet.corp')
print(result.subdomain) # 'intranet'
print(result.domain) # 'corp'
print(result.suffix) # ''

International domain names are automatically handled:
import tldextract
# Punycode is automatically decoded internally
result = tldextract.extract('http://xn--n3h.com') # ☃.com
print(result.domain) # 'xn--n3h' (the label is returned in the form it was given)
# Unicode domains work directly
result = tldextract.extract('http://münchen.de')
print(result.domain) # 'münchen'
print(result.suffix) # 'de'

ExtractResult objects support comparison and sorting operations:
import tldextract
results = [
tldextract.extract('b.example.com'),
tldextract.extract('a.example.com'),
tldextract.extract('c.example.org')
]
# Results are sortable (order=True in dataclass)
sorted_results = sorted(results)
for result in sorted_results:
print(result.fqdn)
# Results compare field by field in declaration order (subdomain first), so the output is:
# a.example.com
# b.example.com
# c.example.org
# Equality comparison
result1 = tldextract.extract('example.com')
result2 = tldextract.extract('http://example.com/')
print(result1 == result2) # True - same parsed components

ExtractResult provides readable string representation:
import tldextract
result = tldextract.extract('http://forums.news.cnn.com/')
print(result)
# ExtractResult(subdomain='forums.news', domain='cnn', suffix='com', is_private=False)
print(repr(result))
# Same detailed representation

Install with Tessl CLI
npx tessl i tessl/pypi-tldextract