A simple Python module for parsing human names into their individual components.
—
Comprehensive configuration system for customizing parsing behavior, including management of titles, suffixes, prefixes, conjunctions, and parsing rules.
Main configuration container that holds all parsing rules and behavior settings.
class Constants:
def __init__(self, prefixes=PREFIXES, suffix_acronyms=SUFFIX_ACRONYMS,
suffix_not_acronyms=SUFFIX_NOT_ACRONYMS, titles=TITLES,
first_name_titles=FIRST_NAME_TITLES, conjunctions=CONJUNCTIONS,
capitalization_exceptions=CAPITALIZATION_EXCEPTIONS, regexes=REGEXES):
"""
Configuration container for parser behavior.
Args:
prefixes: Name prefixes like 'de', 'van', 'von'
suffix_acronyms: Acronym suffixes like 'Ph.D.', 'M.D.'
suffix_not_acronyms: Non-acronym suffixes like 'Jr.', 'Sr.'
titles: Person titles like 'Dr.', 'Mr.', 'Hon.'
first_name_titles: Special titles that indicate first names like 'Sir'
conjunctions: Name conjunctions like 'and', 'of', 'the'
capitalization_exceptions: Special capitalization rules
regexes: Regular expression patterns for parsing
"""
@property
def suffixes_prefixes_titles(self):
"""Combined set of all suffixes, prefixes, and titles for lookups."""
# Class-level formatting defaults
string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
initials_format = "{first} {middle} {last}"
initials_delimiter = "."
empty_attribute_default = ""
capitalize_name = False
force_mixed_case_capitalization = False

Usage Examples:
from nameparser import HumanName
from nameparser.config import Constants
# Create custom configuration
custom_config = Constants()
custom_config.titles.add('Prof', 'Dean')
custom_config.suffix_not_acronyms.add('Esq')
# Use with HumanName instance
name = HumanName("Prof John Smith Esq", constants=custom_config)
print(name.title) # 'Prof'
print(name.suffix) # 'Esq'

Specialized set class for managing configuration word lists with normalization and method chaining.
class SetManager:
def __init__(self, elements):
"""
Easily add and remove config variables per module or instance.
Normalizes strings (lowercase, no periods) for comparison.
Args:
elements (set): Initial set of elements
"""
def add(self, *strings):
"""
Add normalized strings to the set. Can pass multiple strings.
Normalizes by converting to lowercase and removing periods.
Args:
*strings (str): One or more strings to add
Returns:
SetManager: Self for method chaining
"""
def remove(self, *strings):
"""
Remove normalized strings from the set.
Args:
*strings (str): One or more strings to remove
Returns:
SetManager: Self for method chaining
"""
def add_with_encoding(self, s, encoding=None):
"""
Add string with explicit encoding specification.
Args:
s (str): String to add
encoding (str): Character encoding (defaults to UTF-8)
"""

Usage Examples:
from nameparser.config import CONSTANTS
# Method chaining for bulk updates
CONSTANTS.titles.remove('hon').add('chemistry', 'dean', 'provost')
# Add multiple items
CONSTANTS.suffix_acronyms.add('CPA', 'MBA')
CONSTANTS.suffix_not_acronyms.add('Esq')
# Remove multiple items
CONSTANTS.conjunctions.remove('and', 'or')
# Check membership (normalized)
print('dr' in CONSTANTS.titles) # True (matches 'Dr.')
print('DR.' in CONSTANTS.titles) # True (normalized to 'dr')

Dictionary subclass that provides dot-notation access for configuration dictionaries.
class TupleManager(dict):
"""
Dictionary with dot.notation access for configuration tuples.
Makes tuple constants more user-friendly.
"""
def __getattr__(self, attr):
"""Access dictionary keys as attributes."""
def __setattr__(self, attr, value):
"""Set dictionary keys as attributes."""
def __delattr__(self, attr):
"""Delete dictionary keys as attributes."""

Usage Examples:
from nameparser.config import CONSTANTS
# Access regex patterns with dot notation
initial_pattern = CONSTANTS.regexes.initial
roman_pattern = CONSTANTS.regexes.roman_numeral
# Access capitalization exceptions
macdonald = CONSTANTS.capitalization_exceptions['macdonald'] # 'MacDonald'

Global configuration instance shared by all HumanName instances by default.
# Global configuration constant
from nameparser.config import CONSTANTS
# Configuration sets (SetManager instances)
CONSTANTS.prefixes: SetManager # Name prefixes
CONSTANTS.titles: SetManager # Person titles
CONSTANTS.first_name_titles: SetManager # Special first name titles
CONSTANTS.suffix_acronyms: SetManager # Acronym suffixes
CONSTANTS.suffix_not_acronyms: SetManager # Non-acronym suffixes
CONSTANTS.conjunctions: SetManager # Name conjunctions
# Configuration dictionaries (TupleManager instances)
CONSTANTS.capitalization_exceptions: TupleManager # Special capitalization
CONSTANTS.regexes: TupleManager # Regex patterns
# Combined sets (computed properties)
CONSTANTS.suffixes_prefixes_titles: set # Combined set for lookups

Usage Examples:
from nameparser import HumanName
from nameparser.config import CONSTANTS
# Global configuration affects all instances
CONSTANTS.titles.add('Captain', 'Colonel')
CONSTANTS.suffix_acronyms.add('PE', 'RN')
CONSTANTS.capitalize_name = True
# All new instances use updated configuration
name1 = HumanName("captain john doe pe")
print(name1.title) # 'Captain'
print(name1.suffix) # 'PE'
print(str(name1)) # 'Captain John Doe PE' (auto-capitalized)
name2 = HumanName("colonel jane smith rn")
print(name2.title) # 'Colonel'
print(name2.suffix) # 'RN'

Create HumanName instances with custom configuration that doesn't affect other instances.
# Per-instance configuration
name = HumanName("Full Name", constants=None) # Creates new Constants instance
name = HumanName("Full Name", constants=custom_constants) # Uses provided instance

Usage Examples:
from nameparser import HumanName
from nameparser.config import Constants
# Create instance with isolated configuration
name = HumanName("Dean Robert Johns", None)
name.C.titles.add('dean') # Only affects this instance
name.parse_full_name() # Re-parse with new config
print(name.title) # 'Dean'
print(name.has_own_config) # True
# Other instances unaffected
other_name = HumanName("Dean Mary Smith")
print(other_name.title) # '' (Dean not recognized)
print(other_name.first) # 'Dean'
# Custom Constants instance
academic_config = Constants()
academic_config.titles.add('Prof', 'Dean', 'Provost', 'Chancellor')
academic_config.suffix_acronyms.add('Ph.D', 'Ed.D', 'Sc.D')
name = HumanName("Prof Jane Smith Ph.D", constants=academic_config)
print(name.title) # 'Prof'
print(name.suffix) # 'Ph.D'

Methods for testing whether name components belong to specific categories.
def is_title(self, value: str) -> bool:
"""Check if value is in the titles set."""
def is_conjunction(self, piece: str | list) -> bool:
"""Check if piece is a conjunction and not an initial."""
def is_prefix(self, piece: str | list) -> bool:
"""Check if piece is in the prefixes set."""
def is_suffix(self, piece: str | list) -> bool:
"""Check if piece is in the suffixes set and not an initial."""
def is_roman_numeral(self, value: str) -> bool:
"""Check if value matches roman numeral pattern."""
def is_rootname(self, piece: str) -> bool:
"""Check if piece is a core name part (not title, suffix, or prefix)."""
def is_an_initial(self, value: str) -> bool:
"""Check if value is a single letter or letter with period."""
def are_suffixes(self, pieces: list) -> bool:
"""Check if all pieces in list are suffixes."""

Usage Examples:
from nameparser import HumanName
name = HumanName("Dr. John van der Berg III")
# Test individual components
print(name.is_title("Dr.")) # True
print(name.is_prefix("van")) # True
print(name.is_conjunction("der")) # True
print(name.is_suffix("III")) # True
print(name.is_roman_numeral("III")) # True
print(name.is_an_initial("J.")) # True
print(name.is_rootname("John")) # True
# Test with lists
print(name.is_suffix(["Jr", "Sr"])) # True
print(name.are_suffixes(["III", "Jr"])) # True

Pre-loaded configuration sets covering common names, titles, and patterns.
# Default configuration sets imported from config modules
from nameparser.config.prefixes import PREFIXES
from nameparser.config.titles import TITLES, FIRST_NAME_TITLES
from nameparser.config.suffixes import SUFFIX_ACRONYMS, SUFFIX_NOT_ACRONYMS
from nameparser.config.conjunctions import CONJUNCTIONS
from nameparser.config.capitalization import CAPITALIZATION_EXCEPTIONS
from nameparser.config.regexes import REGEXES
# Encoding constant
from nameparser.config import DEFAULT_ENCODING # 'UTF-8'

Contents include:
Usage Examples:
from nameparser.config import CONSTANTS
# Explore default configuration
print(list(CONSTANTS.titles)[:10]) # First 10 titles
print(list(CONSTANTS.prefixes)) # All prefixes
print(list(CONSTANTS.suffix_acronyms)[:5]) # First 5 acronym suffixes
# Check what's included
print('dr' in CONSTANTS.titles) # True
print('von' in CONSTANTS.prefixes) # True
print('phd' in CONSTANTS.suffix_acronyms) # True

Install with Tessl CLI
npx tessl i tessl/pypi-nameparser