A simple Python module for parsing human names into their individual components.
npx @tessl/cli install tessl/pypi-nameparser@1.1.0
A simple Python module for parsing human names into their individual components (title, first, middle, last, suffix, nickname). It uses a rule-based approach to classify name parts based on their position in the string and by matching them against known titles and suffixes. It supports various name formats and provides extensive customization capabilities.
pip install nameparser
from nameparser import HumanName
Advanced configuration:
from nameparser import HumanName
from nameparser.config import CONSTANTS
The nameparser module supports various name structures. The general supported format is "Title First Middle Last Suffix", where all pieces are optional. Comma-separated formats like "Last, First" are also supported.
from nameparser import HumanName

# Usage walkthrough: each HumanName(...) call parses one name string, and the
# trailing comment on every print() shows the value the documentation expects
# for that attribute.

# Standard format: Title First Middle Last Suffix
name = HumanName("Dr. Juan Q. Xavier de la Vega III")
print(name.title)  # 'Dr.'
print(name.first)  # 'Juan'
print(name.middle)  # 'Q. Xavier'
print(name.last)  # 'de la Vega'
print(name.suffix)  # 'III'

# Comma-separated format: Last, First Middle
name = HumanName("Smith, John Michael")
print(name.first)  # 'John'
print(name.middle)  # 'Michael'
print(name.last)  # 'Smith'

# Names with nicknames in quotes or parentheses
name = HumanName('Robert "Bob" Johnson')
print(name.first)  # 'Robert'
print(name.nickname)  # 'Bob'
print(name.last)  # 'Johnson'
name = HumanName('William (Bill) Gates III')
print(name.first)  # 'William'
print(name.nickname)  # 'Bill'
print(name.last)  # 'Gates'
print(name.suffix)  # 'III'

# Complex titles and prefixes
name = HumanName("The Right Honorable Jane van der Berg-Smith")
print(name.title)  # 'The Right Honorable'
print(name.first)  # 'Jane'
print(name.last)  # 'van der Berg-Smith'

# Get name as dictionary (still the "Jane van der Berg-Smith" instance from
# the previous example)
name_dict = name.as_dict()
print(name_dict)  # Dictionary with all components

# Format names with custom templates: string_format is a Python string
# formatting template whose fields are the name attributes
name.string_format = "{first} {last}"
print(str(name)) # Formatted output
The nameparser library is built around the HumanName class, which provides a simple interface for parsing and accessing name components. The parser uses:
Main functionality for parsing names into components and accessing parsed results as properties, dictionaries, or formatted strings.
class HumanName:
    """Parse a person's name and expose its components.

    API outline: each member below carries its documented contract as a
    docstring; implementations are not part of this listing.
    """

    def __init__(self, full_name="", constants=CONSTANTS, encoding=DEFAULT_ENCODING,
                 string_format=None, initials_format=None, initials_delimiter=None,
                 first=None, middle=None, last=None, title=None, suffix=None,
                 nickname=None):
        """
        Parse a person's name into individual components.

        Args:
            full_name (str): The name string to be parsed
            constants: Constants instance for configuration (None for per-instance config)
            encoding (str): String encoding (default: UTF-8)
            string_format (str): Python string formatting template
            initials_format (str): Python initials string formatting template
            initials_delimiter (str): String delimiter for initials
            first (str): Pre-set first name (bypasses parsing)
            middle (str): Pre-set middle name (bypasses parsing)
            last (str): Pre-set last name (bypasses parsing)
            title (str): Pre-set title (bypasses parsing)
            suffix (str): Pre-set suffix (bypasses parsing)
            nickname (str): Pre-set nickname (bypasses parsing)
        """

    @property
    def title(self):
        """str: The person's titles."""

    @property
    def first(self):
        """str: The person's first name."""

    @property
    def middle(self):
        """str: The person's middle names."""

    @property
    def last(self):
        """str: The person's last name."""

    @property
    def suffix(self):
        """str: The person's suffixes."""

    @property
    def nickname(self):
        """str: The person's nicknames."""

    @property
    def surnames(self):
        """str: A string of all middle names followed by the last name."""

    @property
    def full_name(self):
        """str: The string output of the HumanName instance."""

    @property
    def original(self):
        """str: The original string, untouched by the parser."""

    @property
    def unparsable(self):
        """bool: True if the name could not be parsed, False if parsing succeeded."""

    @property
    def has_own_config(self):
        """bool: True if using per-instance configuration."""

    def as_dict(self, include_empty=True):
        """
        Return the parsed name as a dictionary of its attributes.

        Args:
            include_empty (bool): Include keys for empty name attributes

        Returns:
            dict: Dictionary containing name components
        """


# Format names with custom templates and generate initials from name components.
# NOTE(review): listed outside any class body in this reference; `self` is
# presumably a HumanName instance -- confirm against the library.
def initials(self):
    """
    Return period-delimited initials of first, middle and optionally last name.

    Returns:
        str: Formatted initials string based on initials_format template
    """
# NOTE(review): listed outside any class body in this reference; `self` is
# presumably a HumanName instance -- confirm against the library.
def initials_list(self):
    """
    Return the initials as a list of individual letters.

    Returns:
        list: List of initial letters from first, middle, and last names
    """
# NOTE(review): listed outside any class body in this reference; `self` is
# presumably a HumanName instance -- confirm against the library.
def capitalize(self, force=None):
    """
    Correct capitalization of names entered in all upper or lower case.

    Args:
        force (bool): Forces capitalization of mixed case strings
    """


# Customize parsing behavior through configuration constants and per-instance settings.
class Constants:
    """Container for the configuration that customizes parser behavior.

    API outline: members carry their documented contract as docstrings;
    implementations are not part of this listing.
    """

    def __init__(self, prefixes=PREFIXES, suffix_acronyms=SUFFIX_ACRONYMS,
                 suffix_not_acronyms=SUFFIX_NOT_ACRONYMS, titles=TITLES,
                 first_name_titles=FIRST_NAME_TITLES, conjunctions=CONJUNCTIONS,
                 capitalization_exceptions=CAPITALIZATION_EXCEPTIONS, regexes=REGEXES):
        """
        Configuration container for parser behavior.

        Args:
            prefixes: Name prefixes like 'de', 'van', 'von'
            suffix_acronyms: Acronym suffixes like 'Ph.D.', 'M.D.'
            suffix_not_acronyms: Non-acronym suffixes like 'Jr.', 'Sr.'
            titles: Person titles like 'Dr.', 'Mr.', 'Hon.'
            first_name_titles: Special titles that indicate first names like 'Sir'
            conjunctions: Name conjunctions like 'and', 'of', 'the'
            capitalization_exceptions: Special capitalization rules
            regexes: Regular expression patterns for parsing
        """

    @property
    def suffixes_prefixes_titles(self):
        """set: Combined set of all suffixes, prefixes, and titles for lookups."""
class SetManager:
    """Manager for a set of normalized strings.

    API outline: members carry their documented contract as docstrings;
    implementations are not part of this listing.
    """

    def add(self, *strings):
        """
        Add normalized strings to the set. Returns self for chaining.

        Args:
            *strings: One or more strings to add

        Returns:
            SetManager: Self for method chaining
        """

    def remove(self, *strings):
        """
        Remove normalized strings from the set. Returns self for chaining.

        Args:
            *strings: One or more strings to remove

        Returns:
            SetManager: Self for method chaining
        """


# Main class for name parsing
class HumanName:
    """Attribute summary for the name-parsing class (types only, no values)."""

    # Name component properties
    title: str
    first: str
    middle: str
    last: str
    suffix: str
    nickname: str
    surnames: str
    full_name: str
    original: str
    unparsable: bool
    has_own_config: bool

    # Internal list properties (accessible but primarily for advanced use)
    title_list: list
    first_list: list
    middle_list: list
    last_list: list
    suffix_list: list
    nickname_list: list
    surnames_list: list
# Configuration classes
class Constants:
    """Attribute summary for the configuration class (types only, no values)."""

    string_format: str
    initials_format: str
    initials_delimiter: str
    empty_attribute_default: str
    capitalize_name: bool
    force_mixed_case_capitalization: bool
class SetManager:
    """Attribute summary for the set manager (types only, no values)."""

    # The underlying set that the manager wraps.
    elements: set
class TupleManager(dict):
    """Dict subclass listed among the configuration classes.

    No additional members are shown in this reference, so it behaves as a
    plain dict here.
    """

    pass