CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-nameparser

A simple Python module for parsing human names into their individual components.

Pending
Overview
Eval results
Files

configuration.mddocs/

Configuration and Customization

Comprehensive configuration system for customizing parsing behavior, including management of titles, suffixes, prefixes, conjunctions, and parsing rules.

Capabilities

Constants Class

Main configuration container that holds all parsing rules and behavior settings.

class Constants:
    """
    Configuration container for parser behavior.

    Groups the word sets, lookup tables, and formatting defaults that are
    listed in the constructor arguments and class attributes below.
    """

    def __init__(self, prefixes=PREFIXES, suffix_acronyms=SUFFIX_ACRONYMS,
                 suffix_not_acronyms=SUFFIX_NOT_ACRONYMS, titles=TITLES,
                 first_name_titles=FIRST_NAME_TITLES, conjunctions=CONJUNCTIONS,
                 capitalization_exceptions=CAPITALIZATION_EXCEPTIONS, regexes=REGEXES):
        """
        Build a configuration from the given word lists and patterns.

        Every argument defaults to the corresponding module-level constant.
        Suffixes are configured through two separate arguments
        (``suffix_acronyms`` and ``suffix_not_acronyms``); the signature has
        no single combined ``suffixes`` argument.

        Args:
            prefixes: Name prefixes like 'de', 'van', 'von'
            suffix_acronyms: Acronym suffixes like 'Ph.D.', 'M.D.'
            suffix_not_acronyms: Non-acronym suffixes like 'Jr.', 'Sr.'
            titles: Person titles like 'Dr.', 'Mr.', 'Hon.'
            first_name_titles: Special titles that indicate first names like 'Sir'
            conjunctions: Name conjunctions like 'and', 'of', 'the'
            capitalization_exceptions: Special capitalization rules
            regexes: Regular expression patterns for parsing
        """

    @property
    def suffixes_prefixes_titles(self):
        """Combined set of all suffixes, prefixes, and titles for lookups."""

    # Class-level formatting defaults
    # Template with one placeholder per name part; the nickname is wrapped
    # in parentheses.
    string_format = "{title} {first} {middle} {last} {suffix} ({nickname})"
    # Template for initials; only first, middle, and last are included.
    initials_format = "{first} {middle} {last}"
    # NOTE(review): presumably the character placed after each initial --
    # confirm against the formatting documentation.
    initials_delimiter = "."
    # NOTE(review): presumably the value reported for name parts that are
    # absent -- confirm.
    empty_attribute_default = ""
    # Off by default; set to True to have names capitalized automatically.
    capitalize_name = False
    # NOTE(review): presumably extends capitalization to mixed-case input --
    # confirm against the library documentation.
    force_mixed_case_capitalization = False

Usage Examples:

from nameparser import HumanName
from nameparser.config import Constants

# Create custom configuration
custom_config = Constants()
custom_config.titles.add('Prof', 'Dean')
# Suffixes are split across two sets (suffix_acronyms and
# suffix_not_acronyms); Constants has no combined `suffixes` attribute.
custom_config.suffix_not_acronyms.add('Esq')

# Use with HumanName instance
name = HumanName("Prof John Smith Esq", constants=custom_config)
print(name.title)   # 'Prof'
print(name.suffix)  # 'Esq'

SetManager Class

Specialized set class for managing configuration word lists with normalization and method chaining.

class SetManager:
    """
    Set-like manager for one configuration word list.

    Strings are normalized (lowercase, no periods) before being stored, and
    ``add`` / ``remove`` return the manager itself so calls can be chained.
    """

    def __init__(self, elements):
        """
        Easily add and remove config variables per module or instance.
        Normalizes strings (lowercase, no periods) for comparison.

        Args:
            elements (set): Initial set of elements
        """

    def add(self, *strings):
        """
        Add normalized strings to the set. Can pass multiple strings.
        Normalizes by converting to lowercase and removing periods.

        Args:
            *strings (str): One or more strings to add

        Returns:
            SetManager: Self for method chaining
        """

    def remove(self, *strings):
        """
        Remove normalized strings from the set. Can pass multiple strings.

        Args:
            *strings (str): One or more strings to remove

        Returns:
            SetManager: Self for method chaining
        """

    def add_with_encoding(self, s, encoding=None):
        """
        Add a single string with an explicit encoding specification.

        Args:
            s (str): String to add
            encoding (str): Character encoding (defaults to UTF-8)

        NOTE(review): the return value is not documented here; unlike
        ``add`` it may not support chaining -- confirm before relying on it.
        """

Usage Examples:

from nameparser.config import CONSTANTS

# Method chaining for bulk updates
CONSTANTS.titles.remove('hon').add('chemistry', 'dean', 'provost')

# Add multiple items. Suffixes are split across two sets (suffix_acronyms
# and suffix_not_acronyms); CONSTANTS has no combined `suffixes` attribute.
CONSTANTS.suffix_not_acronyms.add('Esq')
CONSTANTS.suffix_acronyms.add('CPA', 'MBA')

# Remove multiple items
CONSTANTS.conjunctions.remove('and', 'or')

# Check membership. Entries are stored normalized (lowercase, no periods),
# so normalize the value being tested the same way before the lookup.
print('dr' in CONSTANTS.titles)                            # True (matches 'Dr.')
print('DR.'.lower().replace('.', '') in CONSTANTS.titles)  # True (normalized to 'dr')

TupleManager Class

Dictionary subclass that provides dot-notation access for configuration dictionaries.

class TupleManager(dict):
    """
    Dictionary with dot.notation access for configuration tuples.
    Makes tuple constants more user-friendly.

    Being a ``dict`` subclass, the usual item syntax (``obj['key']``) is
    available alongside attribute syntax (``obj.key``).
    """

    def __getattr__(self, attr):
        """
        Access dictionary keys as attributes (``obj.attr`` reads key ``attr``).

        NOTE(review): behavior for a missing key is not specified here --
        confirm whether it raises or returns a default.
        """

    def __setattr__(self, attr, value):
        """Set dictionary keys as attributes (``obj.attr = value``)."""

    def __delattr__(self, attr):
        """Delete dictionary keys as attributes (``del obj.attr``)."""

Usage Examples:

from nameparser.config import CONSTANTS

# Access regex patterns with dot notation
initial_pattern = CONSTANTS.regexes.initial
roman_pattern = CONSTANTS.regexes.roman_numeral

# Access capitalization exceptions. Keys are the normalized (lowercase,
# no periods) form; values are the exact capitalization to emit.
# 'macdonald' is not a default key, so use one of the shipped entries.
phd = CONSTANTS.capitalization_exceptions['phd']  # 'Ph.D.'

Module-Level Configuration

Global configuration instance shared by all HumanName instances by default.

# Global configuration constant (the shared default used by HumanName)
from nameparser.config import CONSTANTS

# The lines below are annotation-only reference entries: they name each
# attribute and its type and assign nothing.

# Configuration sets (SetManager instances)
CONSTANTS.prefixes: SetManager         # Name prefixes
CONSTANTS.titles: SetManager           # Person titles
CONSTANTS.first_name_titles: SetManager  # Special first name titles
CONSTANTS.suffix_acronyms: SetManager     # Acronym suffixes (e.g. 'Ph.D.', 'M.D.')
CONSTANTS.suffix_not_acronyms: SetManager # Non-acronym suffixes (e.g. 'Jr.', 'Sr.')
CONSTANTS.conjunctions: SetManager        # Name conjunctions

# Configuration dictionaries (TupleManager instances)
CONSTANTS.capitalization_exceptions: TupleManager  # Special capitalization
CONSTANTS.regexes: TupleManager                    # Regex patterns

# Combined sets (computed properties)
CONSTANTS.suffixes_prefixes_titles: set  # Combined set of suffixes, prefixes, and titles for lookups

Usage Examples:

from nameparser import HumanName
from nameparser.config import CONSTANTS

# Global configuration affects all instances
CONSTANTS.titles.add('Captain', 'Colonel')
# Suffixes are split across two sets (suffix_acronyms and
# suffix_not_acronyms); CONSTANTS has no combined `suffixes` attribute.
CONSTANTS.suffix_acronyms.add('PE', 'RN')
# Register the acronyms as capitalization exceptions so that
# auto-capitalization renders them fully upper-case instead of treating
# them as ordinary words ('Pe', 'Rn').
CONSTANTS.capitalization_exceptions['pe'] = 'PE'
CONSTANTS.capitalization_exceptions['rn'] = 'RN'
CONSTANTS.capitalize_name = True

# All new instances use updated configuration
name1 = HumanName("captain john doe pe")
print(name1.title)   # 'Captain'
print(name1.suffix)  # 'PE'
print(str(name1))    # 'Captain John Doe PE' (auto-capitalized)

name2 = HumanName("colonel jane smith rn")
print(name2.title)   # 'Colonel'
print(name2.suffix)  # 'RN'

Per-Instance Configuration

Create HumanName instances with custom configuration that doesn't affect other instances.

# Per-instance configuration
# Passing None gives the instance its own new Constants, separate from the
# shared module-level CONSTANTS.
name = HumanName("Full Name", constants=None)  # Creates new Constants instance
# `custom_constants` stands for any Constants instance built beforehand.
name = HumanName("Full Name", constants=custom_constants)  # Uses provided instance

Usage Examples:

from nameparser import HumanName
from nameparser.config import Constants

# Create instance with isolated configuration
name = HumanName("Dean Robert Johns", None)
name.C.titles.add('dean')          # Only affects this instance
name.parse_full_name()             # Re-parse with new config

print(name.title)                  # 'Dean'
print(name.has_own_config)         # True

# Other instances unaffected
other_name = HumanName("Dean Mary Smith")
print(other_name.title)            # '' (Dean not recognized)
print(other_name.first)            # 'Dean'

# Custom Constants instance
academic_config = Constants()
academic_config.titles.add('Prof', 'Dean', 'Provost', 'Chancellor')
# Constants has no combined `suffixes` attribute; degree acronyms belong in
# suffix_acronyms. Add them without internal periods: periods are stripped
# from the parsed piece when it is matched against this set, so 'Ph.D',
# 'Ph.D.' and 'PhD' in the input all match the entry 'PhD'.
academic_config.suffix_acronyms.add('PhD', 'EdD', 'ScD')

name = HumanName("Prof Jane Smith Ph.D", constants=academic_config)
print(name.title)                  # 'Prof'
print(name.suffix)                 # 'Ph.D'

Name Classification Helpers

Methods for testing whether name components belong to specific categories.

# These helpers are called on a HumanName instance (e.g. ``name.is_title(...)``).

def is_title(self, value: str) -> bool:
    """Return True if ``value`` is in the titles set."""

def is_conjunction(self, piece: str | list) -> bool:
    """Return True if ``piece`` is a conjunction and not an initial."""

def is_prefix(self, piece: str | list) -> bool:
    """Return True if ``piece`` is in the prefixes set."""

def is_suffix(self, piece: str | list) -> bool:
    """
    Return True if ``piece`` is in the suffixes set and not an initial.

    NOTE(review): a list argument is accepted; whether any or all of its
    items must be suffixes is not stated here -- confirm. Use
    ``are_suffixes`` when every item must be a suffix.
    """

def is_roman_numeral(self, value: str) -> bool:
    """Return True if ``value`` matches the roman numeral pattern."""

def is_rootname(self, piece: str) -> bool:
    """Return True if ``piece`` is a core name part (not title, suffix, or prefix)."""

def is_an_initial(self, value: str) -> bool:
    """Return True if ``value`` is a single letter or a letter with a period."""

def are_suffixes(self, pieces: list) -> bool:
    """Return True if all pieces in the list are suffixes."""

Usage Examples:

from nameparser import HumanName

name = HumanName("Dr. John van der Berg III")

# Test individual components
print(name.is_title("Dr."))         # True
print(name.is_prefix("van"))        # True
# 'der' is a prefix like 'van', not a conjunction; conjunctions are joining
# words such as 'and', 'of', 'the'.
print(name.is_conjunction("and"))   # True
print(name.is_suffix("III"))        # True
print(name.is_roman_numeral("III")) # True
print(name.is_an_initial("J."))     # True
print(name.is_rootname("John"))     # True

# Test with lists
print(name.is_suffix(["Jr", "Sr"]))    # True
print(name.are_suffixes(["III", "Jr"])) # True

Default Configuration Data

Pre-loaded configuration sets covering common names, titles, and patterns.

# Default configuration sets imported from config modules
from nameparser.config.prefixes import PREFIXES
from nameparser.config.titles import TITLES, FIRST_NAME_TITLES  
from nameparser.config.suffixes import SUFFIX_ACRONYMS, SUFFIX_NOT_ACRONYMS
from nameparser.config.conjunctions import CONJUNCTIONS
from nameparser.config.capitalization import CAPITALIZATION_EXCEPTIONS
from nameparser.config.regexes import REGEXES

# Encoding constant
from nameparser.config import DEFAULT_ENCODING  # 'UTF-8'

Contents include:

  • Prefixes: de, van, von, del, della, da, du, le, la, etc.
  • Titles: Dr, Mr, Mrs, Ms, Prof, Hon, Rev, etc.
  • Suffixes: Jr, Sr, II, III, PhD, MD, Esq, etc.
  • Conjunctions: and, of, the, et, und, y, etc.
  • Capitalization exceptions: II, III, IV, M.D., Ph.D., etc.
  • Regex patterns: For initials, roman numerals, quotes, etc.

Usage Examples:

from nameparser.config import CONSTANTS

# Explore default configuration
# (these are sets, so which entries a slice shows is arbitrary, not alphabetical)
print(list(CONSTANTS.titles)[:10])        # 10 of the default titles
print(list(CONSTANTS.prefixes))           # All prefixes
print(list(CONSTANTS.suffix_acronyms)[:5]) # 5 of the default acronym suffixes

# Check what's included (use the normalized form: lowercase, no periods)
print('dr' in CONSTANTS.titles)           # True
print('von' in CONSTANTS.prefixes)        # True
print('phd' in CONSTANTS.suffix_acronyms) # True

Install with Tessl CLI

npx tessl i tessl/pypi-nameparser

docs

configuration.md

core-parsing.md

formatting.md

index.md

tile.json