Date parsing library designed to parse dates from HTML pages
—
Comprehensive configuration system for customizing dateparser's parsing behavior including date order preferences, timezone handling, language detection settings, and parsing strategies.
Main configuration class that controls all aspects of date parsing behavior with extensive customization options.
class Settings:
"""
Control and configure default parsing behavior of dateparser.
Currently supported settings:
- DATE_ORDER: Order preference for ambiguous dates ('MDY', 'DMY', 'YMD')
- PREFER_LOCALE_DATE_ORDER: Use locale-specific date order
- TIMEZONE: Default timezone for parsing
- TO_TIMEZONE: Convert parsed dates to this timezone
- RETURN_AS_TIMEZONE_AWARE: Return timezone-aware datetime objects
- PREFER_MONTH_OF_YEAR: Prefer specific months ('current', 'last', 'next')
- PREFER_DAY_OF_MONTH: Prefer specific days ('first', 'last', 'current')
- PREFER_DATES_FROM: Prefer past or future dates ('past', 'future')
- RELATIVE_BASE: Base date for relative date parsing
- STRICT_PARSING: Enable strict parsing mode
- REQUIRE_PARTS: Require specific date parts (['day', 'month', 'year'])
- SKIP_TOKENS: Tokens to skip during parsing
- NORMALIZE: Enable text normalization
- RETURN_TIME_AS_PERIOD: Return time ranges as periods
- PARSERS: List of parsers to use
- DEFAULT_LANGUAGES: Default languages for detection
- LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD: Confidence threshold for language detection
- CACHE_SIZE_LIMIT: Maximum cache size for parsed results
"""
def __init__(self, settings=None):
"""
Initialize Settings with custom configuration.
Parameters:
- settings (dict, optional): Dictionary of setting key-value pairs
"""
def replace(self, mod_settings=None, **kwds):
"""
Create new Settings instance with modified values.
Parameters:
- mod_settings (dict, optional): Modified settings dictionary
- **kwds: Individual setting key-value pairs
Returns:
Settings: New Settings instance with updated values
Raises:
TypeError: Invalid setting value type
"""
Usage Examples:
from dateparser.conf import Settings
import dateparser
from datetime import datetime
# Basic settings configuration
settings = Settings({
'PREFER_DATES_FROM': 'future',
'TIMEZONE': 'UTC',
'STRICT_PARSING': True
})
date = dateparser.parse('tomorrow', settings=settings)
# Date order preferences
settings = Settings({'DATE_ORDER': 'DMY'})
date = dateparser.parse('15/01/2023', settings=settings) # January 15, 2023
settings = Settings({'DATE_ORDER': 'MDY'})
date = dateparser.parse('01/15/2023', settings=settings) # January 15, 2023
# Timezone handling
settings = Settings({
'TIMEZONE': 'America/New_York',
'TO_TIMEZONE': 'UTC',
'RETURN_AS_TIMEZONE_AWARE': True
})
date = dateparser.parse('2023-01-15 15:30', settings=settings)
# Language preferences
settings = Settings({
'DEFAULT_LANGUAGES': ['es', 'en', 'fr'],
'LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD': 0.7
})
date = dateparser.parse('15 de enero', settings=settings)
# Strict parsing with required parts
settings = Settings({
'STRICT_PARSING': True,
'REQUIRE_PARTS': ['day', 'month', 'year']
})
date = dateparser.parse('January 2023', settings=settings) # Returns None
# Relative date base
base_date = datetime(2023, 6, 15)
settings = Settings({'RELATIVE_BASE': base_date})
date = dateparser.parse('next week', settings=settings)
# Parser selection
settings = Settings({
'PARSERS': ['timestamp', 'absolute-time', 'relative-time']
})
date = dateparser.parse('1674123456', settings=settings)
# Text normalization and token skipping
settings = Settings({
'NORMALIZE': True,
'SKIP_TOKENS': ['on', 'at', 'the']
})
date = dateparser.parse('on the 15th of January', settings=settings)
Error handling and validation for settings configuration to ensure proper setup and helpful error messages.
class SettingValidationError(ValueError):
"""
Exception raised when a provided setting is not valid.
Inherits from ValueError and provides detailed error messages
about which setting is invalid and why.
"""
class UnknownTokenError(Exception):
"""
Exception raised when an unknown token is encountered during parsing.
This exception is raised when the parser encounters a token that
cannot be recognized or processed by the language dictionary system.
"""
def check_settings(settings):
"""
Validate a Settings instance for correctness.
Parameters:
- settings (Settings): Settings instance to validate
Raises:
SettingValidationError: When settings contain invalid values
"""
Usage Examples:
from dateparser.conf import Settings, SettingValidationError, check_settings
# Handling validation errors
try:
settings = Settings({
'DATE_ORDER': 'INVALID', # Invalid date order
'PARSERS': ['unknown-parser'], # Unknown parser
'REQUIRE_PARTS': ['invalid-part'] # Invalid required part
})
except SettingValidationError as e:
print(f"Settings validation failed: {e}")
# Manual settings validation
settings = Settings({'PREFER_DATES_FROM': 'future'})
try:
check_settings(settings)
print("Settings are valid")
except SettingValidationError as e:
print(f"Invalid settings: {e}")
# Valid settings examples
valid_settings = Settings({
'DATE_ORDER': 'DMY', # Valid: 'MDY', 'DMY', 'YMD'
'PARSERS': ['timestamp', 'absolute-time', 'relative-time'], # Valid parsers
'REQUIRE_PARTS': ['day', 'month'], # Valid parts: 'day', 'month', 'year'
'PREFER_DATES_FROM': 'past', # Valid: 'past', 'future'
'LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD': 0.8 # Valid: 0.0-1.0
})
Decorator functions for applying settings to parsing functions and managing configuration context.
def apply_settings(f):
"""
Decorator that applies settings to parsing functions.
Automatically handles settings parameter processing and validation,
ensuring proper Settings instance is passed to wrapped functions.
Parameters:
- f (function): Function to wrap with settings application
Returns:
function: Wrapped function with settings handling
"""
Usage Examples:
from dateparser.conf import apply_settings, Settings
# Using apply_settings decorator for custom parsing functions
@apply_settings
def custom_parse_function(date_string, settings=None):
# settings is automatically processed and validated
# Use settings.TIMEZONE, settings.DATE_ORDER, etc.
print(f"Parsing with timezone: {settings.TIMEZONE}")
print(f"Date order preference: {settings.DATE_ORDER}")
return None
# Call with dict settings (automatically converted to Settings)
custom_parse_function("2023-01-15", settings={'TIMEZONE': 'UTC'})
# Call with Settings instance
settings = Settings({'DATE_ORDER': 'DMY'})
custom_parse_function("15/01/2023", settings=settings)
# Call with no settings (uses defaults)
custom_parse_function("January 15, 2023")
# Date order for ambiguous dates
DATE_ORDER: str # 'MDY', 'DMY', 'YMD'
PREFER_LOCALE_DATE_ORDER: bool # Use locale-specific order
# Date preference when ambiguous
PREFER_DATES_FROM: str # 'past', 'future'
PREFER_MONTH_OF_YEAR: str # 'current', 'last', 'next'
PREFER_DAY_OF_MONTH: str # 'first', 'last', 'current'
TIMEZONE: str # Default timezone (e.g., 'UTC', 'America/New_York')
TO_TIMEZONE: str # Convert results to this timezone
RETURN_AS_TIMEZONE_AWARE: bool # Return timezone-aware datetimes
STRICT_PARSING: bool # Enable strict parsing mode
REQUIRE_PARTS: list # Required date parts: ['day', 'month', 'year']
NORMALIZE: bool # Enable text normalization
RETURN_TIME_AS_PERIOD: bool # Return time ranges as periods
RELATIVE_BASE: datetime # Base date for relative parsing
DEFAULT_LANGUAGES: list # Default language codes for detection
LANGUAGE_DETECTION_CONFIDENCE_THRESHOLD: float # 0.0-1.0 threshold
PARSERS: list # Available: ['timestamp', 'relative-time', 'custom-formats',
# 'absolute-time', 'no-spaces-time', 'negative-timestamp']
SKIP_TOKENS: list # Tokens to skip during parsing
CACHE_SIZE_LIMIT: int # Maximum cache size for results
Install with Tessl CLI
npx tessl i tessl/pypi-dateparser