Date parsing library designed to parse dates from HTML pages
npx @tessl/cli install tessl/pypi-dateparser@1.2.0

A comprehensive Python library for parsing human-readable dates in multiple formats and languages. Dateparser supports almost every existing date format, including absolute dates, relative dates (like 'two weeks ago' or 'tomorrow'), timestamps, and dates from HTML pages. The library handles over 200 language locales, making it suitable for international applications that need to process dates from various cultural contexts.
pip install dateparser

import dateparser

For parsing individual dates:
from dateparser import parse
from dateparser import DateDataParser

For searching dates in text:
from dateparser.search import search_dates

For configuration:
from dateparser.conf import Settings

import dateparser
# Basic date parsing
date = dateparser.parse('2023-01-15')
# Returns: datetime.datetime(2023, 1, 15, 0, 0)
# Multi-language support
date = dateparser.parse('15 de enero de 2023', languages=['es'])
# Returns: datetime.datetime(2023, 1, 15, 0, 0)
# Relative date parsing
date = dateparser.parse('2 weeks ago')
# Returns: datetime corresponding to 2 weeks before current date
# Search for dates in text
from dateparser.search import search_dates
text = "The event happened on January 15, 2023 and ended on Feb 20, 2023"
dates = search_dates(text)
# Returns: [('January 15, 2023', datetime.datetime(2023, 1, 15, 0, 0)),
# ('Feb 20, 2023', datetime.datetime(2023, 2, 20, 0, 0))]
# Custom configuration
from dateparser.conf import Settings
settings = Settings(settings={'PREFER_DATES_FROM': 'future', 'TIMEZONE': 'UTC'})
date = dateparser.parse('tomorrow', settings=settings)

Dateparser uses a modular architecture built around several key components:
- The DateDataParser class handles language detection, translation, and parsing.
- The search_dates function finds and parses dates within text.

Primary date parsing functionality, including the main parse() function and the configurable DateDataParser class for advanced parsing scenarios with language detection and custom settings.
def parse(date_string, date_formats=None, languages=None, locales=None,
          region=None, settings=None, detect_languages_function=None):
    """Parse date and time from given date string."""
class DateDataParser:
    """Handles language detection, translation and parsing of date strings."""
    def __init__(self, languages=None, locales=None, region=None,
                 try_previous_locales=False, use_given_order=False,
                 settings=None, detect_languages_function=None): ...
    def get_date_data(self, date_string, date_formats=None): ...

Search functionality for finding and parsing multiple dates within text documents, with automatic language detection and customizable settings for batch date extraction.
def search_dates(text, languages=None, settings=None,
                 add_detected_language=False, detect_languages_function=None):
    """Find all substrings representing date and/or time and parse them."""

Comprehensive configuration system for customizing parsing behavior, including date order preferences, timezone handling, language detection, and parsing strategies.
class Settings:
    """Control and configure default parsing behavior."""
    def __init__(self, settings=None): ...
    def replace(self, mod_settings=None, **kwds): ...
class SettingValidationError(ValueError):
    """Raised when a provided setting is not valid."""

class UnknownTokenError(Exception):
    """Raised when an unknown token is encountered during parsing."""

Support for non-Gregorian calendar systems, including Islamic (Hijri) and Persian (Jalali) calendars, for parsing dates in different cultural contexts.
class HijriCalendar(CalendarBase):
    """Support for Hijri (Islamic) calendar dates."""

class JalaliCalendar(CalendarBase):
    """Support for Jalali (Persian) calendar dates."""

Additional utility functions for date manipulation, timezone handling, and date range generation to support complex date processing workflows.
def date_range(begin, end, **kwargs):
    """Generate sequence of dates between begin and end."""
def get_intersecting_periods(low, high, period="day"):
    """Get periods that intersect with given range."""

class DateData:
    """Container for parsed date information."""
    def __init__(self, date_obj=None, period=None, locale=None): ...
    date_obj: datetime
    period: str
    locale: Locale
class CalendarBase:
    """Base setup class for non-Gregorian calendar system."""
    def __init__(self, source: str): ...
    def get_date(self) -> DateData: ...
class Locale:
    """Class that deals with applicability and translation from a locale."""
    def __init__(self, shortname: str, language_info: dict): ...
    shortname: str
    language_info: dict