Date parsing library designed to parse dates from HTML pages
—
Support for non-Gregorian calendar systems including Islamic (Hijri) and Persian (Jalali) calendars for parsing dates in different cultural contexts. These calendar systems extend dateparser's capabilities beyond the standard Gregorian calendar.
Islamic calendar system support for parsing Hijri dates and converting them to Gregorian datetime objects.
class HijriCalendar(CalendarBase):
"""
Support for Hijri (Islamic) calendar dates.
Converts Hijri dates to Gregorian datetime objects while preserving
the original calendar context for accurate date parsing.
"""
def __init__(self, source):
"""
Initialize Hijri calendar parser.
Parameters:
- source (str): Date string in Hijri format
"""
def get_date(self):
"""
Parse Hijri date string and convert to DateData object.
Returns:
DateData: Object containing converted Gregorian datetime
"""

Usage Examples:
from dateparser.calendars.hijri import HijriCalendar
# Parse Hijri dates
hijri_date = "15 محرم 1445" # 15 Muharram 1445
calendar = HijriCalendar(hijri_date)
date_data = calendar.get_date()
gregorian_date = date_data.date_obj # Converted to Gregorian
# Integration with main dateparser
import dateparser
# Configure settings for Hijri calendar support
settings = {'CALENDARS': ['hijri']}
date = dateparser.parse("15 محرم 1445", settings=settings)

Persian calendar system support for parsing Jalali (Solar Hijri) dates used in Iran and Afghanistan.
class JalaliCalendar(CalendarBase):
"""
Support for Jalali (Persian) calendar dates.
Handles Solar Hijri calendar dates and converts them to
Gregorian datetime objects for standardized processing.
"""
def __init__(self, source):
"""
Initialize Jalali calendar parser.
Parameters:
- source (str): Date string in Jalali format
"""
def get_date(self):
"""
Parse Jalali date string and convert to DateData object.
Returns:
DateData: Object containing converted Gregorian datetime
"""

Usage Examples:
from dateparser.calendars.jalali import JalaliCalendar
# Parse Persian/Jalali dates
jalali_date = "15 فروردین 1402" # 15 Farvardin 1402
calendar = JalaliCalendar(jalali_date)
date_data = calendar.get_date()
gregorian_date = date_data.date_obj # Converted to Gregorian
# Integration with main dateparser
import dateparser
# Configure settings for Jalali calendar support
settings = {'CALENDARS': ['jalali']}
date = dateparser.parse("15 فروردین 1402", settings=settings)

Abstract base class for implementing custom calendar systems and extending dateparser with additional calendar support.
class CalendarBase:
"""
Base setup class for non-Gregorian calendar system.
Provides the framework for implementing custom calendar parsers
that can integrate with dateparser's main parsing pipeline.
"""
parser = NotImplemented # Must be implemented by subclasses
def __init__(self, source):
"""
Initialize calendar parser with source date string.
Parameters:
- source (str): Date string passed to calendar parser
"""
def get_date(self):
"""
Parse date string and return DateData object.
Returns:
DateData: Parsed date information, or None if parsing fails
"""

Usage Examples:
from dateparser.calendars import CalendarBase
from dateparser.date import DateData
# Example custom calendar implementation
class CustomCalendar(CalendarBase):
    def __init__(self, source):
        super().__init__(source)
        # Initialize custom calendar logic

    def get_date(self):
        try:
            # Custom parsing logic
            parsed_date = self.parse_custom_format(self.source)
            return DateData(date_obj=parsed_date)
        except ValueError:
            return None

    def parse_custom_format(self, date_string):
        # Implement custom parsing logic
        pass
# Use custom calendar
custom_date = "CustomFormat:2023:01:15"
calendar = CustomCalendar(custom_date)
date_data = calendar.get_date()

Lower-level parser classes that handle the actual conversion logic for specific calendar systems.
class hijri_parser(non_gregorian_parser):
"""
Hijri calendar parser implementation.
Handles conversion between Hijri calendar dates and Gregorian
calendar dates with proper month, day, and year mapping.
"""
calendar_converter = NotImplemented # Hijri to Gregorian converter
default_year: int
default_month: int
default_day: int
class jalali_parser(non_gregorian_parser):
"""
Jalali calendar parser implementation.
Handles conversion between Persian Solar Hijri calendar dates
and Gregorian calendar dates with accurate astronomical calculations.
"""
calendar_converter = NotImplemented # Jalali to Gregorian converter
default_year: int
default_month: int
default_day: int
class non_gregorian_parser:
"""
Base parser class for non-Gregorian calendar systems.
Provides common functionality for calendar conversion including
digit replacement, month name mapping, and date normalization.
"""
@classmethod
def to_latin(cls, source):
"""
Convert non-Latin script to Latin for processing.
Parameters:
- source (str): Date string in original script
Returns:
str: Latinized date string for parsing
"""
def handle_two_digit_year(self, year):
"""
Handle two-digit year conversion for calendar system.
Parameters:
- year (int): Two-digit year
Returns:
int: Full year in calendar system
"""

Usage Examples:
# Using parser classes directly for advanced scenarios
from dateparser.calendars.hijri_parser import hijri_parser
from dateparser.calendars.jalali_parser import jalali_parser
from dateparser.conf import Settings
# Direct Hijri parsing
hijri_text = "15 محرم 1445"
latinized = hijri_parser.to_latin(hijri_text)
date_obj, period = hijri_parser.parse(latinized, Settings())
# Direct Jalali parsing
jalali_text = "15 فروردین 1402"
latinized = jalali_parser.to_latin(jalali_text)
date_obj, period = jalali_parser.parse(latinized, Settings())
# Custom non-Gregorian parser
class MyCalendarParser(non_gregorian_parser):
    calendar_converter = MyCalendarConverter()
    default_year = 2000
    default_month = 1
    default_day = 1

    @classmethod
    def to_latin(cls, source):
        # Custom script conversion
        return super().to_latin(source)

Calendar systems require additional dependencies for full functionality:
# For Hijri and Jalali calendar support
pip install dateparser[calendars] # Installs convertdate, hijridate
# Individual packages
pip install "convertdate>=2.2.1"
pip install hijridate

# Calendar-specific settings
CALENDARS: list # Enable specific calendar systems ['hijri', 'jalali']

Usage Examples:
import dateparser
from dateparser.conf import Settings
# Enable multiple calendar systems
settings = Settings({
'CALENDARS': ['hijri', 'jalali'],
'DEFAULT_LANGUAGES': ['ar', 'fa', 'en']
})
# Parse dates from different calendar systems
hijri_date = dateparser.parse("15 محرم 1445", settings=settings)
jalali_date = dateparser.parse("15 فروردین 1402", settings=settings)
gregorian_date = dateparser.parse("January 15, 2023", settings=settings)
# Mixed calendar parsing in text
from dateparser.search import search_dates
text = "Events: 15 محرم 1445 (Hijri), 15 فروردین 1402 (Jalali), January 15, 2023 (Gregorian)"
dates = search_dates(text, settings=settings)
# Returns dates from all calendar systems converted to Gregorian

Install with Tessl CLI
npx tessl i tessl/pypi-dateparser