Date parsing library designed to parse dates from HTML pages
—
Additional utility functions for date manipulation, timezone handling, date range generation, and text processing that support complex date processing workflows and integration scenarios.
Generate sequences of dates between specified start and end points with flexible step configurations.
def date_range(begin, end, **kwargs):
"""
Generate sequence of dates between begin and end.
Parameters:
- begin (datetime): Start date
- end (datetime): End date (exclusive)
- **kwargs: Step parameters (days, weeks, months, years, hours, minutes, seconds)
Note: The singular forms (year, month, week, day, hour, minute, second) are
not accepted as step arguments; passing any of them raises ValueError
Returns:
generator: Generator yielding datetime objects
Raises:
ValueError: If invalid step arguments are provided
"""
Usage Examples:
from dateparser.date import date_range
from datetime import datetime
# Daily range
start = datetime(2023, 1, 1)
end = datetime(2023, 1, 10)
for date in date_range(start, end):
print(date) # 2023-01-01, 2023-01-02, ..., 2023-01-09
# Weekly intervals
for date in date_range(start, end, weeks=1):
print(date) # Every week from start to end
# Monthly intervals
start = datetime(2023, 1, 1)
end = datetime(2023, 6, 1)
for date in date_range(start, end, months=1):
print(date) # First of each month
# Custom step sizes
for date in date_range(start, end, days=3):
print(date) # Every 3 days
# Hourly intervals
start = datetime(2023, 1, 1, 0, 0)
end = datetime(2023, 1, 1, 12, 0)
for date in date_range(start, end, hours=2):
print(date) # Every 2 hours

Analyze and find intersecting time periods for scheduling and temporal data analysis.
def get_intersecting_periods(low, high, period="day"):
"""
Get periods that intersect with given range.
Parameters:
- low (datetime): Start of time range
- high (datetime): End of time range
- period (str): Period type ('year', 'month', 'week', 'day', 'hour', 'minute', 'second', 'microsecond')
Returns:
generator: Generator yielding period boundaries that intersect with range
Raises:
ValueError: If invalid period type is provided
"""
Usage Examples:
from dateparser.date import get_intersecting_periods
from datetime import datetime
# Find intersecting days
start = datetime(2023, 1, 15, 14, 30)
end = datetime(2023, 1, 18, 10, 15)
days = list(get_intersecting_periods(start, end, "day"))
# Returns day boundaries that intersect with the range
# Find intersecting months
start = datetime(2023, 1, 15)
end = datetime(2023, 3, 20)
months = list(get_intersecting_periods(start, end, "month"))
# Returns the start of every month the range touches (Jan 1, Feb 1, Mar 1)
# Hourly intersections for scheduling
start = datetime(2023, 1, 1, 9, 30)
end = datetime(2023, 1, 1, 14, 45)
hours = list(get_intersecting_periods(start, end, "hour"))
# Returns the start of every hour the range touches (09:00, 10:00, 11:00, 12:00, 13:00, 14:00)

Utilities for cleaning, normalizing, and preprocessing date strings before parsing.
def sanitize_date(date_string):
"""
Sanitize and normalize date strings for better parsing.
Removes unwanted characters, normalizes whitespace, handles
special Unicode characters, and prepares strings for parsing.
Parameters:
- date_string (str): Raw date string to clean
Returns:
str: Cleaned and normalized date string
"""
def sanitize_spaces(date_string):
"""
Normalize whitespace in date strings.
Parameters:
- date_string (str): Date string with irregular spacing
Returns:
str: Date string with normalized spaces
"""
Usage Examples:
from dateparser.date import sanitize_date, sanitize_spaces
# Clean messy date strings
messy_date = " Jan\t15,\n\n2023 \xa0 "
clean_date = sanitize_date(messy_date)
# Returns: "Jan 15, 2023"
normalized = sanitize_spaces("Jan   15,    2023")
# Returns: "Jan 15, 2023"
# Use in preprocessing pipeline
import dateparser
def robust_parse(date_string):
cleaned = sanitize_date(date_string)
return dateparser.parse(cleaned)
date = robust_parse(" \tJanuary\n15\xa0, 2023 ")

Comprehensive timezone handling functions for parsing, conversion, and normalization.
class StaticTzInfo(tzinfo):
"""
Static timezone information class for representing fixed timezone offsets.
Used internally by dateparser for timezone-aware datetime objects when
parsing dates with timezone information.
"""
def __init__(self, name, offset):
"""
Initialize static timezone.
Parameters:
- name (str): Timezone name or abbreviation
- offset (timedelta): UTC offset for this timezone
"""
def tzname(self, dt):
"""Return timezone name."""
def utcoffset(self, dt):
"""Return UTC offset."""
def dst(self, dt):
"""Return DST offset (always zero for static timezones)."""
def localize(self, dt, is_dst=False):
"""
Localize naive datetime to this timezone.
Parameters:
- dt (datetime): Naive datetime to localize
- is_dst (bool): DST flag (ignored for static timezones)
Returns:
datetime: Timezone-aware datetime
"""
def get_timezone_from_tz_string(tz_string):
"""
Parse timezone string and return timezone object.
Parameters:
- tz_string (str): Timezone identifier or abbreviation
Returns:
tzinfo: Timezone object for the given string
"""
def apply_timezone(date_time, tz_string):
"""
Apply timezone to datetime object.
Parameters:
- date_time (datetime): Datetime to apply timezone to
- tz_string (str): Timezone identifier
Returns:
datetime: Timezone-aware datetime object
"""
def apply_timezone_from_settings(date_obj, settings):
"""
Apply timezone based on settings configuration.
Parameters:
- date_obj (datetime): Datetime object
- settings (Settings): Settings containing timezone preferences
Returns:
datetime: Datetime with applied timezone settings
"""
def localize_timezone(date_time, tz_string):
"""
Localize naive datetime to specific timezone.
Parameters:
- date_time (datetime): Naive datetime object
- tz_string (str): Target timezone
Returns:
datetime: Localized datetime object
"""
def pop_tz_offset_from_string(date_string, as_offset=True):
"""
Extract timezone offset from date string.
Parameters:
- date_string (str): Date string potentially containing timezone info
- as_offset (bool): Return as offset object rather than string
Returns:
tuple: (cleaned_date_string, timezone_offset_or_name)
"""
def convert_to_local_tz(datetime_obj, datetime_tz_offset):
"""
Convert datetime with timezone offset to local timezone.
Parameters:
- datetime_obj (datetime): Datetime object to convert
- datetime_tz_offset: Timezone offset information
Returns:
datetime: Datetime converted to local timezone
"""
Usage Examples:
from dateparser.utils import (
get_timezone_from_tz_string,
apply_timezone,
apply_timezone_from_settings,
localize_timezone
)
from dateparser.conf import Settings
from datetime import datetime
# Parse timezone strings
tz = get_timezone_from_tz_string("America/New_York")
utc_tz = get_timezone_from_tz_string("UTC")
# Apply timezone to datetime
naive_dt = datetime(2023, 1, 15, 14, 30)
aware_dt = apply_timezone(naive_dt, "Europe/London")
# Use settings for timezone application
settings = Settings({
'TIMEZONE': 'America/Los_Angeles',
'TO_TIMEZONE': 'UTC'
})
converted_dt = apply_timezone_from_settings(naive_dt, settings)
# Localize naive datetime
localized = localize_timezone(naive_dt, "Asia/Tokyo")
# Timezone conversion pipeline
def parse_with_timezone(date_string, target_tz="UTC"):
import dateparser
# Parse with automatic timezone detection
date = dateparser.parse(date_string)
if date:
# Apply target timezone
return apply_timezone(date, target_tz)
return None
# Usage
date = parse_with_timezone("2023-01-15 2:30 PM EST", "Europe/Paris")

Helper functions for text processing and Unicode handling in date parsing contexts.
def strip_braces(date_string):
"""
Remove braces from date string.
Parameters:
- date_string (str): String potentially containing braces
Returns:
str: String with braces removed
"""
def normalize_unicode(string, form="NFKD"):
"""
Normalize Unicode string for consistent processing.
Parameters:
- string (str): Unicode string to normalize
- form (str): Normalization form ('NFC', 'NFKC', 'NFD', 'NFKD')
Returns:
str: Normalized Unicode string
"""
def combine_dicts(primary_dict, supplementary_dict):
"""
Combine dictionaries with primary taking precedence.
Parameters:
- primary_dict (dict): Primary dictionary
- supplementary_dict (dict): Supplementary values
Returns:
dict: Combined dictionary
"""
Usage Examples:
from dateparser.utils import strip_braces, normalize_unicode, combine_dicts
# Clean bracketed dates
date_with_braces = "[January 15, 2023]"
clean_date = strip_braces(date_with_braces)
# Returns: "January 15, 2023"
# Unicode normalization
unicode_date = "Jänüary 15, 2023" # Contains non-ASCII characters
normalized = normalize_unicode(unicode_date)
# Returns normalized ASCII-compatible string
# Configuration merging
default_config = {'TIMEZONE': 'UTC', 'STRICT_PARSING': False}
user_config = {'TIMEZONE': 'America/New_York'}
final_config = combine_dicts(user_config, default_config)
# Returns: {'TIMEZONE': 'America/New_York', 'STRICT_PARSING': False}
# Preprocessing pipeline
def preprocess_date_string(raw_string):
# Remove braces
cleaned = strip_braces(raw_string)
# Normalize Unicode
normalized = normalize_unicode(cleaned)
# Sanitize spacing
from dateparser.date import sanitize_spaces
final = sanitize_spaces(normalized)
return final
processed = preprocess_date_string("[Jänüary 15,\t2023]")

Utilities for working with calendar-specific operations and date calculations.
def get_last_day_of_month(year, month):
"""
Get the last day of a specific month and year.
Parameters:
- year (int): Year
- month (int): Month (1-12)
Returns:
int: Last day of the month
"""
def get_previous_leap_year(year):
"""
Find the previous leap year before given year.
Parameters:
- year (int): Reference year
Returns:
int: Previous leap year
"""
def get_next_leap_year(year):
"""
Find the next leap year after given year.
Parameters:
- year (int): Reference year
Returns:
int: Next leap year
"""
def set_correct_day_from_settings(date_obj, settings, current_day=None):
"""
Adjust day based on settings preferences.
Parameters:
- date_obj (datetime): Date to adjust
- settings (Settings): Settings with day preferences
- current_day (int, optional): Current day reference
Returns:
datetime: Date with adjusted day
"""
def set_correct_month_from_settings(date_obj, settings, current_month=None):
"""
Adjust month based on settings preferences.
Parameters:
- date_obj (datetime): Date to adjust
- settings (Settings): Settings with month preferences
- current_month (int, optional): Current month reference
Returns:
datetime: Date with adjusted month
"""
Usage Examples:
from dateparser.utils import (
get_last_day_of_month,
get_previous_leap_year, get_next_leap_year,
set_correct_day_from_settings, set_correct_month_from_settings
)
from dateparser.conf import Settings
from datetime import datetime
# Calendar calculations
last_day = get_last_day_of_month(2023, 2) # 28 (not a leap year)
last_day_leap = get_last_day_of_month(2024, 2) # 29 (leap year)
prev_leap = get_previous_leap_year(2023) # 2020
next_leap = get_next_leap_year(2023) # 2024
# Settings-based date adjustment
date = datetime(2023, 1, 15)
settings = Settings({'PREFER_DAY_OF_MONTH': 'first'})
adjusted = set_correct_day_from_settings(date, settings)
# Adjusts to first day of month based on settings
settings = Settings({'PREFER_MONTH_OF_YEAR': 'current'})
adjusted = set_correct_month_from_settings(date, settings, current_month=3)
# Adjusts month based on preference and current context

Essential timezone parsing and conversion utilities for advanced timezone handling scenarios.
def pop_tz_offset_from_string(date_string, as_offset=True):
"""
Extract timezone offset from date string and return cleaned string.
Args:
date_string (str): Date string potentially containing timezone info
as_offset (bool): If True, return StaticTzInfo object; if False, return timezone name
Returns:
tuple: (cleaned_date_string, timezone_info_or_name)
Examples:
>>> pop_tz_offset_from_string("2023-01-15 14:30 UTC")
("2023-01-15 14:30 ", StaticTzInfo('UTC', timedelta(0)))
>>> pop_tz_offset_from_string("2023-01-15 14:30 EST", as_offset=False)
("2023-01-15 14:30 ", "EST")
"""
def word_is_tz(word):
"""
Check if a word represents a timezone abbreviation.
Args:
word (str): Word to check for timezone abbreviation
Returns:
bool: True if word is a recognized timezone abbreviation
Examples:
>>> word_is_tz("UTC")
True
>>> word_is_tz("EST")
True
>>> word_is_tz("hello")
False
"""
def convert_to_local_tz(datetime_obj, datetime_tz_offset):
"""
Convert datetime with timezone offset to local timezone.
Args:
datetime_obj (datetime): Datetime object to convert
datetime_tz_offset (timedelta): Timezone offset of the datetime
Returns:
datetime: Datetime converted to local timezone
Examples:
>>> from datetime import datetime, timedelta
>>> dt = datetime(2023, 1, 15, 14, 30)
>>> offset = timedelta(hours=-5) # EST offset
>>> local_dt = convert_to_local_tz(dt, offset)
"""
from dateparser.timezone_parser import pop_tz_offset_from_string, word_is_tz, convert_to_local_tz
# Extract timezone from date string
date_string = "Meeting at 2:30 PM EST on January 15th"
cleaned_string, tz_info = pop_tz_offset_from_string(date_string)
print(f"Cleaned: {cleaned_string}")
print(f"Timezone: {tz_info}")
# Check if word is timezone
words = ["UTC", "EST", "hello", "PST", "world"]
timezones = [word for word in words if word_is_tz(word)]
print(f"Timezones found: {timezones}") # ['UTC', 'EST', 'PST']
# Convert to local timezone
from datetime import datetime, timedelta
est_time = datetime(2023, 1, 15, 14, 30) # 2:30 PM EST (7:30 PM UTC)
est_offset = timedelta(hours=-5) # UTC offset of est_time
local_time = convert_to_local_tz(est_time, est_offset)

Install with Tessl CLI
npx tessl i tessl/pypi-dateparser