CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-dateparser

Date parsing library designed to parse dates from HTML pages

Pending
Overview
Eval results
Files

utilities.mddocs/

Utilities

Additional utility functions for date manipulation, timezone handling, date range generation, and text processing that support complex date processing workflows and integration scenarios.

Capabilities

Date Range Generation

Generate sequences of dates between specified start and end points with flexible step configurations.

def date_range(begin, end, **kwargs):
    """
    Generate a sequence of dates from begin up to end.

    Parameters:
    - begin (datetime): Start date; it is the first value yielded when begin < end
    - end (datetime): End date. Normally exclusive. The one exception is a
                      `months` step: when the first stepped date at or past
                      `end` falls in the same year and month as `end`, `end`
                      itself is yielded as the final value
    - **kwargs: Step size, given as plural dateutil `relativedelta` keywords
                (days, weeks, months, years, hours, minutes, seconds).
                When no keyword is given the step defaults to one day.
                Note: Cannot use year, month, week, day, hour, minute, second as these
                are reserved and will raise ValueError

    Returns:
    generator: Generator yielding datetime objects

    Raises:
    ValueError: If invalid step arguments are provided. Because this is a
                generator, the error is raised when iteration starts, not when
                date_range() is called
    """

Usage Examples:

from dateparser.date import date_range
from datetime import datetime

# Daily range
start = datetime(2023, 1, 1)
end = datetime(2023, 1, 10)
for date in date_range(start, end):
    print(date)  # 2023-01-01, 2023-01-02, ..., 2023-01-09

# Weekly intervals
for date in date_range(start, end, weeks=1):
    print(date)  # Every week from start to end

# Monthly intervals  
start = datetime(2023, 1, 1)
end = datetime(2023, 6, 1)
for date in date_range(start, end, months=1):
    print(date)  # First of each month

# Custom step sizes
for date in date_range(start, end, days=3):
    print(date)  # Every 3 days

# Hourly intervals
start = datetime(2023, 1, 1, 0, 0)
end = datetime(2023, 1, 1, 12, 0)
for date in date_range(start, end, hours=2):
    print(date)  # Every 2 hours

Time Period Analysis

Analyze and find intersecting time periods for scheduling and temporal data analysis.

def get_intersecting_periods(low, high, period="day"):
    """
    Get the start of every period that intersects the given range.

    The first value is `low` truncated to the start of the period that
    contains it (so it may be earlier than `low`); the following values advance
    by one period each and stop before `high`. Weeks start on Monday.

    Parameters:
    - low (datetime): Start of time range
    - high (datetime): End of time range (exclusive)
    - period (str): Period type ('year', 'month', 'week', 'day', 'hour', 'minute', 'second', 'microsecond')

    Returns:
    generator: Generator yielding the start datetime of each intersecting period;
               yields nothing when high <= low

    Raises:
    ValueError: If invalid period type is provided (raised when iteration
                starts, since this is a generator)
    """

Usage Examples:

from dateparser.date import get_intersecting_periods
from datetime import datetime

# Find intersecting days
start = datetime(2023, 1, 15, 14, 30)
end = datetime(2023, 1, 18, 10, 15)
days = list(get_intersecting_periods(start, end, "day"))
# Returns the start (00:00) of each day the range touches: Jan 15, 16, 17, 18

# Find intersecting months
start = datetime(2023, 1, 15)
end = datetime(2023, 3, 20)
months = list(get_intersecting_periods(start, end, "month"))
# Returns the first of each month the range touches (Jan 1, Feb 1, Mar 1)

# Hourly intersections for scheduling
start = datetime(2023, 1, 1, 9, 30)
end = datetime(2023, 1, 1, 14, 45)
hours = list(get_intersecting_periods(start, end, "hour"))
# Returns the start of each hour the range touches (9:00, 10:00, 11:00, 12:00, 13:00, 14:00)

Date String Processing

Utilities for cleaning, normalizing, and preprocessing date strings before parsing.

def sanitize_date(date_string):
    """
    Sanitize and normalize date strings for better parsing.
    
    Removes unwanted characters, normalizes whitespace, handles
    special Unicode characters, and prepares strings for parsing.
    
    Parameters:
    - date_string (str): Raw date string to clean
    
    Returns:
    str: Cleaned and normalized date string
    """

def sanitize_spaces(date_string):
    """
    Normalize whitespace in date strings.
    
    Parameters:
    - date_string (str): Date string with irregular spacing
    
    Returns:
    str: Date string with normalized spaces
    """

Usage Examples:

from dateparser.date import sanitize_date, sanitize_spaces

# Clean messy date strings
messy_date = "  Jan\t15,\n\n2023  \xa0 "
clean_date = sanitize_date(messy_date)
# Returns: "Jan 15, 2023"

normalized = sanitize_spaces("Jan  15,    2023")
# Returns: "Jan 15, 2023"

# Use in preprocessing pipeline
import dateparser

def robust_parse(date_string):
    """Sanitize ``date_string`` and hand the result to ``dateparser.parse``.

    Returns whatever ``dateparser.parse`` returns for the sanitized string.
    """
    return dateparser.parse(sanitize_date(date_string))

date = robust_parse("  \tJanuary\n15\xa0,  2023  ")

Timezone Utilities

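Comprehensive timezone handling functions for parsing, conversion, and normalization. `StaticTzInfo`, `pop_tz_offset_from_string`, and `convert_to_local_tz` are imported from `dateparser.timezone_parser`; the other functions in this section are imported from `dateparser.utils`.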

class StaticTzInfo(tzinfo):
    """
    Static timezone information class for representing fixed timezone offsets.

    Used internally by dateparser for timezone-aware datetime objects when
    parsing dates with timezone information. Import it from
    `dateparser.timezone_parser`.
    """
    
    def __init__(self, name, offset):
        """
        Initialize static timezone.
        
        Parameters:
        - name (str): Timezone name or abbreviation
        - offset (timedelta): UTC offset for this timezone
        """
    
    def tzname(self, dt):
        """Return timezone name."""
    
    def utcoffset(self, dt):
        """Return UTC offset."""
    
    def dst(self, dt):
        """Return DST offset (always zero for static timezones)."""
    
    def localize(self, dt, is_dst=False):
        """
        Localize naive datetime to this timezone.

        The wall-clock time is kept as-is; only tzinfo is attached.

        Parameters:
        - dt (datetime): Naive datetime to localize
        - is_dst (bool): DST flag (ignored for static timezones)

        Returns:
        datetime: Timezone-aware datetime

        Raises:
        ValueError: If dt already has tzinfo set (is not naive)
        """

def get_timezone_from_tz_string(tz_string):
    """
    Parse timezone string and return timezone object.

    The string is first looked up as a tz database name (e.g.
    "America/New_York"); if that fails it is matched against dateparser's own
    table of abbreviations and offsets, yielding a StaticTzInfo.

    Parameters:
    - tz_string (str): Timezone identifier or abbreviation

    Returns:
    tzinfo: Timezone object for the given string

    Raises:
    The underlying unknown-timezone error is re-raised when the string matches
    neither a tz database name nor a known abbreviation
    """

def apply_timezone(date_time, tz_string):
    """
    Convert datetime object to the given timezone.

    This is a conversion, not a relabelling: the returned datetime represents
    the same instant expressed in the target timezone. A naive datetime is
    assumed to be in UTC before it is converted.

    Parameters:
    - date_time (datetime): Datetime to convert (naive values are treated as UTC)
    - tz_string (str): Timezone identifier

    Returns:
    datetime: Timezone-aware datetime object in the target timezone
    """

def apply_timezone_from_settings(date_obj, settings):
    """
    Apply timezone based on settings configuration.

    The datetime is first interpreted in `settings.TIMEZONE`, then converted to
    `settings.TO_TIMEZONE` when that is set. Unless
    `settings.RETURN_AS_TIMEZONE_AWARE` is True, tzinfo is removed from the
    result, so the returned value is naive.

    Parameters:
    - date_obj (datetime): Datetime object
    - settings (Settings): Settings containing timezone preferences; when None,
                           date_obj is returned unchanged

    Returns:
    datetime: Datetime with applied timezone settings
    """

def localize_timezone(date_time, tz_string):
    """
    Localize naive datetime to specific timezone.

    Attaches the timezone to the datetime without shifting its wall-clock
    time. A datetime that already has tzinfo is returned unchanged.

    Parameters:
    - date_time (datetime): Naive datetime object
    - tz_string (str): Target timezone (tz database name or abbreviation)

    Returns:
    datetime: Localized datetime object
    """

def pop_tz_offset_from_string(date_string, as_offset=True):
    """
    Extract timezone offset from date string.

    Parameters:
    - date_string (str): Date string potentially containing timezone info
    - as_offset (bool): If True, return the timezone as a StaticTzInfo object;
                        if False, return the timezone name as a string

    Returns:
    tuple: (cleaned_date_string, timezone_offset_or_name). When no timezone is
           found, the string is returned unchanged and the second item is None
    """

def convert_to_local_tz(datetime_obj, datetime_tz_offset):
    """
    Convert datetime with timezone offset to local timezone.

    The datetime is read as wall-clock time at `datetime_tz_offset` from UTC
    and shifted to the local machine's UTC offset.

    Parameters:
    - datetime_obj (datetime): Datetime object to convert, expressed in the
                               timezone described by datetime_tz_offset
    - datetime_tz_offset (timedelta): UTC offset of datetime_obj's timezone

    Returns:
    datetime: Datetime converted to local timezone
    """

Usage Examples:

from dateparser.utils import (
    get_timezone_from_tz_string, 
    apply_timezone, 
    apply_timezone_from_settings,
    localize_timezone
)
from dateparser.conf import Settings
from datetime import datetime

# Parse timezone strings
tz = get_timezone_from_tz_string("America/New_York")
utc_tz = get_timezone_from_tz_string("UTC")

# Apply timezone to datetime
naive_dt = datetime(2023, 1, 15, 14, 30)
aware_dt = apply_timezone(naive_dt, "Europe/London")

# Use settings for timezone application
settings = Settings({
    'TIMEZONE': 'America/Los_Angeles',
    'TO_TIMEZONE': 'UTC'
})
converted_dt = apply_timezone_from_settings(naive_dt, settings)

# Localize naive datetime
localized = localize_timezone(naive_dt, "Asia/Tokyo")

# Timezone conversion pipeline
def parse_with_timezone(date_string, target_tz="UTC"):
    """Parse ``date_string`` and express the result in ``target_tz``.

    Returns None when the string cannot be parsed.
    """
    import dateparser

    # dateparser detects any timezone present in the string on its own
    parsed = dateparser.parse(date_string)
    if not parsed:
        return None
    # Convert the parsed value to the requested timezone
    return apply_timezone(parsed, target_tz)

# Usage
date = parse_with_timezone("2023-01-15 2:30 PM EST", "Europe/Paris")

Text Processing Utilities

Helper functions for text processing and Unicode handling in date parsing contexts.

def strip_braces(date_string):
    """
    Remove bracket characters from date string.

    Despite the name, every kind of bracket is removed: curly {}, round (),
    angle <> and square []. Only the bracket characters are deleted; the text
    between them is kept.

    Parameters:
    - date_string (str): String potentially containing brackets

    Returns:
    str: String with bracket characters removed
    """

def normalize_unicode(string, form="NFKD"):
    """
    Normalize Unicode string for consistent processing.

    After normalizing to `form`, combining marks (Unicode category "Mn") are
    dropped, so with the default NFKD form accented letters lose their accents
    (e.g. "ä" becomes "a").

    Parameters:
    - string (str): Unicode string to normalize
    - form (str): Normalization form ('NFC', 'NFKC', 'NFD', 'NFKD')

    Returns:
    str: Normalized Unicode string with combining marks removed
    """

def combine_dicts(primary_dict, supplementary_dict):
    """
    Combine two dictionaries into one.

    Keys found in only one dictionary are copied as-is. For keys found in both:
    - list values are concatenated (primary first, then supplementary)
    - dict values are combined recursively
    - any other value is taken from supplementary_dict, i.e. the supplementary
      value overrides the primary one

    Parameters:
    - primary_dict (dict): Base dictionary
    - supplementary_dict (dict): Values merged into (and, for plain values,
                                 overriding) the base

    Returns:
    dict: Combined dictionary (an OrderedDict)
    """

Usage Examples:

from dateparser.utils import strip_braces, normalize_unicode, combine_dicts

# Clean bracketed dates
date_with_braces = "[January 15, 2023]"
clean_date = strip_braces(date_with_braces)
# Returns: "January 15, 2023"

# Unicode normalization
unicode_date = "Jänüary 15, 2023"  # Contains non-ASCII characters
normalized = normalize_unicode(unicode_date)
# Returns: "January 15, 2023" (accents are dropped because combining marks are stripped)

# Configuration merging
# For a plain value present in both dicts, combine_dicts keeps the one from its
# second (supplementary) argument, so the overrides must be passed second.
default_config = {'TIMEZONE': 'UTC', 'STRICT_PARSING': False}
user_config = {'TIMEZONE': 'America/New_York'}
final_config = combine_dicts(default_config, user_config)
# Returns an OrderedDict equal to: {'TIMEZONE': 'America/New_York', 'STRICT_PARSING': False}

# Preprocessing pipeline
def preprocess_date_string(raw_string):
    """Strip brackets, normalize Unicode, then tidy whitespace in ``raw_string``.

    Returns the fully preprocessed string.
    """
    # Brackets go first, then Unicode is normalized on the bracket-free text
    without_brackets = strip_braces(raw_string)
    unicode_normalized = normalize_unicode(without_brackets)
    # Whitespace cleanup is the last step
    from dateparser.date import sanitize_spaces
    return sanitize_spaces(unicode_normalized)

processed = preprocess_date_string("[Jänüary  15,\t2023]")

Calendar Utilities

Utilities for working with calendar-specific operations and date calculations.

def get_last_day_of_month(year, month):
    """
    Get the last day of a specific month and year.
    
    Parameters:
    - year (int): Year
    - month (int): Month (1-12)
    
    Returns:
    int: Last day of the month
    """

def get_previous_leap_year(year):
    """
    Find the previous leap year before given year.
    
    Parameters:
    - year (int): Reference year
    
    Returns:
    int: Previous leap year
    """

def get_next_leap_year(year):
    """
    Find the next leap year after given year.
    
    Parameters:
    - year (int): Reference year
    
    Returns:
    int: Next leap year
    """

def set_correct_day_from_settings(date_obj, settings, current_day=None):
    """
    Adjust day of month according to the PREFER_DAY_OF_MONTH setting.

    'first' selects day 1, 'last' selects the last day of date_obj's month, and
    'current' selects current_day. If the chosen day does not exist in that
    month, the last day of the month is used instead.

    Parameters:
    - date_obj (datetime): Date to adjust
    - settings (Settings): Settings providing PREFER_DAY_OF_MONTH
    - current_day (int, optional): Day used for the 'current' preference;
                                   defaults to today's day of month

    Returns:
    datetime: Date with adjusted day
    """

def set_correct_month_from_settings(date_obj, settings, current_month=None):
    """
    Adjust month according to the PREFER_MONTH_OF_YEAR setting.

    'first' selects January, 'last' selects December, and 'current' selects
    current_month.

    Parameters:
    - date_obj (datetime): Date to adjust
    - settings (Settings): Settings providing PREFER_MONTH_OF_YEAR
    - current_month (int, optional): Month used for the 'current' preference;
                                     defaults to the current calendar month

    Returns:
    datetime: Date with adjusted month
    """

Usage Examples:

from dateparser.utils import (
    get_last_day_of_month,
    get_previous_leap_year, get_next_leap_year,
    set_correct_day_from_settings, set_correct_month_from_settings
)
from dateparser.conf import Settings
from datetime import datetime

# Calendar calculations
last_day = get_last_day_of_month(2023, 2)  # 28 (not a leap year)
last_day_leap = get_last_day_of_month(2024, 2)  # 29 (leap year)

prev_leap = get_previous_leap_year(2023)  # 2020
next_leap = get_next_leap_year(2023)  # 2024

# Settings-based date adjustment
date = datetime(2023, 1, 15)
settings = Settings({'PREFER_DAY_OF_MONTH': 'first'})
adjusted = set_correct_day_from_settings(date, settings)
# Adjusts to first day of month based on settings

settings = Settings({'PREFER_MONTH_OF_YEAR': 'current'})
adjusted = set_correct_month_from_settings(date, settings, current_month=3)
# Adjusts month based on preference and current context

Additional Timezone Parsing Functions

Essential timezone parsing and conversion utilities for advanced timezone handling scenarios.

def pop_tz_offset_from_string(date_string, as_offset=True):
    """
    Extract timezone offset from date string and return cleaned string.
    
    Args:
        date_string (str): Date string potentially containing timezone info
        as_offset (bool): If True, return StaticTzInfo object; if False, return timezone name
        
    Returns:
        tuple: (cleaned_date_string, timezone_info_or_name). When no timezone
            is found, the string is returned unchanged and the second item
            is None.
        
    Examples:
        >>> pop_tz_offset_from_string("2023-01-15 14:30 UTC")
        ("2023-01-15 14:30 ", StaticTzInfo('UTC', timedelta(0)))
        
        >>> pop_tz_offset_from_string("2023-01-15 14:30 EST", as_offset=False)
        ("2023-01-15 14:30 ", "EST")
    """

def word_is_tz(word):
    """
    Check if a word represents a timezone abbreviation.
    
    Args:
        word (str): Word to check for timezone abbreviation
        
    Returns:
        bool: True if word is a recognized timezone abbreviation
        
    Examples:
        >>> word_is_tz("UTC")
        True
        >>> word_is_tz("EST")
        True
        >>> word_is_tz("hello")
        False
    """

def convert_to_local_tz(datetime_obj, datetime_tz_offset):
    """
    Convert datetime with timezone offset to local timezone.
    
    Args:
        datetime_obj (datetime): Datetime object to convert
        datetime_tz_offset (timedelta): Timezone offset of the datetime
        
    Returns:
        datetime: Datetime converted to local timezone
        
    Examples:
        >>> from datetime import datetime, timedelta
        >>> dt = datetime(2023, 1, 15, 14, 30)
        >>> offset = timedelta(hours=-5)  # EST offset
        >>> local_dt = convert_to_local_tz(dt, offset)
    """

Advanced Timezone Integration

from dateparser.timezone_parser import pop_tz_offset_from_string, word_is_tz, convert_to_local_tz

# Extract timezone from date string
date_string = "Meeting at 2:30 PM EST on January 15th"
cleaned_string, tz_info = pop_tz_offset_from_string(date_string)
print(f"Cleaned: {cleaned_string}")
print(f"Timezone: {tz_info}")

# Check if word is timezone
words = ["UTC", "EST", "hello", "PST", "world"]
timezones = [word for word in words if word_is_tz(word)]
print(f"Timezones found: {timezones}")  # ['UTC', 'EST', 'PST']

# Convert to local timezone
# convert_to_local_tz reads the datetime as wall-clock time at the given UTC
# offset, so the value and the offset must describe the same timezone.
from datetime import datetime, timedelta
est_time = datetime(2023, 1, 15, 19, 30)  # 7:30 PM EST (00:30 UTC on Jan 16)
est_offset = timedelta(hours=-5)
local_time = convert_to_local_tz(est_time, est_offset)

Install with Tessl CLI

npx tessl i tessl/pypi-dateparser

docs

calendar-systems.md

configuration.md

core-parsing.md

date-search.md

index.md

utilities.md

tile.json