Date parsing library designed to parse dates from HTML pages
—
Search functionality for finding and parsing multiple dates within text documents. Automatically detects date patterns in natural language text and extracts them as datetime objects with optional language detection information.
Find and parse all date expressions within a text string, supporting automatic language detection and customizable parsing settings.
def search_dates(text, languages=None, settings=None,
add_detected_language=False, detect_languages_function=None):
"""
Find all substrings of the given string which represent date and/or time and parse them.
Parameters:
- text (str): A string in natural language which may contain date and/or time expressions
- languages (list, optional): List of two-letter language codes (e.g. ['en', 'es'])
- settings (dict, optional): Configure customized behavior using Settings
- add_detected_language (bool): Include detected language in results
- detect_languages_function (function, optional): Custom language detection function
Returns:
list: List of tuples containing (date_substring, datetime_object) or
(date_substring, datetime_object, detected_language) if add_detected_language=True
Returns None if no parseable dates are found
Raises:
ValueError: Unknown Language
"""

Usage Examples:
from dateparser.search import search_dates
# Basic date search
text = "The first artificial Earth satellite was launched on 4 October 1957."
dates = search_dates(text)
# Returns: [('on 4 October 1957', datetime.datetime(1957, 10, 4, 0, 0))]
# Multiple dates in text
text = "The client arrived on March 3rd, 2004 and returned on May 6th 2004"
dates = search_dates(text)
# Returns: [('on March 3rd, 2004', datetime.datetime(2004, 3, 3, 0, 0)),
# ('on May 6th 2004', datetime.datetime(2004, 5, 6, 0, 0))]
# With language detection
dates = search_dates(text, add_detected_language=True)
# Returns: [('on March 3rd, 2004', datetime.datetime(2004, 3, 3, 0, 0), 'en'),
# ('on May 6th 2004', datetime.datetime(2004, 5, 6, 0, 0), 'en')]
# Specific languages
text = "El evento fue el 15 de enero de 2023"
dates = search_dates(text, languages=['es'])
# Returns: [('15 de enero de 2023', datetime.datetime(2023, 1, 15, 0, 0))]
# With custom settings
from dateparser.conf import Settings
settings = Settings(PREFER_DATES_FROM='future', TIMEZONE='UTC')
dates = search_dates(text, settings=settings)

Lower-level search functionality for custom implementations and advanced search scenarios.
class DateSearchWithDetection:
"""
Core search functionality with automatic language detection.
"""
def search_dates(self, text, languages=None, settings=None,
detect_languages_function=None):
"""
Search for dates in text with language detection.
Parameters:
- text (str): Text to search for dates
- languages (list, optional): Language codes to use
- settings (dict, optional): Parsing settings
- detect_languages_function (function, optional): Custom language detection
Returns:
dict: Dictionary with 'Dates' and 'Language' keys
"""
def preprocess_text(self, text, languages):
"""
Preprocess text before date search.
Parameters:
- text (str): Input text
- languages (list): Language codes
Returns:
str: Preprocessed text
"""

Usage Examples:
from dateparser.search.search import DateSearchWithDetection
# Create search instance
search_engine = DateSearchWithDetection()
# Search with custom processing
text = "Meeting scheduled for next Friday and the presentation on January 15th"
result = search_engine.search_dates(text, languages=['en'])
dates = result.get('Dates')
language = result.get('Language')
# Batch processing multiple texts
texts = [
"Event on December 1st, 2023",
"Conference from March 15 to March 17, 2024",
"Deadline is tomorrow"
]
all_dates = []
for text in texts:
preprocessed = search_engine.preprocess_text(text, ['en'])
result = search_engine.search_dates(preprocessed, languages=['en'])
if result.get('Dates'):
all_dates.extend(result['Dates'])

Helper functions for processing search results and working with date patterns.
def date_is_relative(translation):
"""
Check if a date translation represents a relative date.
Parameters:
- translation (str): Translated date string
Returns:
bool: True if date is relative (e.g., 'tomorrow', '2 days ago')
"""

Usage Examples:
from dateparser.search.search import date_is_relative
# Check if dates are relative
is_relative = date_is_relative("2 days ago") # True
is_relative = date_is_relative("January 15, 2023") # False
# Filter relative dates from search results
from dateparser.search import search_dates
text = "The meeting was yesterday and the deadline is January 15, 2023"
dates = search_dates(text)
relative_dates = []
absolute_dates = []
for date_text, date_obj in dates:
if date_is_relative(date_text):
relative_dates.append((date_text, date_obj))
else:
absolute_dates.append((date_text, date_obj))

Install with Tessl CLI
npx tessl i tessl/pypi-dateparser