Airbyte source connector for extracting financial and accounting data from Xero's cloud-based accounting platform.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Utility functions for parsing and converting Xero's custom data formats and JSON structures. These utilities ensure proper data type conversion, date parsing, and RFC3339 compliance for all Xero API responses.
import decimal
import re
from abc import ABC
from datetime import date, datetime, time, timedelta, timezone
from typing import Any, Iterable, Mapping, MutableMapping, Optional
import pendulum
import requestsXero uses multiple date formats that require specialized parsing to ensure compatibility with downstream systems.
def parse_date(value: str) -> Optional[datetime]:
"""
Parse Xero date strings in various formats to datetime objects.
Supports multiple date formats used by Xero API including:
- .NET JSON format: "/Date(1419937200000+0000)/"
- ISO 8601 format: "2023-08-15T14:30:25Z"
- Partial ISO format: "2023-08-15T14:30:25"
Parameters:
- value: String containing date in any supported format
Returns:
datetime object in UTC timezone, or None if parsing fails
Examples:
- parse_date("/Date(1419937200000+0000)/") -> datetime(2014, 12, 30, 9, 0)
- parse_date("2023-08-15T14:30:25Z") -> datetime(2023, 8, 15, 14, 30, 25)
- parse_date("invalid-date") -> None
"""Custom JSON processing to handle Xero's data structures and ensure RFC3339 compliance.
def _json_load_object_hook(_dict: dict) -> dict:
"""
JSON parse hook to convert Xero date formats to RFC3339 strings.
Automatically processes dictionary objects during JSON parsing
to identify and convert date fields from Xero's formats to
standardized RFC3339 format for downstream compatibility.
Parameters:
- _dict: Dictionary object from JSON parsing containing potential date fields
Returns:
Modified dictionary with converted date strings in RFC3339 format
Date Field Patterns:
- Fields ending in "Date", "DateUTC", or containing "Date" substring
- Common fields: UpdatedDateUTC, CreatedDateUTC, DueDateString, etc.
Conversion Examples:
- "/Date(1419937200000+0000)/" -> "2014-12-30T09:00:00+00:00"
- "2023-08-15T14:30:25" -> "2023-08-15T14:30:25+00:00"
"""Xero's legacy .NET JSON date format requires special parsing:
# .NET JSON date format pattern
NET_JSON_PATTERN = r"/Date\((\d+)([\+\-]\d{4})?\)/"
# Examples of .NET JSON dates from Xero:
NET_JSON_EXAMPLES = [
"/Date(1419937200000+0000)/", # UTC timestamp with timezone
"/Date(1419937200000)/", # UTC timestamp without timezone
"/Date(1419937200000-0500)/", # Timestamp with negative timezone offset
]
# Parsed results (all converted to UTC):
PARSED_RESULTS = [
"2014-12-30T09:00:00+00:00", # December 30, 2014 9:00 AM UTC
"2014-12-30T09:00:00+00:00", # Same timestamp, assumed UTC
"2014-12-30T14:00:00+00:00", # Adjusted for -0500 timezone offset
]Standard ISO date formats are also supported:
# ISO 8601 format examples
ISO_8601_EXAMPLES = [
"2023-08-15T14:30:25Z", # Full UTC format with Z suffix
"2023-08-15T14:30:25+00:00", # Full UTC format with +00:00 offset
"2023-08-15T14:30:25", # Local time without timezone (assumed UTC)
"2023-08-15T14:30:25.123Z", # With milliseconds
]from source_xero.streams import parse_date
from datetime import datetime
# Parse various date formats
net_date = parse_date("/Date(1419937200000+0000)/")
iso_date = parse_date("2023-08-15T14:30:25Z")
partial_date = parse_date("2023-08-15T14:30:25")
print(f".NET date: {net_date}") # 2014-12-30 09:00:00
print(f"ISO date: {iso_date}") # 2023-08-15 14:30:25
print(f"Partial date: {partial_date}") # 2023-08-15 14:30:25
# Handle invalid dates
invalid_date = parse_date("not-a-date")
print(f"Invalid date: {invalid_date}") # Noneimport json
from source_xero.streams import _json_load_object_hook
# Raw JSON response from Xero API
xero_json = '''
{
"ContactID": "12345678-1234-1234-1234-123456789012",
"Name": "Sample Customer",
"UpdatedDateUTC": "/Date(1419937200000+0000)/",
"CreatedDateUTC": "2023-08-15T14:30:25Z",
"EmailAddress": "customer@example.com"
}
'''
# Parse with automatic date conversion
parsed_data = json.loads(xero_json, object_hook=_json_load_object_hook)
print(f"Contact: {parsed_data['Name']}")
print(f"Updated: {parsed_data['UpdatedDateUTC']}") # Converted to RFC3339
print(f"Created: {parsed_data['CreatedDateUTC']}") # Already RFC3339# This processing happens automatically in all Xero streams
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
"""Example of how streams use the date utilities internally."""
response_data = response.json(object_hook=_json_load_object_hook)
data_field = self.data_field()
# Extract records from response
if data_field in response_data:
records = response_data[data_field]
# All date fields are now in RFC3339 format
return records
else:
return []The JSON object hook automatically processes these common date fields:
COMMON_DATE_FIELDS = [
"UpdatedDateUTC", # Most common cursor field
"CreatedDateUTC", # Alternative cursor field
"DueDateString", # Invoice due dates
"DateString", # Transaction dates
"FullyPaidOnDate", # Payment completion dates
"ExpectedArrivalDate", # Purchase order dates
"DeliveryDate", # Delivery scheduling
"PaymentDueDate", # Payment deadlines
"InvoiceDate", # Invoice issue dates
"LastLoginDate", # User activity tracking
]# Date field detection patterns
def is_date_field(field_name: str) -> bool:
"""
Determine if a field name likely contains date data.
Detection criteria:
- Field name ends with "Date" or "DateUTC"
- Field name contains "Date" substring
- Known date field patterns from Xero API
"""
date_patterns = [
field_name.endswith('Date'),
field_name.endswith('DateUTC'),
'Date' in field_name,
field_name.endswith('DateString')
]
return any(date_patterns)The date parser handles various error conditions gracefully:
# Error handling examples
ERROR_CASES = {
"Invalid .NET format": "/Date(invalid)/",
"Malformed timestamp": "/Date(abc123+0000)/",
"Invalid ISO format": "2023-13-45T25:70:99Z",
"Empty string": "",
"None value": None,
"Non-string input": 12345
}
# All error cases return None without raising exceptions
for case, value in ERROR_CASES.items():
result = parse_date(value)
assert result is None, f"{case} should return None"The JSON object hook handles processing errors:
Date parsing uses compiled regex patterns for efficiency:
import re
# Pre-compiled regex for .NET JSON dates
NET_DATE_REGEX = re.compile(r"/Date\((\d+)([\+\-]\d{4})?\)/")
# Single compilation for all parsing operations
# Significantly faster than re-compiling for each dateDate parsing could benefit from caching for repeated values:
# Potential optimization for repeated date values
from functools import lru_cache
@lru_cache(maxsize=1000)
def cached_parse_date(value: str) -> Optional[datetime]:
"""Cached version of parse_date for performance optimization."""
return parse_date(value)The utilities are designed for minimal memory overhead:
The utilities integrate seamlessly with Airbyte CDK:
Converted dates work with common data processing tools:
Install with Tessl CLI
npx tessl i tessl/pypi-source-xero