Constants, helper functions, and miscellaneous utilities for GTFS data manipulation. This module includes configuration constants, calendar operations, feed information functions, and various utility functions.
GTFS_REF: pd.DataFrame — Reference DataFrame containing GTFS table and column specifications with data types, requirements, and validation rules.
DTYPE: dict — Data types dictionary for Pandas CSV reads based on GTFS reference specifications.
FEED_ATTRS: list — List of primary feed attributes for all standard GTFS tables: ['agency', 'stops', 'routes', 'trips', 'stop_times', 'calendar', 'calendar_dates', 'fare_attributes', 'fare_rules', 'shapes', 'frequencies', 'transfers', 'feed_info', 'attributions'].
DIST_UNITS: list — Valid distance units: ['ft', 'mi', 'm', 'km'].
WGS84: str — WGS84 coordinate reference system identifier: 'EPSG:4326'.
COLORS_SET2: list — ColorBrewer 8-class Set2 colors for visualizations: a list of hex color codes optimized for categorical data visualization.
STOP_STYLE: dict — Default Leaflet circleMarker style parameters for stop visualization on maps.
def get_dates(feed, *, as_date_obj=False):
"""
Get all valid service dates for the feed.
Parameters:
- feed (Feed): GTFS feed object
- as_date_obj (bool): Return as datetime.date objects instead of strings
Returns:
- list: List of valid service dates
"""
def subset_dates(feed, dates):
"""
Subset dates to those within feed's service period.
Parameters:
- feed (Feed): GTFS feed object
- dates (list): List of dates to filter
Returns:
- list: Filtered dates within feed's service period
"""

def get_week(feed, k, *, as_date_obj=False):
"""
Get the kth Monday-Sunday week of feed service period.
Parameters:
- feed (Feed): GTFS feed object
- k (int): Week number (0-indexed)
- as_date_obj (bool): Return as datetime.date objects
Returns:
- list: List of dates in the specified week
"""
def get_first_week(feed, *, as_date_obj=False):
"""
Get the first Monday-Sunday week of feed service period.
Parameters:
- feed (Feed): GTFS feed object
- as_date_obj (bool): Return as datetime.date objects
Returns:
- list: List of dates in the first week
"""

def datestr_to_date(x, format_str='%Y%m%d', *, inverse=False):
"""
Convert between date strings and datetime.date objects.
Parameters:
- x: Date string or datetime.date object
- format_str (str): Date format string
- inverse (bool): If True, convert date to string
Returns:
- datetime.date or str: Converted date
"""
def timestr_to_seconds(x, *, inverse=False, mod24=False):
"""
Convert time strings to seconds since midnight.
Parameters:
- x: Time string in HH:MM:SS format or seconds
- inverse (bool): If True, convert seconds to time string
- mod24 (bool): Apply modulo 24 hours
Returns:
- int or str: Seconds or time string
"""
def timestr_mod24(timestr):
"""
Apply modulo 24 hours to time string.
Parameters:
- timestr (str): Time string in HH:MM:SS format
Returns:
- int: Hours modulo 24
"""
def weekday_to_str(weekday, *, inverse=False):
"""
Convert between weekday numbers and strings.
Parameters:
- weekday: Weekday number (0=Monday) or string
- inverse (bool): If True, convert string to number
Returns:
- int or str: Weekday number or string
"""

def get_segment_length(linestring, p, q=None):
"""
Get length of LineString segment.
Parameters:
- linestring: Shapely LineString
- p (float): Start position along line
- q (float, optional): End position along line
Returns:
- float: Segment length
"""
def is_metric(dist_units):
"""
Check if distance units are metric.
Parameters:
- dist_units (str): Distance units string
Returns:
- bool: True if metric units
"""
def get_convert_dist(dist_units_in, dist_units_out):
"""
Get distance conversion function.
Parameters:
- dist_units_in (str): Input distance units
- dist_units_out (str): Output distance units
Returns:
- function: Distance conversion function
"""

def almost_equal(f, g):
"""
Check if two DataFrames are almost equal.
Parameters:
- f (DataFrame): First DataFrame
- g (DataFrame): Second DataFrame
Returns:
- bool: True if DataFrames are almost equal
"""
def is_not_null(df, col_name):
"""
Check if DataFrame column has non-null values.
Parameters:
- df (DataFrame): DataFrame to check
- col_name (str): Column name to check
Returns:
- bool: True if column has non-null values
"""
def get_max_runs(x):
"""
Get maximum run lengths in array.
Parameters:
- x: Array-like input
Returns:
- ndarray: Maximum run lengths
"""
def get_peak_indices(times, counts):
"""
Get indices of peak values in time series.
Parameters:
- times: Array of time values
- counts: Array of count values
Returns:
- ndarray: Indices of peaks
"""
def make_ids(n, prefix='id_'):
"""
Generate n unique ID strings.
Parameters:
- n (int): Number of IDs to generate
- prefix (str): Prefix for IDs
Returns:
- list: List of unique ID strings
"""
def longest_subsequence(seq, mode='strictly', order='increasing', key=None, *, index=False):
"""
Find longest subsequence in sequence.
Parameters:
- seq: Input sequence
- mode (str): Comparison mode ('strictly', 'non')
- order (str): Order ('increasing', 'decreasing')
- key: Key function for comparison
- index (bool): Return indices instead of values
Returns:
- list: Longest subsequence or indices
"""

def get_active_trips_df(trip_times):
"""
Get active trips from trip times DataFrame.
Parameters:
- trip_times (DataFrame): Trip times data
Returns:
- Series: Active trips indicator
"""
def combine_time_series(time_series_dict, kind, *, split_directions=False):
"""
Combine multiple time series into one DataFrame.
Parameters:
- time_series_dict (dict): Dictionary of time series
- kind (str): Type of time series
- split_directions (bool): Split by direction
Returns:
- DataFrame: Combined time series
"""
def downsample(time_series, freq):
"""
Downsample time series to lower frequency.
Parameters:
- time_series (DataFrame): Input time series
- freq (str): Target frequency
Returns:
- DataFrame: Downsampled time series
"""
def unstack_time_series(time_series):
"""
Unstack hierarchical time series columns.
Parameters:
- time_series (DataFrame): Hierarchical time series
Returns:
- DataFrame: Unstacked time series
"""
def restack_time_series(unstacked_time_series):
"""
Restack unstacked time series.
Parameters:
- unstacked_time_series (DataFrame): Unstacked time series
Returns:
- DataFrame: Restacked time series
"""

def make_html(d):
"""
Convert dictionary to HTML representation.
Parameters:
- d (dict): Dictionary to convert
Returns:
- str: HTML string
"""
def drop_feature_ids(collection):
"""
Remove feature IDs from GeoJSON collection.
Parameters:
- collection (dict): GeoJSON FeatureCollection
Returns:
- dict: Collection without feature IDs
"""

def list_fields(feed, table=None):
"""
Describe GTFS table fields and their specifications.
Parameters:
- feed (Feed): GTFS feed object
- table (str, optional): Specific table to describe
Returns:
- DataFrame: Field descriptions and specifications
"""
def describe(feed, sample_date=None):
"""
Get comprehensive feed indicators and summary values.
Parameters:
- feed (Feed): GTFS feed object
- sample_date (str, optional): Date for date-specific metrics
Returns:
- dict: Feed description with key indicators
"""
def assess_quality(feed):
"""
Assess feed quality using various indicators.
Parameters:
- feed (Feed): GTFS feed object
Returns:
- dict: Quality assessment scores and indicators
"""

def convert_dist(feed, new_dist_units):
"""
Convert feed distance units to new units.
Parameters:
- feed (Feed): GTFS feed object (modified in-place)
- new_dist_units (str): Target distance units
Returns:
- Feed: Feed with converted distance units
"""
def create_shapes(feed, *, all_trips=False):
"""
Create shapes by connecting stop coordinates for trips.
Parameters:
- feed (Feed): GTFS feed object (modified in-place)
- all_trips (bool): Create shapes for all trips vs only those without shapes
Returns:
- Feed: Feed with generated shapes
"""

def restrict_to_trips(feed, trip_ids):
"""
Restrict feed to specific trips and related entities.
Parameters:
- feed (Feed): GTFS feed object (modified in-place)
- trip_ids (list): Trip IDs to retain
Returns:
- Feed: Feed restricted to specified trips
"""
def restrict_to_routes(feed, route_ids):
"""
Restrict feed to specific routes and related entities.
Parameters:
- feed (Feed): GTFS feed object (modified in-place)
- route_ids (list): Route IDs to retain
Returns:
- Feed: Feed restricted to specified routes
"""
def restrict_to_agencies(feed, agency_ids):
"""
Restrict feed to specific agencies and related entities.
Parameters:
- feed (Feed): GTFS feed object (modified in-place)
- agency_ids (list): Agency IDs to retain
Returns:
- Feed: Feed restricted to specified agencies
"""

def restrict_to_dates(feed, dates):
"""
Restrict feed to specific service dates.
Parameters:
- feed (Feed): GTFS feed object (modified in-place)
- dates (list): Dates to retain
Returns:
- Feed: Feed restricted to specified dates
"""
def restrict_to_area(feed, area):
"""
Restrict feed to stops and related entities within geographic area.
Parameters:
- feed (Feed): GTFS feed object (modified in-place)
- area: Shapely Polygon or MultiPolygon defining the area
Returns:
- Feed: Feed restricted to specified geographic area
"""

def compute_screen_line_counts(feed, screen_lines, dates, segmentize_m=5, *, include_testing_cols=False):
"""
Compute transit line crossing counts at screen lines.
Parameters:
- feed (Feed): GTFS feed object
- screen_lines: Collection of LineString geometries
- dates (list): Dates to analyze
- segmentize_m (float): Segmentization distance in meters
- include_testing_cols (bool): Include debugging columns
Returns:
- DataFrame: Screen line crossing counts by route and time period
"""

def get_stop_times(feed, date=None):
"""
Get stop_times DataFrame optionally filtered by date.
Parameters:
- feed (Feed): GTFS feed object
- date (str, optional): Filter by service date (YYYYMMDD)
Returns:
- DataFrame: Stop times data
"""
def append_dist_to_stop_times(feed):
"""
Calculate and append shape_dist_traveled to stop_times.
Parameters:
- feed (Feed): GTFS feed object (modified in-place)
Returns:
- Feed: Feed with updated stop_times
"""
def get_start_and_end_times(feed, date=None):
"""
Get first departure and last arrival times for the feed.
Parameters:
- feed (Feed): GTFS feed object
- date (str, optional): Specific date to analyze
Returns:
- tuple: (earliest_departure, latest_arrival) as time strings
"""

def build_route_timetable(feed, route_id, dates):
"""
Build timetable for a specific route.
Parameters:
- feed (Feed): GTFS feed object
- route_id (str): Route ID to build timetable for
- dates (list): Dates to include in timetable
Returns:
- DataFrame: Route timetable with stop times
"""
def build_stop_timetable(feed, stop_id, dates):
"""
Build timetable for a specific stop.
Parameters:
- feed (Feed): GTFS feed object
- stop_id (str): Stop ID to build timetable for
- dates (list): Dates to include in timetable
Returns:
- DataFrame: Stop timetable with arrival/departure times
"""

__version__: str — Package version string: "10.3.0".
import gtfs_kit as gk
# Check available distance units
print(gk.DIST_UNITS) # ['ft', 'mi', 'm', 'km']
# Use GTFS reference data
gtfs_spec = gk.GTFS_REF
print(gtfs_spec[gtfs_spec['table'] == 'routes'])
# Use colors for visualization
colors = gk.COLORS_SET2

# Get all service dates
feed = gk.read_feed('gtfs.zip', dist_units='km')
all_dates = gk.get_dates(feed)
# Get first week of service
first_week = gk.get_first_week(feed, as_date_obj=True)
# Get specific week
week_3 = gk.get_week(feed, 2) # Third week (0-indexed)

# Get comprehensive feed description
description = gk.describe(feed, sample_date='20230101')
print(description)
# Assess feed quality
quality = gk.assess_quality(feed)
print(f"Quality score: {quality}")
# List field specifications
field_info = gk.list_fields(feed, table='routes')

# Create a copy for filtering
filtered_feed = feed.copy()
# Restrict to specific routes
route_ids = ['route_1', 'route_2']
gk.restrict_to_routes(filtered_feed, route_ids)
# Restrict to date range
dates = ['20230101', '20230102', '20230103']
gk.restrict_to_dates(filtered_feed, dates)
# Restrict to geographic area
from shapely.geometry import Polygon
bbox = Polygon([(-122.5, 37.7), (-122.3, 37.7), (-122.3, 37.8), (-122.5, 37.8)])
gk.restrict_to_area(filtered_feed, bbox)

# Build route timetable
route_timetable = gk.build_route_timetable(feed, 'route_1', ['20230101'])
# Build stop timetable
stop_timetable = gk.build_stop_timetable(feed, 'stop_123', ['20230101'])

The utilities module provides essential infrastructure for GTFS data manipulation, analysis, and quality assurance workflows.