or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

data-analysis.md data-processing.md feed-operations.md geospatial.md index.md utilities.md
tile.json

docs/data-analysis.md

Data Analysis

Statistical analysis, time series computation, and performance metrics for transit operations. This module provides comprehensive analysis capabilities for routes, stops, trips, and system-wide metrics across multiple dates and time periods.

Route Analysis

Route Statistics

def compute_route_stats_0(
    trip_stats_subset,
    headway_start_time='07:00:00',
    headway_end_time='19:00:00',
    *,
    split_directions=False,
):
    """
    Summarize route-level statistics from a subset of trip statistics.

    Parameters:
    - trip_stats_subset (DataFrame): A subset of trip statistics
    - headway_start_time (str): Start of the headway calculation window (default '07:00:00')
    - headway_end_time (str): End of the headway calculation window (default '19:00:00')
    - split_directions (bool): Keyword-only; split the statistics by direction (default False)

    Returns:
    - DataFrame: Route statistics
    """

def compute_route_stats(
    feed,
    trip_stats_subset,
    dates,
    headway_start_time='07:00:00',
    headway_end_time='19:00:00',
    *,
    split_directions=False,
):
    """
    Summarize route-level statistics across several dates.

    Parameters:
    - feed (Feed): GTFS feed object
    - trip_stats_subset (DataFrame): A subset of trip statistics
    - dates (list): Dates to analyze
    - headway_start_time (str): Start of the headway calculation window (default '07:00:00')
    - headway_end_time (str): End of the headway calculation window (default '19:00:00')
    - split_directions (bool): Keyword-only; split the statistics by direction (default False)

    Returns:
    - DataFrame: Route statistics with a date index
    """

Route Time Series

def compute_route_time_series_0(
    trip_stats_subset,
    date_label='20010101',
    freq='5Min',
    *,
    split_directions=False,
):
    """
    Build a route time series from a subset of trip statistics.

    Parameters:
    - trip_stats_subset (DataFrame): A subset of trip statistics
    - date_label (str): Date label applied to the time series (default '20010101')
    - freq (str): Sampling frequency of the time series (default '5Min')
    - split_directions (bool): Keyword-only; split the series by direction (default False)

    Returns:
    - DataFrame: Route time series
    """

def build_zero_route_time_series(
    feed,
    date_label='20010101',
    freq='5Min',
    *,
    split_directions=False,
):
    """
    Create a route time series template filled with zeros.

    Parameters:
    - feed (Feed): GTFS feed object
    - date_label (str): Date label applied to the time series (default '20010101')
    - freq (str): Sampling frequency of the time series (default '5Min')
    - split_directions (bool): Keyword-only; split the series by direction (default False)

    Returns:
    - DataFrame: Zero-filled route time series
    """

def compute_route_time_series(
    feed,
    trip_stats_subset,
    dates,
    freq='5Min',
    *,
    split_directions=False,
):
    """
    Build route time series across several dates.

    Parameters:
    - feed (Feed): GTFS feed object
    - trip_stats_subset (DataFrame): A subset of trip statistics
    - dates (list): Dates to analyze
    - freq (str): Sampling frequency of the time series (default '5Min')
    - split_directions (bool): Keyword-only; split the series by direction (default False)

    Returns:
    - DataFrame: Route time series with hierarchical columns
    """

Stop Analysis

Stop Statistics

def compute_stop_stats_0(
    stop_times_subset,
    trip_subset,
    headway_start_time='07:00:00',
    headway_end_time='19:00:00',
    *,
    split_directions=False,
):
    """
    Summarize stop-level statistics from stop-time and trip subsets.

    Parameters:
    - stop_times_subset (DataFrame): A subset of stop times
    - trip_subset (DataFrame): A subset of trips
    - headway_start_time (str): Start of the headway calculation window (default '07:00:00')
    - headway_end_time (str): End of the headway calculation window (default '19:00:00')
    - split_directions (bool): Keyword-only; split the statistics by direction (default False)

    Returns:
    - DataFrame: Stop statistics
    """

def compute_stop_stats(
    feed,
    dates,
    stop_ids=None,
    headway_start_time='07:00:00',
    headway_end_time='19:00:00',
    *,
    split_directions=False,
):
    """
    Summarize stop-level statistics for the given dates.

    Parameters:
    - feed (Feed): GTFS feed object
    - dates (list): Dates to analyze
    - stop_ids (list, optional): Specific stop IDs to analyze (default None)
    - headway_start_time (str): Start of the headway calculation window (default '07:00:00')
    - headway_end_time (str): End of the headway calculation window (default '19:00:00')
    - split_directions (bool): Keyword-only; split the statistics by direction (default False)

    Returns:
    - DataFrame: Stop statistics with a date index
    """

def compute_stop_activity(feed, dates):
    """
    Flag each stop as active or inactive on the given dates.

    Parameters:
    - feed (Feed): GTFS feed object
    - dates (list): Dates to analyze

    Returns:
    - DataFrame: Per-date stop activity indicators
    """

Stop Time Series

def compute_stop_time_series_0(
    stop_times_subset,
    trip_subset,
    freq='5Min',
    date_label='20010101',
    *,
    split_directions=False,
):
    """
    Build a stop time series from stop-time and trip subsets.

    Parameters:
    - stop_times_subset (DataFrame): A subset of stop times
    - trip_subset (DataFrame): A subset of trips
    - freq (str): Sampling frequency of the time series (default '5Min')
    - date_label (str): Date label applied to the time series (default '20010101')
    - split_directions (bool): Keyword-only; split the series by direction (default False)

    Returns:
    - DataFrame: Stop time series
    """

def build_zero_stop_time_series(
    feed,
    date_label='20010101',
    freq='5Min',
    *,
    split_directions=False,
):
    """
    Create a stop time series template filled with zeros.

    Parameters:
    - feed (Feed): GTFS feed object
    - date_label (str): Date label applied to the time series (default '20010101')
    - freq (str): Sampling frequency of the time series (default '5Min')
    - split_directions (bool): Keyword-only; split the series by direction (default False)

    Returns:
    - DataFrame: Zero-filled stop time series
    """

def compute_stop_time_series(
    feed,
    dates,
    stop_ids=None,
    freq='5Min',
    *,
    split_directions=False,
):
    """
    Build stop time series for the given dates.

    Parameters:
    - feed (Feed): GTFS feed object
    - dates (list): Dates to analyze
    - stop_ids (list, optional): Specific stop IDs to analyze (default None)
    - freq (str): Sampling frequency of the time series (default '5Min')
    - split_directions (bool): Keyword-only; split the series by direction (default False)

    Returns:
    - DataFrame: Stop time series with hierarchical columns
    """

Trip Analysis

Trip Statistics and Operations

def get_active_services(feed, date):
    """
    List the service IDs that are active on one date.

    Parameters:
    - feed (Feed): GTFS feed object
    - date (str): The date to check, in YYYYMMDD format

    Returns:
    - list: IDs of the services active on that date
    """

def compute_trip_activity(feed, dates):
    """
    Flag each trip as active or inactive on the given dates.

    Parameters:
    - feed (Feed): GTFS feed object
    - dates (list): Dates to analyze

    Returns:
    - DataFrame: Per-date trip activity indicators
    """

def compute_busiest_date(feed, dates):
    """
    Find the date, among those given, with the most active trips.

    Parameters:
    - feed (Feed): GTFS feed object
    - dates (list): Dates to analyze

    Returns:
    - str: The date with the maximum number of active trips
    """

def compute_trip_stats(
    feed,
    route_ids=None,
    *,
    compute_dist_from_shapes=False,
):
    """
    Produce comprehensive statistics for trips in the feed.

    Parameters:
    - feed (Feed): GTFS feed object
    - route_ids (list, optional): Specific route IDs to analyze (default None)
    - compute_dist_from_shapes (bool): Keyword-only; calculate distances from shapes (default False)

    Returns:
    - DataFrame: Trip statistics including distances, durations, speeds
    """

def name_stop_patterns(feed):
    """
    Give each trip a stop pattern name derived from its stop sequence.

    Parameters:
    - feed (Feed): GTFS feed object

    Returns:
    - DataFrame: Trips with their assigned stop pattern names
    """

def locate_trips(feed, date, times):
    """
    Report where trips are at the requested times on one date.

    Parameters:
    - feed (Feed): GTFS feed object
    - date (str): The date to inspect, in YYYYMMDD format
    - times (list): Times of interest, each in HH:MM:SS format

    Returns:
    - DataFrame: Trip positions and status at the specified times
    """

def build_route_timetable(feed, route_id, dates):
    """
    Assemble the timetable of one route, showing departure times at stops.

    Parameters:
    - feed (Feed): GTFS feed object
    - route_id (str): ID of the route whose timetable is built
    - dates (list): Dates to cover, each in YYYYMMDD format

    Returns:
    - DataFrame: Route timetable with stops and departure times
    """

def build_stop_timetable(feed, stop_id, dates):
    """
    Assemble the timetable of one stop, showing all arrivals/departures.

    Parameters:
    - feed (Feed): GTFS feed object
    - stop_id (str): ID of the stop whose timetable is built
    - dates (list): Dates to cover, each in YYYYMMDD format

    Returns:
    - DataFrame: Stop timetable with trip arrivals and departures
    """

Feed-Level Analysis

Feed Statistics

def compute_feed_stats_0(feed, trip_stats_subset, *, split_route_types=False):
    """
    Summarize feed-level statistics from a subset of trip statistics.

    Parameters:
    - feed (Feed): GTFS feed object
    - trip_stats_subset (DataFrame): A subset of trip statistics
    - split_route_types (bool): Keyword-only; split the statistics by route type (default False)

    Returns:
    - DataFrame: Feed-level statistics
    """

def compute_feed_stats(feed, trip_stats, dates, *, split_route_types=False):
    """
    Summarize feed-level statistics across several dates.

    Parameters:
    - feed (Feed): GTFS feed object
    - trip_stats (DataFrame): Trip statistics
    - dates (list): Dates to analyze
    - split_route_types (bool): Keyword-only; split the statistics by route type (default False)

    Returns:
    - DataFrame: Feed statistics with a date index
    """

def compute_feed_time_series(
    feed,
    trip_stats,
    dates,
    freq='5Min',
    *,
    split_route_types=False,
):
    """
    Build feed-level time series across several dates.

    Parameters:
    - feed (Feed): GTFS feed object
    - trip_stats (DataFrame): Trip statistics
    - dates (list): Dates to analyze
    - freq (str): Sampling frequency of the time series (default '5Min')
    - split_route_types (bool): Keyword-only; split the series by route type (default False)

    Returns:
    - DataFrame: Feed time series with hierarchical columns
    """

Usage Examples

Basic Route Analysis

import gtfs_kit as gk

# Read the GTFS feed from gtfs.zip with dist_units='km', then compute trip
# statistics (no route_ids filter); trip_stats is passed to the calls below
feed = gk.read_feed('gtfs.zip', dist_units='km')
trip_stats = gk.compute_trip_stats(feed)

# Compute route statistics for three dates, given as YYYYMMDD strings
dates = ['20230101', '20230102', '20230103']
route_stats = gk.compute_route_stats(feed, trip_stats, dates)

# Build route time series for the same dates, sampled at freq='15Min'
# rather than the documented default of '5Min'
route_ts = gk.compute_route_time_series(feed, trip_stats, dates, freq='15Min')

Stop Performance Analysis

# `feed` is the Feed loaded in the Basic Route Analysis example.
# Compute stop statistics for one date with a custom headway window
# (06:00:00-22:00:00 instead of the default 07:00:00-19:00:00),
# split by direction
stop_stats = gk.compute_stop_stats(
    feed, 
    dates=['20230101'], 
    headway_start_time='06:00:00',
    headway_end_time='22:00:00',
    split_directions=True
)

# Build the stop time series for the same date, sampled at freq='10Min'
# rather than the documented default of '5Min'
stop_ts = gk.compute_stop_time_series(
    feed, 
    dates=['20230101'], 
    freq='10Min'
)

System-Wide Analysis

# `feed`, `trip_stats` and `dates` come from the Basic Route Analysis example.
# Find the date in `dates` with the maximum number of active trips
busiest_date = gk.compute_busiest_date(feed, dates)

# Compute feed-level statistics for those dates, split by route type
feed_stats = gk.compute_feed_stats(feed, trip_stats, dates, split_route_types=True)

# Build the system-wide time series at the default freq ('5Min')
feed_ts = gk.compute_feed_time_series(feed, trip_stats, dates)

All analysis functions support flexible date ranges, time periods, and granularity options to accommodate different analytical needs and reporting requirements.