Tessl Tile for pypi/gtfs-kit@9.0.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

data-analysis.md data-cleaning.md feed-operations.md geospatial.md index.md time-series.md validation.md

docs/time-series.md

0
# Time Series Analysis
1

2
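Functions for time-based analysis of GTFS feeds: computing service-frequency time series for routes, stops, and the whole feed, and identifying temporal patterns such as peak periods.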
3

4
## Time Series Computation
5

6
{ .api }
7
```python
8
def compute_route_time_series(feed: Feed, trip_stats_subset: pd.DataFrame, dates: list[str],
9
                             freq: str, *, split_directions: bool = False) -> pd.DataFrame:
10
    """
11
    Compute route-level time series showing service frequency over time.
12
    
13
    Args:
14
        feed: Feed object containing route and schedule data
15
        trip_stats_subset: Pre-computed trip statistics DataFrame
16
        dates: List of service dates to analyze 
17
        freq: Frequency string for time bins (e.g., "5T" for 5-minute intervals)
18
        split_directions: If True, compute separate series by direction
19
    
20
    Returns:
21
        DataFrame with time series data indexed by datetime, columns for routes/directions
22
        Values represent service frequency (trips per time period)
23
    """
24

25
def compute_route_time_series_0(trip_stats_subset: pd.DataFrame, date_label: str, 
26
                               freq: str, *, split_directions: bool = False) -> pd.DataFrame:
27
    """
28
    Helper function to compute route time series for a single date.
29
    
30
    Args:
31
        trip_stats_subset: Trip statistics for the specific date
32
        date_label: Date string label for the time series
33
        freq: Frequency string for time aggregation
34
        split_directions: If True, split by direction
35
    
36
    Returns:
37
        DataFrame with single-date route time series
38
    """
39

40
def compute_stop_time_series(feed: Feed, dates: list[str], stop_ids: list[str] | None,
41
                            freq: str, *, split_directions: bool = False) -> pd.DataFrame:
42
    """
43
    Compute stop-level time series showing arrival/departure frequency.
44
    
45
    Args:
46
        feed: Feed object containing stop and schedule data
47
        dates: List of service dates to analyze
48
        stop_ids: List of stop IDs to include, or None for all stops
49
        freq: Frequency string for time bins (e.g., "10T" for 10-minute intervals)
50
        split_directions: If True, compute separate series by direction
51
    
52
    Returns:
53
        DataFrame with time series indexed by datetime, columns for stops/directions
54
        Values represent number of arrivals/departures per time period
55
    """
56

57
def compute_stop_time_series_0(stop_times_subset: pd.DataFrame, trip_subset: pd.DataFrame,
58
                              freq: str, date_label: str, *, split_directions: bool = False) -> pd.DataFrame:
59
    """
60
    Helper function to compute stop time series for a single date.
61
    
62
    Args:
63
        stop_times_subset: Stop times data for the specific date
64
        trip_subset: Trip data subset for the date
65
        freq: Frequency string for aggregation
66
        date_label: Date string label
67
        split_directions: If True, split by direction
68
    
69
    Returns:
70
        DataFrame with single-date stop time series
71
    """
72

73
def compute_feed_time_series(feed: Feed, trip_stats: pd.DataFrame, dates: list[str],
74
                            freq: str, *, split_route_types: bool = False) -> pd.DataFrame:
75
    """
76
    Compute feed-level time series showing total system activity over time.
77
    
78
    Args:
79
        feed: Feed object containing system data
80
        trip_stats: Pre-computed trip statistics DataFrame
81
        dates: List of service dates to analyze
82
        freq: Frequency string for time bins
83
        split_route_types: If True, compute separate series by route type
84
    
85
    Returns:
86
        DataFrame with system-wide time series indexed by datetime
87
        Values represent total trips/activity per time period
88
    """
89
```
90

91
## Time Series Structure Building
92

93
{ .api }
94
```python
95
def build_zero_route_time_series(feed: Feed, date_label: str, freq: str,
96
                                *, split_directions: bool = False) -> pd.DataFrame:
97
    """
98
    Build empty route time series structure with all routes and time periods.
99
    
100
    Args:
101
        feed: Feed object containing route data
102
        date_label: Date string for the time series structure
103
        freq: Frequency string defining time bin size
104
        split_directions: If True, include direction columns
105
    
106
    Returns:
107
        DataFrame with zero values but complete route/time structure for filling
108
    """
109

110
def build_zero_stop_time_series(feed: Feed, date_label: str, freq: str,
111
                               *, split_directions: bool = False) -> pd.DataFrame:
112
    """
113
    Build empty stop time series structure with all stops and time periods.
114
    
115
    Args:
116
        feed: Feed object containing stop data
117
        date_label: Date string for the time series structure
118
        freq: Frequency string defining time bin size  
119
        split_directions: If True, include direction columns
120
    
121
    Returns:
122
        DataFrame with zero values but complete stop/time structure for filling
123
    """
124
```
125

126
## Time Series Utilities
127

128
{ .api }
129
```python
130
def combine_time_series(time_series_dict: dict, kind: str, 
131
                       *, split_directions: bool = False) -> pd.DataFrame:
132
    """
133
    Combine multiple time series into a single DataFrame.
134
    
135
    Args:
136
        time_series_dict: Dictionary mapping labels to time series DataFrames
137
        kind: Type of combination ("sum", "mean", "max", etc.)
138
        split_directions: If True, handle direction-split data
139
    
140
    Returns:
141
        Combined DataFrame with multiple time series as columns or aggregated
142
    """
143

144
def downsample(time_series: pd.DataFrame, freq: str) -> pd.DataFrame:
145
    """
146
    Downsample time series to lower frequency (larger time bins).
147
    
148
    Args:
149
        time_series: Time series DataFrame with datetime index
150
        freq: Target frequency string (e.g., "30T" for 30-minute bins)
151
    
152
    Returns:
153
        Downsampled DataFrame with aggregated values at lower frequency
154
    """
155

156
def unstack_time_series(time_series: pd.DataFrame) -> pd.DataFrame:
157
    """
158
    Convert wide-format time series to long format for analysis.
159
    
160
    Args:
161
        time_series: Wide-format time series with columns for entities
162
    
163
    Returns:
164
        Long-format DataFrame with entity and value columns
165
    """
166

167
def restack_time_series(unstacked_time_series: pd.DataFrame) -> pd.DataFrame:
168
    """
169
    Convert long-format time series back to wide format.
170
    
171
    Args:
172
        unstacked_time_series: Long-format time series DataFrame
173
    
174
    Returns:
175
        Wide-format DataFrame with entities as columns
176
    """
177
```
178

179
## Active Trips Analysis
180

181
{ .api }
182
```python
183
def get_active_trips_df(trip_times: pd.DataFrame) -> pd.Series:
184
    """
185
    Count number of active trips at each time point throughout the day.
186
    
187
    Args:
188
        trip_times: DataFrame with trip start and end times
189
    
190
    Returns:
191
        Series indexed by time showing count of concurrent active trips
192
    """
193
```
194

195
## Time and Date Utilities
196

197
{ .api }
198
```python
199
def get_start_and_end_times(feed: Feed, date: str) -> list[str]:
200
    """
201
    Get the first and last service times for a specific date.
202
    
203
    Args:
204
        feed: Feed object containing schedule data
205
        date: Date string (YYYYMMDD) to analyze
206
    
207
    Returns:
208
        List with [earliest_time, latest_time] as HH:MM:SS strings
209
    """
210

211
def get_stop_times(feed: Feed, date: str) -> pd.DataFrame:
212
    """
213
    Get stop_times data filtered for active trips on a specific date.
214
    
215
    Args:
216
        feed: Feed object containing stop_times and calendar data
217
        date: Date string (YYYYMMDD) to filter by
218
    
219
    Returns:
220
        DataFrame with stop_times for trips active on the specified date
221
    """
222

223
# Time conversion utilities
224
def timestr_to_seconds(x: str, *, inverse: bool = False, mod24: bool = True) -> int:
225
    """Convert time string to seconds since midnight."""
226

227
def timestr_mod24(timestr: str) -> int:
228
    """Convert time string to seconds with 24-hour modulo."""
229

230
def datestr_to_date(x: str, format_str: str, *, inverse: bool = False) -> str | date:
231
    """Convert between date strings and date objects."""
232
```
233

234
## Peak Analysis
235

236
{ .api }
237
```python
238
def get_peak_indices(times: list, counts: list) -> np.array:
239
    """
240
    Find indices corresponding to the longest peak period in time series.
241
    
242
    Args:
243
        times: List of time points
244
        counts: List of count values corresponding to times
245
    
246
    Returns:
247
        Array of indices representing the longest continuous peak period
248
    """
249

250
def get_max_runs(x: np.array) -> np.array:
251
    """
252
    Get start and end indices of runs of maximum values in array.
253
    
254
    Args:
255
        x: Array of numeric values
256
    
257
    Returns:
258
        Array with [start_idx, end_idx] pairs for maximum value runs
259
    """
260
```
261

262
## Usage Examples
263

264
### Route Time Series Analysis
265

266
```python
267
import gtfs_kit as gk
268
import pandas as pd
269
import matplotlib.pyplot as plt
270

271
# Load feed and compute trip stats
272
feed = gk.read_feed("data/gtfs.zip")
273
trip_stats = gk.compute_trip_stats(feed, route_ids=None)
274

275
# Get service dates
276
dates = gk.get_dates(feed)
277
sample_dates = dates[:7]  # First week
278

279
# Compute route time series
280
route_ts = gk.compute_route_time_series(
281
    feed=feed,
282
    trip_stats_subset=trip_stats,
283
    dates=sample_dates,
284
    freq="15T",  # 15-minute intervals
285
    split_directions=True
286
)
287

288
print(f"Route time series shape: {route_ts.shape}")
289
print("Time range:", route_ts.index.min(), "to", route_ts.index.max())
290

291
# Analyze busiest routes
292
daily_totals = route_ts.sum()
293
busiest_routes = daily_totals.nlargest(10)
294
print("Busiest routes:")
295
print(busiest_routes)
296
```
297

298
### Stop Time Series Analysis
299

300
```python
301
# Compute stop time series for major stops
302
stops_gdf = gk.get_stops(feed, as_gdf=True)
303
major_stops = stops_gdf.nlargest(20, 'stop_id')['stop_id'].tolist()
304

305
stop_ts = gk.compute_stop_time_series(
306
    feed=feed,
307
    dates=sample_dates,
308
    stop_ids=major_stops,
309
    freq="10T",  # 10-minute intervals
310
    split_directions=False
311
)
312

313
print(f"Stop time series shape: {stop_ts.shape}")
314

315
# Find peak hours at stops
316
hourly_ts = gk.downsample(stop_ts, "1H")
317
peak_hours = hourly_ts.idxmax()
318
print("Peak hours by stop:")
319
print(peak_hours.head())
320
```
321

322
### System-Wide Time Series
323

324
```python
325
# Compute feed-level time series
326
feed_ts = gk.compute_feed_time_series(
327
    feed=feed,
328
    trip_stats=trip_stats,
329
    dates=sample_dates,
330
    freq="30T",  # 30-minute intervals
331
    split_route_types=True
332
)
333

334
print("System-wide time series:")
335
print(feed_ts.head())
336

337
# Plot system activity
338
if len(feed_ts) > 0:
339
    daily_pattern = feed_ts.groupby(feed_ts.index.time).sum()
340
    plt.figure(figsize=(12, 6))
341
    daily_pattern.plot()
342
    plt.title("Daily Transit System Activity Pattern")
343
    plt.xlabel("Time of Day")
344
    plt.ylabel("Number of Trips")
345
    plt.show()
346
```
347

348
### Time Series Combination and Analysis
349

350
```python
351
# Create multiple time series for comparison
352
weekday_dates = [d for d in dates[:7] if pd.to_datetime(d, format='%Y%m%d').weekday() < 5]
353
weekend_dates = [d for d in dates[:7] if pd.to_datetime(d, format='%Y%m%d').weekday() >= 5]
354

355
weekday_ts = gk.compute_route_time_series(feed, trip_stats, weekday_dates, "30T")
356
weekend_ts = gk.compute_route_time_series(feed, trip_stats, weekend_dates, "30T")
357

358
# Combine time series
359
combined_ts = gk.combine_time_series(
360
    {"weekday": weekday_ts, "weekend": weekend_ts}, 
361
    kind="mean"
362
)
363

364
print("Combined weekday/weekend patterns:")
365
print(combined_ts.head())
366

367
# Analyze differences
368
if len(combined_ts) > 0 and 'weekday' in combined_ts.columns and 'weekend' in combined_ts.columns:
369
    combined_ts['difference'] = combined_ts['weekday'] - combined_ts['weekend']
370
    print("Peak weekday advantage:", combined_ts['difference'].max())
371
```
372

373
### Active Trips Analysis
374

375
```python
376
# Analyze concurrent trip activity
377
sample_date = dates[0]
378
stop_times = gk.get_stop_times(feed, sample_date)
379

380
# Get trip start/end times
381
trip_times = stop_times.groupby('trip_id').agg({
382
    'arrival_time': ['min', 'max']
383
}).round(0)
384

385
trip_times.columns = ['start_time', 'end_time']
386

387
# Count active trips over time
388
active_trips = gk.get_active_trips_df(trip_times)
389
print(f"Max concurrent trips: {active_trips.max()}")
390

391
# Find peak periods
392
times = active_trips.index.tolist()
393
counts = active_trips.values.tolist()
394
peak_indices = gk.get_peak_indices(times, counts)
395

396
if len(peak_indices) > 0:
397
    peak_start = times[peak_indices[0]]
398
    peak_end = times[peak_indices[-1]]
399
    print(f"Peak period: {peak_start} to {peak_end}")
400
```
401

402
### Temporal Pattern Analysis
403

404
```python
405
# Analyze service start and end times
406
start_end_times = gk.get_start_and_end_times(feed, sample_date)
407
print(f"Service span: {start_end_times[0]} to {start_end_times[1]}")
408

409
# Convert to seconds for analysis
410
start_seconds = gk.timestr_to_seconds(start_end_times[0])
411
end_seconds = gk.timestr_to_seconds(start_end_times[1])
412
service_span_hours = (end_seconds - start_seconds) / 3600
413

414
print(f"Daily service span: {service_span_hours:.1f} hours")
415

416
# Analyze time series structure
417
sample_ts = gk.build_zero_route_time_series(feed, sample_date, "1H")
418
print(f"Hourly time series structure: {sample_ts.shape}")
419
print("Time periods:", len(sample_ts.index.unique()))
420
```
421

422
### Long Format Analysis
423

424
```python
425
# Convert to long format for detailed analysis
426
route_ts_long = gk.unstack_time_series(route_ts)
427
print("Long format time series:")
428
print(route_ts_long.head())
429

430
# Analyze by time of day
431
if 'time' in route_ts_long.columns and 'value' in route_ts_long.columns:
432
    hourly_summary = route_ts_long.groupby(
433
        route_ts_long['time'].dt.hour
434
    )['value'].agg(['mean', 'std', 'sum'])
435
    
436
    print("Hourly service summary:")
437
    print(hourly_summary)
438

439
# Convert back to wide format
440
route_ts_restored = gk.restack_time_series(route_ts_long)
441
print("Restored wide format shape:", route_ts_restored.shape)
442
```
443

444
### Custom Frequency Analysis
445

446
```python
447
# Analyze at different time resolutions
448
frequencies = ["5T", "15T", "30T", "1H"]
449
frequency_analysis = {}
450

451
for freq in frequencies:
452
    ts = gk.compute_route_time_series(feed, trip_stats, [sample_date], freq)
453
    if len(ts) > 0:
454
        frequency_analysis[freq] = {
455
            'periods': len(ts),
456
            'max_value': ts.values.max(),
457
            'mean_value': ts.values.mean()
458
        }
459

460
print("Analysis by frequency:")
461
for freq, stats in frequency_analysis.items():
462
    print(f"{freq}: {stats['periods']} periods, max={stats['max_value']}, mean={stats['mean_value']:.2f}")
463

464
# Downsample high-frequency data
465
if "5T" in frequency_analysis:
466
    high_freq_ts = gk.compute_route_time_series(feed, trip_stats, [sample_date], "5T")
467
    downsampled = gk.downsample(high_freq_ts, "30T")
468
    print(f"Downsampled from {len(high_freq_ts)} to {len(downsampled)} periods")
469
```

Version

Tile

Files

docs/time-series.md

docs/time-series.md