0
# Time Series Analysis
1
2
Time-based analysis, service frequency computation, and temporal patterns.
3
4
## Time Series Computation
5
6
{ .api }
7
```python
8
def compute_route_time_series(feed: Feed, trip_stats_subset: pd.DataFrame, dates: list[str],
9
freq: str, *, split_directions: bool = False) -> pd.DataFrame:
10
"""
11
Compute route-level time series showing service frequency over time.
12
13
Args:
14
feed: Feed object containing route and schedule data
15
trip_stats_subset: Pre-computed trip statistics DataFrame
16
dates: List of service dates to analyze
17
freq: Frequency string for time bins (e.g., "5min" for 5-minute intervals)
18
split_directions: If True, compute separate series by direction
19
20
Returns:
21
DataFrame with time series data indexed by datetime, columns for routes/directions
22
Values represent service frequency (trips per time period)
23
"""
24
25
def compute_route_time_series_0(trip_stats_subset: pd.DataFrame, date_label: str,
26
freq: str, *, split_directions: bool = False) -> pd.DataFrame:
27
"""
28
Helper function to compute route time series for a single date.
29
30
Args:
31
trip_stats_subset: Trip statistics for the specific date
32
date_label: Date string label for the time series
33
freq: Frequency string for time aggregation
34
split_directions: If True, split by direction
35
36
Returns:
37
DataFrame with single-date route time series
38
"""
39
40
def compute_stop_time_series(feed: Feed, dates: list[str], stop_ids: list[str] | None,
41
freq: str, *, split_directions: bool = False) -> pd.DataFrame:
42
"""
43
Compute stop-level time series showing arrival/departure frequency.
44
45
Args:
46
feed: Feed object containing stop and schedule data
47
dates: List of service dates to analyze
48
stop_ids: List of stop IDs to include, or None for all stops
49
freq: Frequency string for time bins (e.g., "10min" for 10-minute intervals)
50
split_directions: If True, compute separate series by direction
51
52
Returns:
53
DataFrame with time series indexed by datetime, columns for stops/directions
54
Values represent number of arrivals/departures per time period
55
"""
56
57
def compute_stop_time_series_0(stop_times_subset: pd.DataFrame, trip_subset: pd.DataFrame,
58
freq: str, date_label: str, *, split_directions: bool = False) -> pd.DataFrame:
59
"""
60
Helper function to compute stop time series for a single date.
61
62
Args:
63
stop_times_subset: Stop times data for the specific date
64
trip_subset: Trip data subset for the date
65
freq: Frequency string for aggregation
66
date_label: Date string label
67
split_directions: If True, split by direction
68
69
Returns:
70
DataFrame with single-date stop time series
71
"""
72
73
def compute_feed_time_series(feed: Feed, trip_stats: pd.DataFrame, dates: list[str],
74
freq: str, *, split_route_types: bool = False) -> pd.DataFrame:
75
"""
76
Compute feed-level time series showing total system activity over time.
77
78
Args:
79
feed: Feed object containing system data
80
trip_stats: Pre-computed trip statistics DataFrame
81
dates: List of service dates to analyze
82
freq: Frequency string for time bins
83
split_route_types: If True, compute separate series by route type
84
85
Returns:
86
DataFrame with system-wide time series indexed by datetime
87
Values represent total trips/activity per time period
88
"""
89
```
90
91
## Time Series Structure Building
92
93
{ .api }
94
```python
95
def build_zero_route_time_series(feed: Feed, date_label: str, freq: str,
96
*, split_directions: bool = False) -> pd.DataFrame:
97
"""
98
Build empty route time series structure with all routes and time periods.
99
100
Args:
101
feed: Feed object containing route data
102
date_label: Date string for the time series structure
103
freq: Frequency string defining time bin size
104
split_directions: If True, include direction columns
105
106
Returns:
107
DataFrame with zero values but complete route/time structure for filling
108
"""
109
110
def build_zero_stop_time_series(feed: Feed, date_label: str, freq: str,
111
*, split_directions: bool = False) -> pd.DataFrame:
112
"""
113
Build empty stop time series structure with all stops and time periods.
114
115
Args:
116
feed: Feed object containing stop data
117
date_label: Date string for the time series structure
118
freq: Frequency string defining time bin size
119
split_directions: If True, include direction columns
120
121
Returns:
122
DataFrame with zero values but complete stop/time structure for filling
123
"""
124
```
125
126
## Time Series Utilities
127
128
{ .api }
129
```python
130
def combine_time_series(time_series_dict: dict, kind: str,
131
*, split_directions: bool = False) -> pd.DataFrame:
132
"""
133
Combine multiple time series into a single DataFrame.
134
135
Args:
136
time_series_dict: Dictionary mapping labels to time series DataFrames
137
kind: Kind of time series being combined, either "route" or "stop"
138
split_directions: If True, handle direction-split data
139
140
Returns:
141
Combined DataFrame with multiple time series as columns or aggregated
142
"""
143
144
def downsample(time_series: pd.DataFrame, freq: str) -> pd.DataFrame:
145
"""
146
Downsample time series to lower frequency (larger time bins).
147
148
Args:
149
time_series: Time series DataFrame with datetime index
150
freq: Target frequency string (e.g., "30min" for 30-minute bins)
151
152
Returns:
153
Downsampled DataFrame with aggregated values at lower frequency
154
"""
155
156
def unstack_time_series(time_series: pd.DataFrame) -> pd.DataFrame:
157
"""
158
Convert wide-format time series to long format for analysis.
159
160
Args:
161
time_series: Wide-format time series with columns for entities
162
163
Returns:
164
Long-format DataFrame with entity and value columns
165
"""
166
167
def restack_time_series(unstacked_time_series: pd.DataFrame) -> pd.DataFrame:
168
"""
169
Convert long-format time series back to wide format.
170
171
Args:
172
unstacked_time_series: Long-format time series DataFrame
173
174
Returns:
175
Wide-format DataFrame with entities as columns
176
"""
177
```
178
179
## Active Trips Analysis
180
181
{ .api }
182
```python
183
def get_active_trips_df(trip_times: pd.DataFrame) -> pd.Series:
184
"""
185
Count number of active trips at each time point throughout the day.
186
187
Args:
188
trip_times: DataFrame with one row per trip and `start_time` and `end_time` columns
189
190
Returns:
191
Series indexed by time showing count of concurrent active trips
192
"""
193
```
194
195
## Time and Date Utilities
196
197
{ .api }
198
```python
199
def get_start_and_end_times(feed: Feed, date: str) -> list[str]:
200
"""
201
Get the first and last service times for a specific date.
202
203
Args:
204
feed: Feed object containing schedule data
205
date: Date string (YYYYMMDD) to analyze
206
207
Returns:
208
List with [earliest_time, latest_time] as HH:MM:SS strings
209
"""
210
211
def get_stop_times(feed: Feed, date: str) -> pd.DataFrame:
212
"""
213
Get stop_times data filtered for active trips on a specific date.
214
215
Args:
216
feed: Feed object containing stop_times and calendar data
217
date: Date string (YYYYMMDD) to filter by
218
219
Returns:
220
DataFrame with stop_times for trips active on the specified date
221
"""
222
223
# Time conversion utilities
224
def timestr_to_seconds(x: str, *, inverse: bool = False, mod24: bool = True) -> int:
225
"""Convert time string to seconds since midnight."""
226
227
def timestr_mod24(timestr: str) -> str:
228
"""Return the HH:MM:SS time string with its hours taken modulo 24."""
229
230
def datestr_to_date(x: str, format_str: str, *, inverse: bool = False) -> str | date:
231
"""Convert between date strings and date objects."""
232
```
233
234
## Peak Analysis
235
236
{ .api }
237
```python
238
def get_peak_indices(times: list, counts: list) -> np.ndarray:
239
"""
240
Find indices corresponding to the longest peak period in time series.
241
242
Args:
243
times: List of time points
244
counts: List of count values corresponding to times
245
246
Returns:
247
Array of indices representing the longest continuous peak period
248
"""
249
250
def get_max_runs(x: np.ndarray) -> np.ndarray:
251
"""
252
Get start and end indices of runs of maximum values in array.
253
254
Args:
255
x: Array of numeric values
256
257
Returns:
258
Array with [start_idx, end_idx] pairs for maximum value runs
259
"""
260
```
261
262
## Usage Examples
263
264
### Route Time Series Analysis
265
266
```python
267
import gtfs_kit as gk
268
import pandas as pd
269
import matplotlib.pyplot as plt
270
271
# Load feed and compute trip stats
272
feed = gk.read_feed("data/gtfs.zip", dist_units="km")  # dist_units is required; use the units of the feed's distance fields
273
trip_stats = gk.compute_trip_stats(feed, route_ids=None)
274
275
# Get service dates
276
dates = gk.get_dates(feed)
277
sample_dates = dates[:7] # First week
278
279
# Compute route time series
280
route_ts = gk.compute_route_time_series(
281
feed=feed,
282
trip_stats_subset=trip_stats,
283
dates=sample_dates,
284
freq="15min", # 15-minute intervals
285
split_directions=True
286
)
287
288
print(f"Route time series shape: {route_ts.shape}")
289
print("Time range:", route_ts.index.min(), "to", route_ts.index.max())
290
291
# Analyze busiest routes
292
daily_totals = route_ts.sum()
293
busiest_routes = daily_totals.nlargest(10)
294
print("Busiest routes:")
295
print(busiest_routes)
296
```
297
298
### Stop Time Series Analysis
299
300
```python
301
# Compute stop time series for major stops
302
stops_gdf = gk.get_stops(feed, as_gdf=True)
303
major_stops = stops_gdf['stop_id'].head(20).tolist()  # first 20 stops; stop_id is a string column, so nlargest() cannot rank it
304
305
stop_ts = gk.compute_stop_time_series(
306
feed=feed,
307
dates=sample_dates,
308
stop_ids=major_stops,
309
freq="10min", # 10-minute intervals
310
split_directions=False
311
)
312
313
print(f"Stop time series shape: {stop_ts.shape}")
314
315
# Find peak hours at stops
316
hourly_ts = gk.downsample(stop_ts, "1h")
317
peak_hours = hourly_ts.idxmax()
318
print("Peak hours by stop:")
319
print(peak_hours.head())
320
```
321
322
### System-Wide Time Series
323
324
```python
325
# Compute feed-level time series
326
feed_ts = gk.compute_feed_time_series(
327
feed=feed,
328
trip_stats=trip_stats,
329
dates=sample_dates,
330
freq="30min", # 30-minute intervals
331
split_route_types=True
332
)
333
334
print("System-wide time series:")
335
print(feed_ts.head())
336
337
# Plot system activity
338
if len(feed_ts) > 0:
339
daily_pattern = feed_ts.groupby(feed_ts.index.time).sum()
340
plt.figure(figsize=(12, 6))
341
daily_pattern.plot()
342
plt.title("Daily Transit System Activity Pattern")
343
plt.xlabel("Time of Day")
344
plt.ylabel("Number of Trips")
345
plt.show()
346
```
347
348
### Time Series Combination and Analysis
349
350
```python
351
# Create multiple time series for comparison
352
weekday_dates = [d for d in dates[:7] if pd.to_datetime(d, format='%Y%m%d').weekday() < 5]
353
weekend_dates = [d for d in dates[:7] if pd.to_datetime(d, format='%Y%m%d').weekday() >= 5]
354
355
weekday_ts = gk.compute_route_time_series(feed, trip_stats, weekday_dates, "30min")
356
weekend_ts = gk.compute_route_time_series(feed, trip_stats, weekend_dates, "30min")
357
358
# Combine time series
359
combined_ts = gk.combine_time_series(
360
{"weekday": weekday_ts, "weekend": weekend_ts},
361
kind="route"  # entity type of the input series: "route" or "stop"
362
)
363
364
print("Combined weekday/weekend patterns:")
365
print(combined_ts.head())
366
367
# Analyze differences
368
if len(combined_ts) > 0 and 'weekday' in combined_ts.columns and 'weekend' in combined_ts.columns:
369
combined_ts['difference'] = combined_ts['weekday'] - combined_ts['weekend']
370
print("Peak weekday advantage:", combined_ts['difference'].max())
371
```
372
373
### Active Trips Analysis
374
375
```python
376
# Analyze concurrent trip activity
377
sample_date = dates[0]
378
stop_times = gk.get_stop_times(feed, sample_date)
379
380
# Get trip start/end times
381
trip_times = stop_times.groupby('trip_id').agg({
382
'arrival_time': ['min', 'max']
383
}).round(0)
384
385
trip_times.columns = ['start_time', 'end_time']
386
387
# Count active trips over time
388
active_trips = gk.get_active_trips_df(trip_times)
389
print(f"Max concurrent trips: {active_trips.max()}")
390
391
# Find peak periods
392
times = active_trips.index.tolist()
393
counts = active_trips.values.tolist()
394
peak_indices = gk.get_peak_indices(times, counts)
395
396
if len(peak_indices) > 0:
397
peak_start = times[peak_indices[0]]
398
peak_end = times[peak_indices[-1]]
399
print(f"Peak period: {peak_start} to {peak_end}")
400
```
401
402
### Temporal Pattern Analysis
403
404
```python
405
# Analyze service start and end times
406
start_end_times = gk.get_start_and_end_times(feed, sample_date)
407
print(f"Service span: {start_end_times[0]} to {start_end_times[1]}")
408
409
# Convert to seconds for analysis
410
start_seconds = gk.timestr_to_seconds(start_end_times[0], mod24=False)
411
end_seconds = gk.timestr_to_seconds(start_end_times[1], mod24=False)  # GTFS times may exceed 24:00:00; do not wrap them
412
service_span_hours = (end_seconds - start_seconds) / 3600
413
414
print(f"Daily service span: {service_span_hours:.1f} hours")
415
416
# Analyze time series structure
417
sample_ts = gk.build_zero_route_time_series(feed, sample_date, "1h")
418
print(f"Hourly time series structure: {sample_ts.shape}")
419
print("Time periods:", len(sample_ts.index.unique()))
420
```
421
422
### Long Format Analysis
423
424
```python
425
# Convert to long format for detailed analysis
426
route_ts_long = gk.unstack_time_series(route_ts)
427
print("Long format time series:")
428
print(route_ts_long.head())
429
430
# Analyze by time of day
431
if 'time' in route_ts_long.columns and 'value' in route_ts_long.columns:
432
hourly_summary = route_ts_long.groupby(
433
route_ts_long['time'].dt.hour
434
)['value'].agg(['mean', 'std', 'sum'])
435
436
print("Hourly service summary:")
437
print(hourly_summary)
438
439
# Convert back to wide format
440
route_ts_restored = gk.restack_time_series(route_ts_long)
441
print("Restored wide format shape:", route_ts_restored.shape)
442
```
443
444
### Custom Frequency Analysis
445
446
```python
447
# Analyze at different time resolutions
448
frequencies = ["5min", "15min", "30min", "1h"]
449
frequency_analysis = {}
450
451
for freq in frequencies:
452
ts = gk.compute_route_time_series(feed, trip_stats, [sample_date], freq)
453
if len(ts) > 0:
454
frequency_analysis[freq] = {
455
'periods': len(ts),
456
'max_value': ts.values.max(),
457
'mean_value': ts.values.mean()
458
}
459
460
print("Analysis by frequency:")
461
for freq, stats in frequency_analysis.items():
462
print(f"{freq}: {stats['periods']} periods, max={stats['max_value']}, mean={stats['mean_value']:.2f}")
463
464
# Downsample high-frequency data
465
if "5min" in frequency_analysis:
466
high_freq_ts = gk.compute_route_time_series(feed, trip_stats, [sample_date], "5min")
467
downsampled = gk.downsample(high_freq_ts, "30min")
468
print(f"Downsampled from {len(high_freq_ts)} to {len(downsampled)} periods")
469
```