# Time Series Analysis

Time-based analysis, service frequency computation, and temporal patterns.

## Time Series Computation

{ .api }
```python
def compute_route_time_series(feed: Feed, trip_stats_subset: pd.DataFrame, dates: list[str],
                              freq: str, *, split_directions: bool = False) -> pd.DataFrame:
    """
    Compute route-level time series showing service frequency over time.

    Args:
        feed: Feed object containing route and schedule data
        trip_stats_subset: Pre-computed trip statistics DataFrame
        dates: List of service dates to analyze
        freq: Frequency string for time bins (e.g., "5T" for 5-minute intervals)
        split_directions: If True, compute separate series by direction

    Returns:
        DataFrame with time series data indexed by datetime, columns for routes/directions.
        Values represent service frequency (trips per time period).
    """

def compute_route_time_series_0(trip_stats_subset: pd.DataFrame, date_label: str,
                                freq: str, *, split_directions: bool = False) -> pd.DataFrame:
    """
    Helper function to compute route time series for a single date.

    Args:
        trip_stats_subset: Trip statistics for the specific date
        date_label: Date string label for the time series
        freq: Frequency string for time aggregation
        split_directions: If True, split by direction

    Returns:
        DataFrame with single-date route time series
    """

def compute_stop_time_series(feed: Feed, dates: list[str], stop_ids: list[str] | None,
                             freq: str, *, split_directions: bool = False) -> pd.DataFrame:
    """
    Compute stop-level time series showing arrival/departure frequency.

    Args:
        feed: Feed object containing stop and schedule data
        dates: List of service dates to analyze
        stop_ids: List of stop IDs to include, or None for all stops
        freq: Frequency string for time bins (e.g., "10T" for 10-minute intervals)
        split_directions: If True, compute separate series by direction

    Returns:
        DataFrame with time series indexed by datetime, columns for stops/directions.
        Values represent number of arrivals/departures per time period.
    """

def compute_stop_time_series_0(stop_times_subset: pd.DataFrame, trip_subset: pd.DataFrame,
                               freq: str, date_label: str, *, split_directions: bool = False) -> pd.DataFrame:
    """
    Helper function to compute stop time series for a single date.

    Args:
        stop_times_subset: Stop times data for the specific date
        trip_subset: Trip data subset for the date
        freq: Frequency string for aggregation
        date_label: Date string label
        split_directions: If True, split by direction

    Returns:
        DataFrame with single-date stop time series
    """

def compute_feed_time_series(feed: Feed, trip_stats: pd.DataFrame, dates: list[str],
                             freq: str, *, split_route_types: bool = False) -> pd.DataFrame:
    """
    Compute feed-level time series showing total system activity over time.

    Args:
        feed: Feed object containing system data
        trip_stats: Pre-computed trip statistics DataFrame
        dates: List of service dates to analyze
        freq: Frequency string for time bins
        split_route_types: If True, compute separate series by route type

    Returns:
        DataFrame with system-wide time series indexed by datetime.
        Values represent total trips/activity per time period.
    """
```
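
For completeness, a minimal sketch of the single-date helper, assuming the signature above and the `feed`/`trip_stats` set-up shown in the Usage Examples below; the date label `"20240115"` is an illustrative placeholder and `trip_stats` is assumed to already be restricted to that service date:

```python
# Illustrative sketch of the single-date helper, assuming the signature above.
# "20240115" is a placeholder date label; trip_stats is assumed to contain only
# trips running on that date.
one_day_ts = gk.compute_route_time_series_0(trip_stats, "20240115", "15T", split_directions=False)
print(one_day_ts.shape)
```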

## Time Series Structure Building

{ .api }
```python
def build_zero_route_time_series(feed: Feed, date_label: str, freq: str,
                                 *, split_directions: bool = False) -> pd.DataFrame:
    """
    Build an empty route time series structure with all routes and time periods.

    Args:
        feed: Feed object containing route data
        date_label: Date string for the time series structure
        freq: Frequency string defining time bin size
        split_directions: If True, include direction columns

    Returns:
        DataFrame of zero values with the complete route/time structure, ready for filling
    """

def build_zero_stop_time_series(feed: Feed, date_label: str, freq: str,
                                *, split_directions: bool = False) -> pd.DataFrame:
    """
    Build an empty stop time series structure with all stops and time periods.

    Args:
        feed: Feed object containing stop data
        date_label: Date string for the time series structure
        freq: Frequency string defining time bin size
        split_directions: If True, include direction columns

    Returns:
        DataFrame of zero values with the complete stop/time structure, ready for filling
    """
```
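
A minimal sketch of the zero-filled scaffolding, assuming the signatures above and the `feed` loaded in the Usage Examples below; `"20240115"` is a placeholder date label:

```python
# Illustrative sketch: build the empty stop structure for one date at hourly
# resolution and inspect it before any service counts are filled in.
zero_stop_ts = gk.build_zero_stop_time_series(feed, "20240115", "1H")
print(zero_stop_ts.shape)          # (hourly bins, stops)
print(zero_stop_ts.values.sum())   # 0 -- structure only, no service recorded yet
```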

## Time Series Utilities

{ .api }
```python
def combine_time_series(time_series_dict: dict, kind: str,
                        *, split_directions: bool = False) -> pd.DataFrame:
    """
    Combine multiple time series into a single DataFrame.

    Args:
        time_series_dict: Dictionary mapping labels to time series DataFrames
        kind: Type of combination ("sum", "mean", "max", etc.)
        split_directions: If True, handle direction-split data

    Returns:
        Combined DataFrame with multiple time series as columns or aggregated
    """

def downsample(time_series: pd.DataFrame, freq: str) -> pd.DataFrame:
    """
    Downsample a time series to a lower frequency (larger time bins).

    Args:
        time_series: Time series DataFrame with datetime index
        freq: Target frequency string (e.g., "30T" for 30-minute bins)

    Returns:
        Downsampled DataFrame with aggregated values at the lower frequency
    """

def unstack_time_series(time_series: pd.DataFrame) -> pd.DataFrame:
    """
    Convert a wide-format time series to long format for analysis.

    Args:
        time_series: Wide-format time series with columns for entities

    Returns:
        Long-format DataFrame with entity and value columns
    """

def restack_time_series(unstacked_time_series: pd.DataFrame) -> pd.DataFrame:
    """
    Convert a long-format time series back to wide format.

    Args:
        unstacked_time_series: Long-format time series DataFrame

    Returns:
        Wide-format DataFrame with entities as columns
    """
```

## Active Trips Analysis

{ .api }
```python
def get_active_trips_df(trip_times: pd.DataFrame) -> pd.Series:
    """
    Count the number of active trips at each time point throughout the day.

    Args:
        trip_times: DataFrame with trip start and end times

    Returns:
        Series indexed by time showing the count of concurrently active trips
    """
```

## Time and Date Utilities

{ .api }
```python
def get_start_and_end_times(feed: Feed, date: str) -> list[str]:
    """
    Get the first and last service times for a specific date.

    Args:
        feed: Feed object containing schedule data
        date: Date string (YYYYMMDD) to analyze

    Returns:
        List with [earliest_time, latest_time] as HH:MM:SS strings
    """

def get_stop_times(feed: Feed, date: str) -> pd.DataFrame:
    """
    Get stop_times data filtered to trips active on a specific date.

    Args:
        feed: Feed object containing stop_times and calendar data
        date: Date string (YYYYMMDD) to filter by

    Returns:
        DataFrame with stop_times for trips active on the specified date
    """

# Time conversion utilities
def timestr_to_seconds(x: str, *, inverse: bool = False, mod24: bool = True) -> int:
    """Convert a time string to seconds since midnight."""

def timestr_mod24(timestr: str) -> int:
    """Convert a time string to seconds with 24-hour modulo."""

def datestr_to_date(x: str, format_str: str, *, inverse: bool = False) -> str | date:
    """Convert between date strings and date objects."""
```
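
A quick sketch of the conversion helpers, assuming the signatures above; in particular it assumes `inverse=True` maps seconds back to an HH:MM:SS string and date objects back to date strings, which the type hints do not spell out:

```python
# Illustrative sketch of the conversion helpers, assuming the signatures above.
secs = gk.timestr_to_seconds("08:15:30")   # 29730 seconds since midnight
print(secs)

# Assumption: inverse=True maps seconds back to an HH:MM:SS string.
print(gk.timestr_to_seconds(29730, inverse=True))

# Convert a GTFS date string to a date object and back.
d = gk.datestr_to_date("20240115", "%Y%m%d")
print(d, gk.datestr_to_date(d, "%Y%m%d", inverse=True))
```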

## Peak Analysis

{ .api }
```python
def get_peak_indices(times: list, counts: list) -> np.array:
    """
    Find the indices corresponding to the longest peak period in a time series.

    Args:
        times: List of time points
        counts: List of count values corresponding to times

    Returns:
        Array of indices representing the longest continuous peak period
    """

def get_max_runs(x: np.array) -> np.array:
    """
    Get the start and end indices of runs of maximum values in an array.

    Args:
        x: Array of numeric values

    Returns:
        Array of [start_idx, end_idx] pairs for maximum-value runs
    """
```
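
For intuition, a toy sketch of `get_max_runs` under the description above; the exact end-index convention (inclusive vs. exclusive) is left to the implementation:

```python
import numpy as np

# Toy illustration: the maximum value 5 occurs in two runs (indices 2-3 and 6),
# so two [start_idx, end_idx] pairs come back per the spec above.
x = np.array([1, 3, 5, 5, 2, 4, 5])
print(gk.get_max_runs(x))
```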

## Usage Examples

### Route Time Series Analysis

```python
import gtfs_kit as gk
import pandas as pd
import matplotlib.pyplot as plt

# Load feed and compute trip stats
feed = gk.read_feed("data/gtfs.zip")
trip_stats = gk.compute_trip_stats(feed, route_ids=None)

# Get service dates
dates = gk.get_dates(feed)
sample_dates = dates[:7]  # First week

# Compute route time series
route_ts = gk.compute_route_time_series(
    feed=feed,
    trip_stats_subset=trip_stats,
    dates=sample_dates,
    freq="15T",  # 15-minute intervals
    split_directions=True,
)

print(f"Route time series shape: {route_ts.shape}")
print("Time range:", route_ts.index.min(), "to", route_ts.index.max())

# Analyze the busiest routes
daily_totals = route_ts.sum()
busiest_routes = daily_totals.nlargest(10)
print("Busiest routes:")
print(busiest_routes)
```
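
A possible follow-up, assuming one column per route/direction as described in the spec above: plot the frequency profile of the single busiest column.

```python
# Follow-up sketch: plot the time profile of the busiest route/direction column
# identified above (column layout per the compute_route_time_series spec).
top_column = busiest_routes.index[0]
route_ts[top_column].plot(figsize=(12, 4), title=f"Service frequency: {top_column}")
plt.xlabel("Time")
plt.ylabel("Trips per 15 minutes")
plt.show()
```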

### Stop Time Series Analysis

```python
# Compute stop time series for a sample of stops
# (substitute your own selection, e.g. the busiest stops from stop stats)
stops = gk.get_stops(feed)
sample_stop_ids = stops['stop_id'].head(20).tolist()

stop_ts = gk.compute_stop_time_series(
    feed=feed,
    dates=sample_dates,
    stop_ids=sample_stop_ids,
    freq="10T",  # 10-minute intervals
    split_directions=False,
)

print(f"Stop time series shape: {stop_ts.shape}")

# Find peak hours at stops
hourly_ts = gk.downsample(stop_ts, "1H")
peak_hours = hourly_ts.idxmax()
print("Peak hours by stop:")
print(peak_hours.head())
```

### System-Wide Time Series

```python
# Compute feed-level time series
feed_ts = gk.compute_feed_time_series(
    feed=feed,
    trip_stats=trip_stats,
    dates=sample_dates,
    freq="30T",  # 30-minute intervals
    split_route_types=True,
)

print("System-wide time series:")
print(feed_ts.head())

# Plot system activity
if len(feed_ts) > 0:
    daily_pattern = feed_ts.groupby(feed_ts.index.time).sum()
    plt.figure(figsize=(12, 6))
    daily_pattern.plot()
    plt.title("Daily Transit System Activity Pattern")
    plt.xlabel("Time of Day")
    plt.ylabel("Number of Trips")
    plt.show()
```
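
Since `split_route_types=True` is specified to produce a separate series per route type, a short follow-up sketch sums total activity per route type over the sample dates:

```python
# Follow-up sketch, assuming one column per route type (split_route_types=True):
# total system activity per route type across the sample dates.
by_route_type = feed_ts.sum().sort_values(ascending=False)
print("Activity by route type:")
print(by_route_type)
```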

### Time Series Combination and Analysis

```python
# Create multiple time series for comparison
weekday_dates = [d for d in dates[:7] if pd.to_datetime(d, format='%Y%m%d').weekday() < 5]
weekend_dates = [d for d in dates[:7] if pd.to_datetime(d, format='%Y%m%d').weekday() >= 5]

weekday_ts = gk.compute_route_time_series(feed, trip_stats, weekday_dates, "30T")
weekend_ts = gk.compute_route_time_series(feed, trip_stats, weekend_dates, "30T")

# Combine time series
combined_ts = gk.combine_time_series(
    {"weekday": weekday_ts, "weekend": weekend_ts},
    kind="mean",
)

print("Combined weekday/weekend patterns:")
print(combined_ts.head())

# Analyze differences
if len(combined_ts) > 0 and 'weekday' in combined_ts.columns and 'weekend' in combined_ts.columns:
    combined_ts['difference'] = combined_ts['weekday'] - combined_ts['weekend']
    print("Peak weekday advantage:", combined_ts['difference'].max())
```

### Active Trips Analysis

```python
# Analyze concurrent trip activity
sample_date = dates[0]
stop_times = gk.get_stop_times(feed, sample_date)

# Get trip start/end times in seconds since midnight
# (arrival_time is an HH:MM:SS string, so convert before aggregating)
stop_times['arrival_seconds'] = stop_times['arrival_time'].map(gk.timestr_to_seconds)
trip_times = stop_times.groupby('trip_id')['arrival_seconds'].agg(['min', 'max'])
trip_times.columns = ['start_time', 'end_time']

# Count active trips over time
active_trips = gk.get_active_trips_df(trip_times)
print(f"Max concurrent trips: {active_trips.max()}")

# Find peak periods
times = active_trips.index.tolist()
counts = active_trips.values.tolist()
peak_indices = gk.get_peak_indices(times, counts)

if len(peak_indices) > 0:
    peak_start = times[peak_indices[0]]
    peak_end = times[peak_indices[-1]]
    print(f"Peak period: {peak_start} to {peak_end}")
```

### Temporal Pattern Analysis

```python
# Analyze service start and end times
start_end_times = gk.get_start_and_end_times(feed, sample_date)
print(f"Service span: {start_end_times[0]} to {start_end_times[1]}")

# Convert to seconds for analysis
start_seconds = gk.timestr_to_seconds(start_end_times[0])
end_seconds = gk.timestr_to_seconds(start_end_times[1])
service_span_hours = (end_seconds - start_seconds) / 3600

print(f"Daily service span: {service_span_hours:.1f} hours")

# Analyze time series structure
sample_ts = gk.build_zero_route_time_series(feed, sample_date, "1H")
print(f"Hourly time series structure: {sample_ts.shape}")
print("Time periods:", len(sample_ts.index.unique()))
```

### Long Format Analysis

```python
# Convert to long format for detailed analysis
route_ts_long = gk.unstack_time_series(route_ts)
print("Long format time series:")
print(route_ts_long.head())

# Analyze by time of day
if 'time' in route_ts_long.columns and 'value' in route_ts_long.columns:
    hourly_summary = route_ts_long.groupby(
        route_ts_long['time'].dt.hour
    )['value'].agg(['mean', 'std', 'sum'])

    print("Hourly service summary:")
    print(hourly_summary)

# Convert back to wide format
route_ts_restored = gk.restack_time_series(route_ts_long)
print("Restored wide format shape:", route_ts_restored.shape)
```

### Custom Frequency Analysis

```python
# Analyze at different time resolutions
frequencies = ["5T", "15T", "30T", "1H"]
frequency_analysis = {}

for freq in frequencies:
    ts = gk.compute_route_time_series(feed, trip_stats, [sample_date], freq)
    if len(ts) > 0:
        frequency_analysis[freq] = {
            'periods': len(ts),
            'max_value': ts.values.max(),
            'mean_value': ts.values.mean(),
        }

print("Analysis by frequency:")
for freq, stats in frequency_analysis.items():
    print(f"{freq}: {stats['periods']} periods, max={stats['max_value']}, mean={stats['mean_value']:.2f}")

# Downsample high-frequency data
if "5T" in frequency_analysis:
    high_freq_ts = gk.compute_route_time_series(feed, trip_stats, [sample_date], "5T")
    downsampled = gk.downsample(high_freq_ts, "30T")
    print(f"Downsampled from {len(high_freq_ts)} to {len(downsampled)} periods")
```