Python library for trajectory and movement data analysis built on pandas and GeoPandas.
—
Tools for extracting insights from trajectory collections, including significant point detection, clustering, and flow analysis. These classes enable higher-level analysis of movement patterns across multiple trajectories.
Advanced analysis of trajectory collections to extract meaningful patterns and insights.
class TrajectoryCollectionAggregator:
    """Aggregator for a TrajectoryCollection: significant points, clusters, and flows."""

    def __init__(self, traj_collection, max_distance, min_distance, min_stop_duration, min_angle=45):
        """
        Aggregates trajectories by extracting significant points, clustering, and extracting flows.

        Parameters:
        - traj_collection: TrajectoryCollection to analyze
        - max_distance: Maximum distance for clustering analysis
        - min_distance: Minimum distance for filtering points
        - min_stop_duration: Minimum duration for stop detection
        - min_angle: Minimum angle change for significant point detection (degrees)
        """

    def get_significant_points_gdf(self):
        """
        Extract significant points from all trajectories.

        Significant points include:
        - Start and end points
        - Points with significant direction changes
        - Points where speed changes significantly
        - Stop locations

        Returns:
            GeoDataFrame with significant points and their attributes
        """

    def get_clusters_gdf(self):
        """
        Get clustered significant points.

        Groups nearby significant points into clusters to identify
        common locations across multiple trajectories.

        Returns:
            GeoDataFrame with cluster information including:
            - Cluster ID
            - Cluster centroid
            - Number of points in cluster
            - Trajectories represented in cluster
        """

    def get_flows_gdf(self):
        """
        Extract flow information between clusters.

        Analyzes movement patterns between significant point clusters
        to identify common routes and flows.

        Returns:
            GeoDataFrame with flow lines including:
            - Origin cluster ID
            - Destination cluster ID
            - Number of trajectories using this flow
            - Flow geometry (LineString)
        """


Detailed analysis for extracting important points from individual trajectories.
class PtsExtractor:
    """Extractor of significant points from a single trajectory."""

    def __init__(self, traj, max_distance, min_distance, min_stop_duration, min_angle=45):
        """
        Extracts significant points from trajectories.

        Parameters:
        - traj: Trajectory object to analyze
        - max_distance: Maximum distance for analysis
        - min_distance: Minimum distance between significant points
        - min_stop_duration: Minimum duration for stop detection
        - min_angle: Minimum angle change for significance (degrees)
        """

    def find_significant_points(self):
        """
        Find significant points in the trajectory.

        Identifies points that are important for understanding
        trajectory structure and behavior, including:
        - Start and end points
        - Direction change points
        - Speed change points
        - Stop locations

        Returns:
            GeoDataFrame with significant points and their classifications
        """


Grid-based clustering for grouping nearby points across trajectories.
class PointClusterer:
    """Grid-based clusterer that groups nearby points."""

    def __init__(self, points, max_distance, is_latlon):
        """
        Grid-based point clustering for trajectory analysis.

        Parameters:
        - points: GeoDataFrame or list of Point geometries to cluster
        - max_distance: Maximum distance for clustering (cluster size)
        - is_latlon: Boolean indicating if coordinates are latitude/longitude
        """

    def get_clusters(self):
        """
        Perform grid-based clustering of points.

        Groups points into spatial clusters based on proximity,
        useful for identifying common locations across trajectories.

        Returns:
            GeoDataFrame or list with cluster assignments and cluster information:
            - Original point data
            - Cluster ID for each point
            - Cluster centroid coordinates
            - Number of points in each cluster
        """


import movingpandas as mpd
import pandas as pd
# Assume we have a TrajectoryCollection
# collection = mpd.TrajectoryCollection(...)

# Create aggregator for comprehensive analysis
aggregator = mpd.TrajectoryCollectionAggregator(
    traj_collection=collection,
    max_distance=100,  # 100 meter clustering distance
    min_distance=50,  # 50 meter minimum point separation
    min_stop_duration=pd.Timedelta("5 minutes"),  # 5 minute minimum stops
    min_angle=30,  # 30 degree minimum angle change
)

# Extract significant points across all trajectories
significant_points = aggregator.get_significant_points_gdf()
print(f"Found {len(significant_points)} significant points")

# Get clustered locations
clusters = aggregator.get_clusters_gdf()
print(f"Identified {clusters['cluster_id'].nunique()} distinct location clusters")

# Analyze flows between clusters
flows = aggregator.get_flows_gdf()
print(f"Found {len(flows)} distinct flows between clusters")

# Examine the most common flows
top_flows = flows.nlargest(5, 'trajectory_count')
# itertuples keeps each column's dtype and avoids building a Series per row;
# the row index is not needed here, so it is left out of the tuples.
for flow in top_flows.itertuples(index=False):
    print(f"Flow from cluster {flow.origin_cluster} to {flow.dest_cluster}: "
          f"{flow.trajectory_count} trajectories")

# Analyze single trajectory for significant points
# traj = mpd.Trajectory(...)

# Gather the extractor settings in one place, then build the extractor
extractor_settings = {
    "traj": traj,
    "max_distance": 200,
    "min_distance": 25,
    "min_stop_duration": pd.Timedelta("2 minutes"),
    "min_angle": 45,
}
extractor = mpd.PtsExtractor(**extractor_settings)

# Find significant points
sig_points = extractor.find_significant_points()

# Tally how often each type of significant point occurs
point_type_column = sig_points['point_type']
point_types = point_type_column.value_counts()
print("Significant point types:")
for point_type, count in point_types.items():
    print(f" {point_type}: {count}")

import geopandas as gpd
from shapely.geometry import Point
# Create sample points for clustering (coordinates first, geometries second)
sample_coordinates = [
    (0, 0), (0.1, 0.1), (0.2, 0.05),  # Cluster 1
    (5, 5), (5.1, 5.2), (4.9, 4.8),  # Cluster 2
    (10, 10), (10.3, 10.1),  # Cluster 3
]
points_data = [Point(x, y) for x, y in sample_coordinates]

# Create point clusterer
clusterer = mpd.PointClusterer(
    points=points_data,
    max_distance=0.5,  # 0.5 unit clustering distance
    is_latlon=False,  # Using projected coordinates
)

# Get clusters
clusters = clusterer.get_clusters()

# Analyze clustering results
print(f"Points clustered into {len(clusters)} groups")

# Complete workflow for analyzing movement patterns
def analyze_movement_patterns(
    trajectory_collection,
    *,
    max_distance=100,
    min_distance=25,
    min_stop_duration=None,
    min_angle=45,
):
    """Comprehensive analysis of movement patterns in trajectory collection.

    Runs the aggregator once and gathers its three outputs together with a
    few summary statistics.

    Parameters:
    - trajectory_collection: TrajectoryCollection to analyze
    - max_distance: Maximum distance for clustering analysis (default 100)
    - min_distance: Minimum distance for filtering points (default 25)
    - min_stop_duration: Minimum duration for stop detection; when None,
      a 3 minute pd.Timedelta is used
    - min_angle: Minimum angle change for significant point detection,
      in degrees (default 45)

    Returns:
        dict with the keys:
        - 'summary': dict of counts ('total_trajectories',
          'significant_points', 'location_clusters', 'distinct_flows') plus
          'most_used_flow', the flows row with the highest
          'trajectory_count', or None when there are no flows
        - 'significant_points': result of get_significant_points_gdf()
        - 'clusters': result of get_clusters_gdf()
        - 'flows': result of get_flows_gdf()
    """
    # Resolve the default here so the signature stays free of pandas objects.
    if min_stop_duration is None:
        min_stop_duration = pd.Timedelta("3 minutes")

    # Set up aggregator with the requested parameters
    aggregator = mpd.TrajectoryCollectionAggregator(
        traj_collection=trajectory_collection,
        max_distance=max_distance,
        min_distance=min_distance,
        min_stop_duration=min_stop_duration,
        min_angle=min_angle,
    )

    # Extract all analysis components
    significant_points = aggregator.get_significant_points_gdf()
    clusters = aggregator.get_clusters_gdf()
    flows = aggregator.get_flows_gdf()

    # idxmax() has nothing to pick from on an empty frame, so guard it.
    if len(flows) > 0:
        most_used_flow = flows.loc[flows['trajectory_count'].idxmax()]
    else:
        most_used_flow = None

    # Summary statistics
    analysis_summary = {
        'total_trajectories': len(trajectory_collection),
        'significant_points': len(significant_points),
        'location_clusters': clusters['cluster_id'].nunique(),
        'distinct_flows': len(flows),
        'most_used_flow': most_used_flow,
    }

    return {
        'summary': analysis_summary,
        'significant_points': significant_points,
        'clusters': clusters,
        'flows': flows,
    }
# Use the analysis function
# results = analyze_movement_patterns(my_collection)
# print("Analysis Summary:", results['summary'])

# Cluster different types of trajectory points separately
# Origins and destinations are clustered with identical settings:
# 200 meter clusters on latitude/longitude coordinates
cluster_settings = {"max_distance": 200, "is_latlon": True}

# Extract start points from collection and cluster them into origins
start_points = collection.get_start_locations()
start_geometries = start_points.geometry.tolist()
start_clusterer = mpd.PointClusterer(points=start_geometries, **cluster_settings)
origin_clusters = start_clusterer.get_clusters()

# Extract end points from collection and cluster them into destinations
end_points = collection.get_end_locations()
end_geometries = end_points.geometry.tolist()
end_clusterer = mpd.PointClusterer(points=end_geometries, **cluster_settings)
destination_clusters = end_clusterer.get_clusters()

print(f"Found {len(origin_clusters)} origin clusters")
print(f"Found {len(destination_clusters)} destination clusters")

The significant points GeoDataFrame typically contains:
- geometry: Point geometry of significant location
- point_type: Type of significant point (start, end, direction_change, speed_change, stop)
- trajectory_id: ID of source trajectory
- timestamp: Time when point occurred
- speed: Speed at this point (if available)
- direction: Direction/heading at this point (if available)
- significance_score: Numeric score indicating importance

The clusters GeoDataFrame typically contains:
- geometry: Centroid point of cluster
- cluster_id: Unique identifier for cluster
- point_count: Number of points in cluster
- trajectory_count: Number of different trajectories represented
- cluster_radius: Spatial extent of cluster
- dominant_point_type: Most common type of significant point in cluster

The flows GeoDataFrame typically contains:
- geometry: LineString representing flow path
- origin_cluster: ID of origin cluster
- dest_cluster: ID of destination cluster
- trajectory_count: Number of trajectories using this flow
- avg_duration: Average time to travel this flow
- avg_distance: Average distance of this flow

Install with Tessl CLI
npx tessl i tessl/pypi-movingpandas