Distance measures for time series with Dynamic Time Warping as the primary focus
npx @tessl/cli install tessl/pypi-dtaidistance@1.2.0
A comprehensive Python library for computing distance measures between time series, with Dynamic Time Warping (DTW) as the primary focus. It offers both pure Python and optimized C implementations for performance-critical applications, enabling researchers and developers to measure similarity between temporal sequences with various constraints and optimization options.
pip install dtaidistance
import dtaidistance
For DTW functionality:
from dtaidistance import dtw
For the fast C implementation (if available):
from dtaidistance import dtw_c
from dtaidistance import dtw
import numpy as np
# Create two time series
s1 = [0, 0, 1, 2, 1, 0, 1, 0, 0]
s2 = [0, 1, 2, 0, 0, 0, 0, 0, 0]
# Compute DTW distance
distance = dtw.distance(s1, s2)
print(f"DTW distance: {distance}")
# Compute distance with constraints
distance_constrained = dtw.distance(s1, s2, window=3, max_dist=5.0)
# Get the optimal warping path
path = dtw.warping_path(s1, s2)
print(f"Warping path: {path}")
# Compute distance matrix for multiple series
series = [[0, 0, 1, 2, 1, 0, 1, 0, 0],
[0, 1, 2, 0, 0, 0, 0, 0, 0],
[1, 2, 0, 0, 0, 0, 0, 1, 1]]
distances = dtw.distance_matrix(series)
print("Distance matrix shape:", distances.shape)
The dtaidistance library is organized around several key components:
Fundamental DTW distance computation between time series pairs, including constraint-based optimizations, early stopping mechanisms, and both Python and C implementations for performance flexibility.
def distance(s1, s2, window=None, max_dist=None, max_step=None,
max_length_diff=None, penalty=None, psi=None, use_c=False):
"""
Compute DTW distance between two sequences.
Parameters:
- s1, s2: array-like, input sequences
- window: int, warping window constraint
- max_dist: float, early stopping threshold
- max_step: float, maximum step size
- max_length_diff: int, maximum length difference
- penalty: float, penalty for compression/expansion
- psi: int, psi relaxation parameter
- use_c: bool, use C implementation
Returns:
float: DTW distance
"""
def distance_fast(s1, s2, window=None, max_dist=None, max_step=None,
max_length_diff=None, penalty=None, psi=None):
"""Fast C version of DTW distance calculation."""
def lb_keogh(s1, s2, window=None, max_dist=None, max_step=None,
max_length_diff=None):
"""Lower bound LB_KEOGH calculation."""
Computation and analysis of optimal warping paths between sequences, including path extraction, penalty calculations, and warping amount quantification for understanding sequence alignment patterns.
def warping_paths(s1, s2, window=None, max_dist=None, max_step=None,
max_length_diff=None, penalty=None, psi=None):
"""
DTW with full warping paths matrix.
Returns:
tuple: (distance, paths_matrix)
"""
def warping_path(from_s, to_s, **kwargs):
"""Compute optimal warping path between sequences."""
def warp(from_s, to_s, **kwargs):
"""
Warp one sequence to match another.
Returns:
tuple: (warped_sequence, path)
"""
Efficient computation of distance matrices for multiple time series, supporting parallel processing, memory optimization through blocking, and various output formats for large-scale time series analysis.
def distance_matrix(s, max_dist=None, max_length_diff=None, window=None,
max_step=None, penalty=None, psi=None, block=None,
compact=False, parallel=False, use_c=False,
use_nogil=False, show_progress=False):
"""
Compute distance matrix for all sequence pairs.
Parameters:
- s: list/array of sequences
- compact: bool, return condensed array if True
- parallel: bool, enable parallel computation
- show_progress: bool, show progress bar
Returns:
array: distance matrix or condensed array
"""
def distances_array_to_matrix(dists, nb_series, block=None):
"""Convert condensed distance array to full matrix."""
Hierarchical clustering algorithms specifically designed for time series data, including multiple clustering strategies, tree representations, and visualization capabilities for discovering patterns in temporal datasets.
class Hierarchical:
"""Hierarchical clustering for time series."""
def __init__(self, dists_fun, dists_options, max_dist=np.inf,
merge_hook=None, order_hook=None, show_progress=True):
"""Initialize hierarchical clustering."""
def fit(self, series):
"""
Perform clustering.
Returns:
dict: cluster hierarchy
"""
class HierarchicalTree:
"""Hierarchical clustering with tree tracking."""
def plot(self, filename=None, axes=None, **kwargs):
"""Plot hierarchy and time series."""
Comprehensive visualization capabilities for DTW analysis, including warping path plots, distance matrix heatmaps, and time series alignment visualizations for both 1D and multi-dimensional data.
def plot_warping(s1, s2, path, filename=None):
"""
Plot optimal warping between sequences.
Returns:
tuple: (figure, axes)
"""
def plot_warpingpaths(s1, s2, paths, path=None, filename=None,
shownumbers=False):
"""Plot warping paths matrix with sequences."""
def plot_matrix(distances, filename=None, ax=None, shownumbers=False):
"""Plot distance matrix."""
DTW algorithms optimized for multi-dimensional time series where each time point contains multiple features, using Euclidean distance for point-wise comparisons and supporting the same constraint and optimization options as 1D DTW.
def distance(s1, s2, window=None, max_dist=None, max_step=None,
max_length_diff=None, penalty=None, psi=None, use_c=False):
"""DTW for N-dimensional sequences using Euclidean distance."""
def distance_matrix(s, max_dist=None, max_length_diff=None, window=None,
max_step=None, penalty=None, psi=None, block=None,
parallel=False, use_c=False, show_progress=False):
"""Distance matrix for N-dimensional sequences."""
Advanced DTW with custom weighting functions and machine learning integration for learning optimal feature weights from labeled data, including decision tree-based weight learning and must-link/cannot-link constraint incorporation.
def warping_paths(s1, s2, weights=None, window=None, **kwargs):
"""DTW with custom weight functions."""
def compute_weights_using_dt(series, labels, prototypeidx, **kwargs):
"""
Learn weights using decision trees.
Returns:
tuple: (weights, importances)
"""
class DecisionTreeClassifier:
"""Custom decision tree for DTW weight learning."""
def fit(self, features, targets, use_feature_once=True,
ignore_features=None, min_ig=0):
"""Train classifier."""
Global sequence alignment algorithms like Needleman-Wunsch for optimal alignment of time series with gap penalties, providing alternative approaches to DTW for sequence comparison and alignment tasks.
def needleman_wunsch(s1, s2, window=None, max_dist=None, max_step=None,
max_length_diff=None, psi=None):
"""
Global sequence alignment.
Returns:
tuple: (alignment_score, alignment_matrix)
"""
def best_alignment(paths, s1=None, s2=None, gap="-", order=None):
"""
Compute optimal alignment from paths matrix.
Returns:
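tuple: (path, aligned_s1, aligned_s2)
"""
Most DTW functions share these common constraint parameters:
- window: int, warping window constraint
- max_dist: float, early stopping threshold
- max_step: float, maximum step size
- max_length_diff: int, maximum length difference
- penalty: float, penalty for compression/expansion
- psi: int, psi relaxation parameter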
class SeriesContainer:
"""Container for handling multiple sequence data formats."""
def __init__(self, series):
"""Initialize with various data types (list, numpy array, etc.)."""
def c_data(self):
"""Return C-compatible data structure."""
def get_max_y(self):
"""Get maximum Y value across all series."""
@staticmethod
def wrap(series):
"""Wrap series in container."""