or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

alignment.md clustering.md core-dtw.md distance-matrices.md index.md ndim-dtw.md visualization.md warping-paths.md weighted-dtw.md

docs/clustering.md

0

# Time Series Clustering

1

2

Hierarchical clustering algorithms specifically designed for time series data. This module provides multiple clustering strategies, tree representations, and visualization capabilities for discovering patterns and groupings in temporal datasets using DTW-based distance measures.

3

4

## Capabilities

5

6

### Hierarchical Clustering

7

8

Core hierarchical clustering implementation that builds cluster hierarchies using DTW distances with customizable distance functions and merging strategies.

9

10

```python { .api }

11

class Hierarchical:

12

"""

13

Hierarchical clustering for time series using DTW distances.

14

15

Builds a hierarchy of clusters by iteratively merging the closest pairs

16

of sequences or clusters based on DTW distance measures.

17

"""

18

19

def __init__(self, dists_fun, dists_options, max_dist=np.inf,

20

merge_hook=None, order_hook=None, show_progress=True):

21

"""

22

Initialize hierarchical clustering.

23

24

Parameters:

25

- dists_fun: function, distance matrix computation function (e.g., dtw.distance_matrix)

26

- dists_options: dict, options passed to distance function

27

- max_dist: float, distance threshold; series or clusters that are farther apart than this are not merged

28

- merge_hook: function, callback called when clusters merge

29

- order_hook: function, callback for ordering sequences

30

- show_progress: bool, display progress during clustering

31

"""

32

33

def fit(self, series):

34

"""

35

Perform hierarchical clustering on time series collection.

36

37

Parameters:

38

- series: list/array, collection of time series sequences

39

40

Returns:

41

dict: maps each cluster's prototype index to the set of indices of the series that belong to that cluster

42

"""

43

```

44

45

### Tree Representations

46

47

Abstract base class and concrete implementations for representing and manipulating cluster hierarchies.

48

49

```python { .api }

50

class BaseTree:

51

"""

52

Abstract base class for cluster tree representations.

53

54

Provides common interface for different tree implementations

55

and visualization capabilities.

56

"""

57

58

@property

59

def maxnode(self):

60

"""Get maximum node ID in the tree."""

61

62

def get_linkage(self, node):

63

"""

64

Get linkage information for a specific node.

65

66

Parameters:

67

- node: int, node identifier

68

69

Returns:

70

tuple: linkage information (left, right, distance, count)

71

"""

72

73

def plot(self, filename=None, axes=None, **kwargs):

74

"""

75

Plot hierarchy dendrogram and time series.

76

77

Parameters:

78

- filename: str, optional file to save plot

79

- axes: matplotlib axes, optional axes for plotting

80

- **kwargs: additional plotting parameters

81

82

Returns:

83

tuple: (figure, axes)

84

"""

85

86

def to_dot(self):

87

"""

88

Generate Graphviz DOT representation of the tree.

89

90

Returns:

91

str: DOT format string for visualization with Graphviz

92

"""

93

94

class HierarchicalTree(BaseTree):

95

"""

96

Wrapper for hierarchical clustering with tree tracking.

97

98

Extends the basic Hierarchical clustering with tree structure

99

preservation and visualization capabilities.

100

"""

101

102

def __init__(self, model=None, **kwargs):

103

"""

104

Initialize hierarchical tree.

105

106

Parameters:

107

- model: Hierarchical, optional pre-configured clustering model

108

- **kwargs: parameters passed to Hierarchical constructor if model is None

109

"""

110

111

def fit(self, series, *args, **kwargs):

112

"""

113

Fit clustering model and build tree structure.

114

115

Parameters:

116

- series: list/array, time series collection

117

- *args, **kwargs: additional parameters passed to clustering

118

119

Returns:

120

self: fitted tree object

121

"""

122

123

class LinkageTree(BaseTree):

124

"""

125

Fast scipy-based hierarchical clustering.

126

127

Uses scipy's optimized linkage algorithms for improved performance

128

on large datasets while maintaining DTW distance compatibility.

129

"""

130

131

def __init__(self, dists_fun, dists_options, method='complete'):

132

"""

133

Initialize linkage-based clustering.

134

135

Parameters:

136

- dists_fun: function, distance computation function

137

- dists_options: dict, distance function options

138

- method: str, linkage method passed to scipy.cluster.hierarchy.linkage (e.g. 'complete', 'single', 'average', 'ward')

139

"""

140

141

def fit(self, series):

142

"""

143

Perform clustering using scipy linkage.

144

145

Parameters:

146

- series: list/array, time series collection

147

148

Returns:

149

self: fitted tree object

150

"""

151

```

152

153

### Clustering Hooks

154

155

Utility functions for creating custom hooks that modify clustering behavior through weights and ordering constraints.

156

157

```python { .api }

158

class Hooks:

159

"""Utility class for creating clustering hooks."""

160

161

@staticmethod

162

def create_weighthook(weights, series):

163

"""

164

Create a weight hook for biasing cluster merging.

165

166

Parameters:

167

- weights: array-like, weights for each series

168

- series: list/array, time series collection

169

170

Returns:

171

function: weight hook function

172

"""

173

174

@staticmethod

175

def create_orderhook(weights):

176

"""

177

Create an order hook for controlling merge sequence.

178

179

Parameters:

180

- weights: array-like, ordering weights

181

182

Returns:

183

function: order hook function

184

"""

185

```

186

187

## Usage Examples

188

189

### Basic Hierarchical Clustering

190

191

```python

192

from dtaidistance import dtw, clustering

193

import numpy as np

194

195

# Create sample time series with different patterns

196

series = [

197

[1, 2, 3, 2, 1], # Mountain shape

198

[1, 3, 2, 3, 1], # Double peak

199

[0, 1, 2, 3, 4], # Increasing

200

[4, 3, 2, 1, 0], # Decreasing

201

[2, 2, 2, 2, 2], # Constant

202

[1, 2, 3, 2, 1, 0] # Mountain with tail

203

]

204

205

# Set up hierarchical clustering

206

clusterer = clustering.Hierarchical(

207

dists_fun=dtw.distance_matrix,

208

dists_options={'window': 3},

209

show_progress=True

210

)

211

212

# Perform clustering

213

cluster_tree = clusterer.fit(series)

214

215

print("Clustering completed")

216

print(f"Cluster tree keys: {list(cluster_tree.keys())}")

217

218

# Access cluster information

219

for node_id, node_info in cluster_tree.items():

220

if 'distance' in node_info:

221

print(f"Node {node_id}: distance={node_info['distance']:.3f}")

222

```

223

224

### Tree Visualization and Analysis

225

226

```python

227

from dtaidistance import dtw, clustering

228

import matplotlib.pyplot as plt

229

import numpy as np

230

231

# Generate synthetic time series clusters

232

np.random.seed(42)

233

234

# Cluster 1: Sine waves

235

cluster1 = [np.sin(np.linspace(0, 4*np.pi, 50)) + 0.1*np.random.randn(50)

236

for _ in range(5)]

237

238

# Cluster 2: Cosine waves

239

cluster2 = [np.cos(np.linspace(0, 3*np.pi, 50)) + 0.1*np.random.randn(50)

240

for _ in range(4)]

241

242

# Cluster 3: Linear trends

243

cluster3 = [np.linspace(0, 2, 50) + 0.1*np.random.randn(50)

244

for _ in range(3)]

245

246

all_series = cluster1 + cluster2 + cluster3

247

248

# Build hierarchical tree with visualization

249

tree = clustering.HierarchicalTree(

250

dists_fun=dtw.distance_matrix_fast,

251

dists_options={'window': 5},

252

show_progress=True

253

)

254

255

tree.fit(all_series)

256

257

# Plot dendrogram and time series

258

fig, axes = tree.plot(filename='cluster_tree.png')

259

plt.title('Hierarchical Clustering of Time Series')

260

plt.show()

261

262

# Export tree structure to DOT format

263

dot_representation = tree.to_dot()

264

print("DOT representation (first 200 chars):")

265

print(dot_representation[:200] + "...")

266

```

267

268

### Fast Clustering with Scipy Integration

269

270

```python

271

from dtaidistance import dtw, clustering

272

import numpy as np

273

import time

274

275

# Large dataset

276

np.random.seed(42)

277

n_series = 100

278

series_length = 200

279

280

# Generate diverse time series patterns

281

series = []

282

for i in range(n_series):

283

if i < n_series // 3:

284

# Sine patterns

285

s = np.sin(np.linspace(0, 2*np.pi*np.random.uniform(1, 5), series_length))

286

elif i < 2 * n_series // 3:

287

# Random walks

288

s = np.cumsum(np.random.randn(series_length))

289

else:

290

# Polynomial trends

291

x = np.linspace(0, 1, series_length)

292

s = np.random.randn() * x**2 + np.random.randn() * x + np.random.randn()

293

294

series.append(s + 0.1 * np.random.randn(series_length))

295

296

# Compare clustering methods

297

methods = [

298

("Basic Hierarchical", clustering.Hierarchical),

299

("Tree with Tracking", clustering.HierarchicalTree),

300

("Fast Linkage", clustering.LinkageTree)

301

]

302

303

for name, ClusterClass in methods:

304

start_time = time.time()

305

306

if ClusterClass == clustering.LinkageTree:

307

clusterer = ClusterClass(

308

dists_fun=dtw.distance_matrix_fast,

309

dists_options={'window': 10, 'parallel': True},

310

method='complete'

311

)

312

else:

313

clusterer = ClusterClass(

314

dists_fun=dtw.distance_matrix_fast,

315

dists_options={'window': 10, 'parallel': True},

316

show_progress=False

317

)

318

319

result = clusterer.fit(series)

320

elapsed = time.time() - start_time

321

322

print(f"{name}: {elapsed:.2f}s")

323

```

324

325

### Custom Hooks and Weights

326

327

```python

328

from dtaidistance import dtw, clustering

329

import numpy as np

330

331

# Time series with known importance weights

332

series = [

333

[1, 2, 3, 2, 1], # Important series

334

[2, 3, 4, 3, 2], # Important series

335

[0, 1, 0, 1, 0], # Less important

336

[3, 1, 4, 1, 5], # Less important

337

[1, 1, 1, 1, 1] # Least important

338

]

339

340

# Define importance weights (higher = more important for clustering)

341

importance_weights = [1.0, 1.0, 0.5, 0.5, 0.1]

342

343

# Create hooks

344

weight_hook = clustering.Hooks.create_weighthook(importance_weights, series)

345

order_hook = clustering.Hooks.create_orderhook(importance_weights)

346

347

# Clustering with hooks

348

clusterer = clustering.Hierarchical(

349

dists_fun=dtw.distance_matrix,

350

dists_options={'window': 2},

351

merge_hook=weight_hook,

352

order_hook=order_hook,

353

show_progress=True

354

)

355

356

weighted_clusters = clusterer.fit(series)

357

358

print("Weighted clustering completed")

359

print("Cluster structure influenced by importance weights")

360

```

361

362

### Multi-Level Clustering Analysis

363

364

```python

365

from dtaidistance import dtw, clustering

366

import numpy as np

367

368

# Create hierarchical data with multiple cluster levels

369

np.random.seed(42)

370

371

# Level 1: Different base patterns

372

patterns = [

373

lambda t: np.sin(2*np.pi*t), # Sine

374

lambda t: np.cos(2*np.pi*t), # Cosine

375

lambda t: np.sign(np.sin(4*np.pi*t)), # Square wave

376

lambda t: 2*t - 1 # Linear

377

]

378

379

series = []

380

true_labels = []

381

382

t = np.linspace(0, 1, 100)

383

for pattern_idx, pattern_func in enumerate(patterns):

384

for variant in range(3): # 3 variants per pattern

385

# Add noise and slight variations

386

noise_level = 0.1 + 0.05 * variant

387

s = pattern_func(t) + noise_level * np.random.randn(len(t))

388

series.append(s)

389

true_labels.append(pattern_idx)

390

391

# Perform clustering

392

tree = clustering.HierarchicalTree(

393

dists_fun=dtw.distance_matrix_fast,

394

dists_options={'window': 5},

395

max_dist=2.0 # Stop at reasonable distance threshold

396

)

397

398

tree.fit(series)

399

400

# Analyze cluster structure at different levels

401

def analyze_clusters_at_distance(tree, max_distance):

402

"""Extract clusters formed at given distance threshold."""

403

clusters = {}

404

cluster_id = 0

405

406

# Implementation would traverse tree to find clusters

407

# This is a simplified example

408

print(f"Analyzing clusters at distance threshold: {max_distance}")

409

return clusters

410

411

# Analyze at different distance thresholds

412

for threshold in [0.5, 1.0, 1.5, 2.0]:

413

clusters = analyze_clusters_at_distance(tree, threshold)

414

print(f"Threshold {threshold}: Found clusters")

415

```

416

417

### Integration with DTW Variants

418

419

```python

420

from dtaidistance import dtw, dtw_ndim, clustering

421

import numpy as np

422

423

# Example with multi-dimensional time series

424

np.random.seed(42)

425

426

# Generate 3D time series (e.g., accelerometer data)

427

n_series = 15

428

series_length = 80

429

430

multidim_series = []

431

for i in range(n_series):

432

# Create 3D patterns

433

t = np.linspace(0, 4*np.pi, series_length)

434

x = np.sin(t + i*0.5) + 0.1*np.random.randn(series_length)

435

y = np.cos(t + i*0.3) + 0.1*np.random.randn(series_length)

436

z = np.sin(2*t + i*0.2) + 0.1*np.random.randn(series_length)

437

438

# Stack into multi-dimensional series

439

series_3d = np.column_stack([x, y, z])

440

multidim_series.append(series_3d)

441

442

# Cluster using N-dimensional DTW

443

clusterer = clustering.Hierarchical(

444

dists_fun=dtw_ndim.distance_matrix,

445

dists_options={'window': 10, 'parallel': True},

446

show_progress=True

447

)

448

449

ndim_clusters = clusterer.fit(multidim_series)

450

451

print("Multi-dimensional clustering completed")

452

print(f"Number of cluster nodes: {len(ndim_clusters)}")

453

```

454

455

## Advanced Clustering Strategies

456

457

### Distance Threshold Clustering

458

459

```python

460

from dtaidistance import dtw, clustering

461

462

def cluster_with_threshold(series, threshold=2.0):

463

"""Cluster series and stop at distance threshold."""

464

clusterer = clustering.Hierarchical(

465

dists_fun=dtw.distance_matrix_fast,

466

dists_options={'window': 5},

467

max_dist=threshold,

468

show_progress=True

469

)

470

471

return clusterer.fit(series)

472

473

# Apply threshold-based clustering

474

series = [[1, 2, 1], [2, 3, 2], [10, 11, 10], [11, 12, 11]]

475

clusters = cluster_with_threshold(series, threshold=5.0)

476

```

477

478

This comprehensive clustering module enables sophisticated analysis of time series collections, from basic hierarchical clustering to advanced multi-dimensional pattern discovery with customizable distance measures and clustering strategies.