or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

classification.md clustering.md data-handling.md distance.md evaluation.md index.md preprocessing.md projection.md regression.md widgets.md

clustering.md docs/

0

# Clustering and Unsupervised Learning

1

2

Orange3 provides unsupervised learning algorithms for discovering patterns and structures in data without labeled examples.

3

4

## Capabilities

5

6

### K-Means Clustering

7

8

Partition data into k clusters based on feature similarity.

9

10

```python { .api }

11

class KMeans:

12

"""

13

K-means clustering algorithm.

14

15

Args:

16

n_clusters: Number of clusters

17

init: Initialization method ('k-means++', 'random')

18

n_init: Number of random initializations

19

max_iter: Maximum iterations

20

tol: Tolerance for convergence

21

random_state: Random seed

22

"""

23

def __init__(self, n_clusters=8, init='k-means++', n_init=10,

24

max_iter=300, tol=1e-4, random_state=None): ...

25

26

def fit(self, X):

27

"""

28

Fit k-means clustering to data.

29

30

Args:

31

X: Data array or Orange Table

32

33

Returns:

34

Fitted k-means model

35

"""

36

37

def predict(self, X):

38

"""Predict cluster labels for new data."""

39

40

@property

41

def cluster_centers_(self):

42

"""Cluster center coordinates."""

43

```

44

45

### Density-Based Clustering

46

47

DBSCAN algorithm for finding clusters of varying shapes and sizes.

48

49

```python { .api }

50

class DBSCAN:

51

"""

52

DBSCAN (Density-Based Spatial Clustering) algorithm.

53

54

Args:

55

eps: Maximum distance between samples in same neighborhood

56

min_samples: Minimum samples in neighborhood for core point

57

metric: Distance metric

58

algorithm: Algorithm for computing nearest neighbors

59

"""

60

def __init__(self, eps=0.5, min_samples=5, metric='euclidean', algorithm='auto'): ...

61

62

def fit(self, X):

63

"""

64

Fit DBSCAN clustering to data.

65

66

Args:

67

X: Data array or Orange Table

68

69

Returns:

70

Fitted DBSCAN model

71

"""

72

73

def predict(self, X):

74

"""Predict cluster labels (not supported by standard DBSCAN)."""

75

76

@property

77

def labels_(self):

78

"""Cluster labels for training data."""

79

80

@property

81

def core_sample_indices_(self):

82

"""Indices of core samples."""

83

```

84

85

### Hierarchical Clustering

86

87

Build a tree of clusters using an agglomerative (bottom-up) approach.

88

89

```python { .api }

90

class HierarchicalClustering:

91

"""

92

Agglomerative hierarchical clustering.

93

94

Args:

95

n_clusters: Number of clusters (if None, returns full tree)

96

linkage: Linkage criterion ('ward', 'complete', 'average', 'single')

97

metric: Distance metric

98

compute_full_tree: Compute full dendrogram

99

"""

100

def __init__(self, n_clusters=None, linkage='ward', metric='euclidean',

101

compute_full_tree='auto'): ...

102

103

def fit(self, X):

104

"""

105

Fit hierarchical clustering to data.

106

107

Args:

108

X: Data array or Orange Table

109

110

Returns:

111

Fitted hierarchical clustering model

112

"""

113

114

@property

115

def labels_(self):

116

"""Cluster labels."""

117

118

@property

119

def children_(self):

120

"""Tree structure of clustering."""

121

122

@property

123

def distances_(self):

124

"""Distances between merged clusters."""

125

```

126

127

### Community Detection

128

129

Graph-based clustering using the Louvain algorithm.

130

131

```python { .api }

132

class Louvain:

133

"""

134

Louvain community detection algorithm.

135

136

Args:

137

resolution: Resolution parameter for modularity

138

random_state: Random seed

139

"""

140

def __init__(self, resolution=1.0, random_state=None): ...

141

142

def fit(self, graph):

143

"""

144

Fit Louvain clustering to graph data.

145

146

Args:

147

graph: Network graph or adjacency matrix

148

149

Returns:

150

Fitted Louvain model

151

"""

152

153

@property

154

def labels_(self):

155

"""Community labels."""

156

```

157

158

### Clustering Utilities

159

160

Helper functions for clustering analysis.

161

162

```python { .api }

163

def matrix_to_knn_graph(distances, k, include_self=False):

164

"""

165

Convert distance matrix to k-nearest neighbor graph.

166

167

Args:

168

distances: Distance matrix

169

k: Number of nearest neighbors

170

include_self: Include self-connections

171

172

Returns:

173

Sparse adjacency matrix representing kNN graph

174

"""

175

```

176

177

### Clustering Evaluation

178

179

Metrics for assessing clustering quality.

180

181

```python { .api }

182

def silhouette_score(X, labels):

183

"""

184

Calculate silhouette coefficient for clustering.

185

186

Args:

187

X: Data samples

188

labels: Cluster labels

189

190

Returns:

191

float: Mean silhouette coefficient

192

"""

193

194

def adjusted_rand_score(labels_true, labels_pred):

195

"""

196

Calculate adjusted rand index between two clusterings.

197

198

Args:

199

labels_true: True cluster labels

200

labels_pred: Predicted cluster labels

201

202

Returns:

203

float: Adjusted rand index

204

"""

205

206

def calinski_harabasz_score(X, labels):

207

"""

208

Calculate Calinski-Harabasz index (variance ratio criterion).

209

210

Args:

211

X: Data samples

212

labels: Cluster labels

213

214

Returns:

215

float: Calinski-Harabasz index

216

"""

217

```

218

219

### Usage Examples

220

221

```python

222

# Basic clustering workflow

223

from Orange.data import Table

224

from Orange.clustering import KMeans, DBSCAN, HierarchicalClustering

225

import numpy as np

226

227

# Load or create data

228

data = Table("iris")

229

X = data.X # Feature matrix

230

231

# K-means clustering

232

kmeans = KMeans(n_clusters=3, random_state=42)

233

kmeans_model = kmeans.fit(X)

234

kmeans_labels = kmeans_model.predict(X)

235

236

print(f"K-means cluster centers shape: {kmeans_model.cluster_centers_.shape}")

237

print(f"K-means labels: {np.unique(kmeans_labels)}")

238

239

# DBSCAN clustering

240

dbscan = DBSCAN(eps=0.5, min_samples=5)

241

dbscan_model = dbscan.fit(X)

242

dbscan_labels = dbscan_model.labels_

243

244

print(f"DBSCAN found {len(np.unique(dbscan_labels[dbscan_labels != -1]))} clusters")

245

print(f"DBSCAN noise points: {np.sum(dbscan_labels == -1)}")

246

247

# Hierarchical clustering

248

hierarchical = HierarchicalClustering(n_clusters=3, linkage='ward')

249

hierarchical_model = hierarchical.fit(X)

250

hierarchical_labels = hierarchical_model.labels_

251

252

print(f"Hierarchical clustering labels: {np.unique(hierarchical_labels)}")

253

254

# Evaluate clustering quality

255

from Orange.clustering import silhouette_score, calinski_harabasz_score

256

257

kmeans_silhouette = silhouette_score(X, kmeans_labels)

258

hierarchical_silhouette = silhouette_score(X, hierarchical_labels)

259

260

kmeans_ch_score = calinski_harabasz_score(X, kmeans_labels)

261

hierarchical_ch_score = calinski_harabasz_score(X, hierarchical_labels)

262

263

print(f"K-means silhouette score: {kmeans_silhouette:.3f}")

264

print(f"Hierarchical silhouette score: {hierarchical_silhouette:.3f}")

265

print(f"K-means Calinski-Harabasz score: {kmeans_ch_score:.3f}")

266

print(f"Hierarchical Calinski-Harabasz score: {hierarchical_ch_score:.3f}")

267

268

# Find optimal number of clusters by comparing silhouette scores across k (the elbow method would additionally need inertia)

269

inertias = []

270

silhouette_scores = []

271

k_range = range(2, 11)

272

273

for k in k_range:

274

kmeans_k = KMeans(n_clusters=k, random_state=42)

275

model_k = kmeans_k.fit(X)

276

labels_k = model_k.predict(X)

277

278

# Note: inertia would be available as model_k.inertia_ in actual implementation

279

silhouette_k = silhouette_score(X, labels_k)

280

silhouette_scores.append(silhouette_k)

281

282

print(f"Silhouette scores for k=2 to 10: {silhouette_scores}")

283

284

# Graph-based clustering example (requires network data)

285

# from Orange.clustering import Louvain, matrix_to_knn_graph

286

#

287

# # Create kNN graph from distance matrix

288

# knn_graph = matrix_to_knn_graph(distance_matrix, k=5)

289

#

290

# # Apply Louvain community detection

291

# louvain = Louvain(resolution=1.0)

292

# louvain_model = louvain.fit(knn_graph)

293

# community_labels = louvain_model.labels_

294

```