Orange, a component-based data mining framework.
Quality: Pending — does it follow best practices? (not yet reviewed)
Impact: Pending — no eval scenarios have been run
# Orange3 provides unsupervised learning algorithms for discovering patterns and structures in data without labeled examples.
# KMeans: partition data into k clusters based on feature similarity.
class KMeans:
    """
    K-means clustering algorithm.

    Args:
        n_clusters: Number of clusters
        init: Initialization method ('k-means++', 'random')
        n_init: Number of random initializations
        max_iter: Maximum iterations
        tol: Tolerance for convergence
        random_state: Random seed
    """

    def __init__(self, n_clusters=8, init='k-means++', n_init=10,
                 max_iter=300, tol=1e-4, random_state=None): ...

    def fit(self, X):
        """
        Fit k-means clustering to data.

        Args:
            X: Data array or Orange Table

        Returns:
            Fitted k-means model
        """

    def predict(self, X):
        """Predict cluster labels for new data."""

    @property
    def cluster_centers_(self):
        """Cluster center coordinates."""


# DBSCAN algorithm for finding clusters of varying shapes and sizes.
class DBSCAN:
    """
    DBSCAN (Density-Based Spatial Clustering) algorithm.

    Args:
        eps: Maximum distance between samples in same neighborhood
        min_samples: Minimum samples in neighborhood for core point
        metric: Distance metric
        algorithm: Algorithm for computing nearest neighbors
    """

    def __init__(self, eps=0.5, min_samples=5, metric='euclidean', algorithm='auto'): ...

    def fit(self, X):
        """
        Fit DBSCAN clustering to data.

        Args:
            X: Data array or Orange Table

        Returns:
            Fitted DBSCAN model
        """

    def predict(self, X):
        """Predict cluster labels (not supported by standard DBSCAN)."""

    @property
    def labels_(self):
        """Cluster labels for training data."""

    @property
    def core_sample_indices_(self):
        """Indices of core samples."""


# HierarchicalClustering: build a tree of clusters using an agglomerative approach.
class HierarchicalClustering:
    """
    Agglomerative hierarchical clustering.

    Args:
        n_clusters: Number of clusters (if None, returns full tree)
        linkage: Linkage criterion ('ward', 'complete', 'average', 'single')
        metric: Distance metric
        compute_full_tree: Compute full dendrogram
    """

    def __init__(self, n_clusters=None, linkage='ward', metric='euclidean',
                 compute_full_tree='auto'): ...

    def fit(self, X):
        """
        Fit hierarchical clustering to data.

        Args:
            X: Data array or Orange Table

        Returns:
            Fitted hierarchical clustering model
        """

    @property
    def labels_(self):
        """Cluster labels."""

    @property
    def children_(self):
        """Tree structure of clustering."""

    @property
    def distances_(self):
        """Distances between merged clusters."""


# Louvain: graph-based clustering using the Louvain algorithm.
class Louvain:
    """
    Louvain community detection algorithm.

    Args:
        resolution: Resolution parameter for modularity
        random_state: Random seed
    """

    def __init__(self, resolution=1.0, random_state=None): ...

    def fit(self, graph):
        """
        Fit Louvain clustering to graph data.

        Args:
            graph: Network graph or adjacency matrix

        Returns:
            Fitted Louvain model
        """

    @property
    def labels_(self):
        """Community labels."""


# Helper functions for clustering analysis.
def matrix_to_knn_graph(distances, k, include_self=False):
    """
    Convert distance matrix to k-nearest neighbor graph.

    Args:
        distances: Distance matrix
        k: Number of nearest neighbors
        include_self: Include self-connections

    Returns:
        Sparse adjacency matrix representing kNN graph
    """


# Metrics for assessing clustering quality.
def silhouette_score(X, labels):
    """
    Calculate silhouette coefficient for clustering.

    Args:
        X: Data samples
        labels: Cluster labels

    Returns:
        float: Mean silhouette coefficient
    """
def adjusted_rand_score(labels_true, labels_pred):
    """
    Calculate adjusted rand index between two clusterings.

    Args:
        labels_true: True cluster labels
        labels_pred: Predicted cluster labels

    Returns:
        float: Adjusted rand index
    """
def calinski_harabasz_score(X, labels):
    """
    Calculate Calinski-Harabasz index (variance ratio criterion).

    Args:
        X: Data samples
        labels: Cluster labels

    Returns:
        float: Calinski-Harabasz index
    """


# Basic clustering workflow
# Example workflow: cluster the iris data with three algorithms and compare them.
from Orange.data import Table
from Orange.clustering import KMeans, DBSCAN, HierarchicalClustering
import numpy as np

# Load or create data
data = Table("iris")
X = data.X  # Feature matrix

# K-means clustering
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_model = kmeans.fit(X)
kmeans_labels = kmeans_model.predict(X)
print(f"K-means cluster centers shape: {kmeans_model.cluster_centers_.shape}")
print(f"K-means labels: {np.unique(kmeans_labels)}")

# DBSCAN clustering (label -1 marks noise points, hence the filter below)
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan_model = dbscan.fit(X)
dbscan_labels = dbscan_model.labels_
print(f"DBSCAN found {len(np.unique(dbscan_labels[dbscan_labels != -1]))} clusters")
print(f"DBSCAN noise points: {np.sum(dbscan_labels == -1)}")

# Hierarchical clustering
hierarchical = HierarchicalClustering(n_clusters=3, linkage='ward')
hierarchical_model = hierarchical.fit(X)
hierarchical_labels = hierarchical_model.labels_
print(f"Hierarchical clustering labels: {np.unique(hierarchical_labels)}")

# Evaluate clustering quality
from Orange.clustering import silhouette_score, calinski_harabasz_score

kmeans_silhouette = silhouette_score(X, kmeans_labels)
hierarchical_silhouette = silhouette_score(X, hierarchical_labels)
kmeans_ch_score = calinski_harabasz_score(X, kmeans_labels)
hierarchical_ch_score = calinski_harabasz_score(X, hierarchical_labels)
print(f"K-means silhouette score: {kmeans_silhouette:.3f}")
print(f"Hierarchical silhouette score: {hierarchical_silhouette:.3f}")
print(f"K-means Calinski-Harabasz score: {kmeans_ch_score:.3f}")
print(f"Hierarchical Calinski-Harabasz score: {hierarchical_ch_score:.3f}")

# Find optimal number of clusters using elbow method
inertias = []
silhouette_scores = []
k_range = range(2, 11)
for k in k_range:
    kmeans_k = KMeans(n_clusters=k, random_state=42)
    model_k = kmeans_k.fit(X)
    labels_k = model_k.predict(X)
    # Note: inertia would be available as model_k.inertia_ in actual implementation
    silhouette_k = silhouette_score(X, labels_k)
    silhouette_scores.append(silhouette_k)
print(f"Silhouette scores for k=2 to 10: {silhouette_scores}")

# Graph-based clustering example (requires network data)
# from Orange.clustering import Louvain, matrix_to_knn_graph
#
# # Create kNN graph from distance matrix
# knn_graph = matrix_to_knn_graph(distance_matrix, k=5)
#
# # Apply Louvain community detection
# louvain = Louvain(resolution=1.0)
# louvain_model = louvain.fit(knn_graph)
# community_labels = louvain_model.labels_

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-orange3