mlxtend — Machine Learning Library Extensions: essential tools for day-to-day data science tasks.

Cluster module: unsupervised learning algorithms for data clustering and pattern discovery.

Kmeans: an iterative clustering algorithm that partitions data into k clusters.
class Kmeans:
def __init__(self, k, max_iter=100, convergence_tolerance=1e-05, random_seed=None,
print_progress=0):
"""
K-means clustering algorithm.
Parameters:
- k: int, number of clusters
- max_iter: int, maximum number of iterations
- convergence_tolerance: float, convergence threshold
- random_seed: int, random seed for centroid initialization
- print_progress: int, print progress every n iterations
"""
def fit(self, X, init_params=True):
"""
Fit K-means clustering to data.
Parameters:
- X: array-like, feature matrix (shape: [n_samples, n_features])
- init_params: bool, initialize parameters
Returns:
- self: fitted estimator
"""
def predict(self, X):
"""
Predict cluster labels for samples.
Parameters:
- X: array-like, feature matrix
Returns:
- labels: array, cluster labels for each sample
"""
def fit_predict(self, X):
"""Fit clustering and return cluster labels"""
centroids_: # Cluster centroids after fitting
clusters_: # Dictionary mapping cluster indices to sample indices
iterations_: # Number of iterations until convergencefrom mlxtend.cluster import Kmeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import numpy as np
# Generate sample data
X, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.60, random_state=0)
# Apply K-means clustering
kmeans = Kmeans(k=4, random_seed=42)
cluster_labels = kmeans.fit_predict(X)
# Plot results
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=cluster_labels, cmap='viridis', alpha=0.6)
plt.scatter(kmeans.centroids_[:, 0], kmeans.centroids_[:, 1],
c='red', marker='x', s=200, linewidths=3)
plt.title('K-means Clustering Results')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
print(f"Converged after {kmeans.iterations_} iterations")
print(f"Cluster centroids:\n{kmeans.centroids_}")from mlxtend.cluster import Kmeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

# Example 2: compare K-means results for several choices of k on the same data.
# Generate data: three Gaussian blobs
X, _ = make_blobs(n_samples=300, centers=3, cluster_std=1.0, random_state=42)

# Try different k values, one subplot per setting
k_values = [2, 3, 4, 5]
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.ravel()  # flatten the 2x2 grid so it can be indexed linearly
for i, k in enumerate(k_values):
    # Same seed for every k so differences come from k alone
    kmeans = Kmeans(k=k, random_seed=42)
    labels = kmeans.fit_predict(X)
    axes[i].scatter(X[:, 0], X[:, 1], c=labels, cmap='tab10', alpha=0.6)
    axes[i].scatter(kmeans.centroids_[:, 0], kmeans.centroids_[:, 1],
                    c='red', marker='x', s=100, linewidths=2)
    axes[i].set_title(f'K-means with k={k}')
    axes[i].set_xlabel('Feature 1')
    axes[i].set_ylabel('Feature 2')
plt.tight_layout()
plt.show()

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-mlxtend