mlxtend — Machine Learning Library Extensions: essential tools for day-to-day data science tasks.

Cluster module: unsupervised learning algorithms for data clustering and pattern discovery.

Kmeans: an iterative clustering algorithm that partitions data into k clusters.
class Kmeans:
def __init__(self, k, max_iter=100, convergence_tolerance=1e-05, random_seed=None,
print_progress=0):
"""
K-means clustering algorithm.
Parameters:
- k: int, number of clusters
- max_iter: int, maximum number of iterations
- convergence_tolerance: float, convergence threshold
- random_seed: int, random seed for centroid initialization
- print_progress: int, print progress every n iterations
"""
def fit(self, X, init_params=True):
"""
Fit K-means clustering to data.
Parameters:
- X: array-like, feature matrix (shape: [n_samples, n_features])
- init_params: bool, initialize parameters
Returns:
- self: fitted estimator
"""
def predict(self, X):
"""
Predict cluster labels for samples.
Parameters:
- X: array-like, feature matrix
Returns:
- labels: array, cluster labels for each sample
"""
def fit_predict(self, X):
"""Fit clustering and return cluster labels"""
centroids_: # Cluster centroids after fitting
clusters_: # Dictionary mapping cluster indices to sample indices
iterations_: # Number of iterations until convergencefrom mlxtend.cluster import Kmeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import numpy as np
# Generate sample data
X, _ = make_blobs(n_samples=300, centers=4, cluster_std=0.60, random_state=0)
# Apply K-means clustering
kmeans = Kmeans(k=4, random_seed=42)
cluster_labels = kmeans.fit_predict(X)
# Plot results
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=cluster_labels, cmap='viridis', alpha=0.6)
plt.scatter(kmeans.centroids_[:, 0], kmeans.centroids_[:, 1],
c='red', marker='x', s=200, linewidths=3)
plt.title('K-means Clustering Results')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
print(f"Converged after {kmeans.iterations_} iterations")
print(f"Cluster centroids:\n{kmeans.centroids_}")from mlxtend.cluster import Kmeans
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

# Example 2: compare K-means results for several choices of k on the same data.
# Generate data: three Gaussian blobs
X, _ = make_blobs(n_samples=300, centers=3, cluster_std=1.0, random_state=42)

# Try different k values, one subplot per setting
k_values = [2, 3, 4, 5]
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.ravel()  # flatten the 2x2 grid so it can be indexed linearly
for i, k in enumerate(k_values):
    # Same seed for every k so differences come from k alone
    kmeans = Kmeans(k=k, random_seed=42)
    labels = kmeans.fit_predict(X)
    axes[i].scatter(X[:, 0], X[:, 1], c=labels, cmap='tab10', alpha=0.6)
    axes[i].scatter(kmeans.centroids_[:, 0], kmeans.centroids_[:, 1],
                    c='red', marker='x', s=100, linewidths=2)
    axes[i].set_title(f'K-means with k={k}')
    axes[i].set_xlabel('Feature 1')
    axes[i].set_ylabel('Feature 2')
plt.tight_layout()
plt.show()

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-mlxtend