Intel Extension for Scikit-learn providing hardware-accelerated implementations of scikit-learn algorithms optimized for Intel CPUs and GPUs.
—
High-performance implementations of clustering algorithms with Intel hardware acceleration. These algorithms provide significant speedups for density-based and centroid-based clustering on large datasets.
Intel-accelerated K-means clustering with optimized centroid computation and distance calculations.
class KMeans:
    """
    K-means clustering with Intel optimization.

    Provides 10-100x speedup over standard scikit-learn implementation
    through vectorized operations and Intel hardware acceleration.

    NOTE(review): this is an API stub — signatures and documentation only;
    the actual implementations are supplied by sklearnex. Indentation was
    restored here so the stub is valid Python (the extracted source had
    been flattened to column 0).
    """

    def __init__(
        self,
        n_clusters=8,
        init='k-means++',
        n_init=10,
        max_iter=300,
        tol=1e-4,
        random_state=None,
        copy_x=True,
        algorithm='auto'
    ):
        """
        Initialize K-means clustering.

        Parameters:
            n_clusters (int): Number of clusters to form
            init (str or array): Initialization method ('k-means++', 'random')
            n_init (int): Number of initializations to perform
            max_iter (int): Maximum number of iterations
            tol (float): Tolerance for convergence
            random_state (int): Random state for reproducibility
            copy_x (bool): Whether to copy input data
            algorithm (str): Algorithm to use ('auto', 'full', 'elkan')
        """

    def fit(self, X, y=None, sample_weight=None):
        """
        Compute k-means clustering.

        Parameters:
            X (array-like): Training data of shape (n_samples, n_features)
            y: Ignored, present for API consistency
            sample_weight (array-like): Sample weights

        Returns:
            self: Fitted estimator
        """

    def predict(self, X, sample_weight=None):
        """
        Predict cluster labels for samples.

        Parameters:
            X (array-like): New data to predict
            sample_weight (array-like): Sample weights

        Returns:
            array: Cluster labels for each sample
        """

    def fit_predict(self, X, y=None, sample_weight=None):
        """
        Compute clustering and return cluster labels.

        Parameters:
            X (array-like): Training data
            y: Ignored
            sample_weight (array-like): Sample weights

        Returns:
            array: Cluster labels
        """

    def transform(self, X):
        """
        Transform X to cluster-distance space.

        Parameters:
            X (array-like): Data to transform

        Returns:
            array: Distances to cluster centers
        """

    def fit_transform(self, X, y=None, sample_weight=None):
        """
        Compute clustering and transform to cluster-distance space.

        Parameters:
            X (array-like): Training data
            y: Ignored
            sample_weight (array-like): Sample weights

        Returns:
            array: Distances to cluster centers
        """

    def score(self, X, y=None, sample_weight=None):
        """
        Return the negative sum of squared distances to centroids.

        Parameters:
            X (array-like): Data to score
            y: Ignored
            sample_weight (array-like): Sample weights

        Returns:
            float: Negative inertia score
        """

    # Attributes available after fitting
    cluster_centers_: ...  # Cluster centers
    labels_: ...           # Labels of training data
    inertia_: ...          # Sum of squared distances to centroids
n_iter_: ... # Number of iterations run

Density-Based Spatial Clustering of Applications with Noise, optimized for Intel hardware.
class DBSCAN:
    """
    DBSCAN clustering with Intel optimization.

    Efficient density-based clustering that finds clusters of varying shapes
    and identifies outliers as noise points.

    NOTE(review): this is an API stub — signatures and documentation only;
    the actual implementations are supplied by sklearnex. Indentation was
    restored here so the stub is valid Python (the extracted source had
    been flattened to column 0).
    """

    def __init__(
        self,
        eps=0.5,
        min_samples=5,
        metric='euclidean',
        metric_params=None,
        algorithm='auto',
        leaf_size=30,
        p=None,
        n_jobs=None
    ):
        """
        Initialize DBSCAN clustering.

        Parameters:
            eps (float): Maximum distance between samples in same neighborhood
            min_samples (int): Minimum samples in neighborhood for core point
            metric (str): Distance metric to use
            metric_params (dict): Additional parameters for distance metric
            algorithm (str): Algorithm for nearest neighbors computation
            leaf_size (int): Leaf size for tree algorithms
            p (float): Power parameter for Minkowski metric
            n_jobs (int): Number of parallel jobs
        """

    def fit(self, X, y=None, sample_weight=None):
        """
        Perform DBSCAN clustering.

        Parameters:
            X (array-like): Training data of shape (n_samples, n_features)
            y: Ignored, present for API consistency
            sample_weight (array-like): Sample weights

        Returns:
            self: Fitted estimator
        """

    def fit_predict(self, X, y=None, sample_weight=None):
        """
        Compute clustering and return cluster labels.

        Parameters:
            X (array-like): Training data
            y: Ignored
            sample_weight (array-like): Sample weights

        Returns:
            array: Cluster labels (-1 for noise points)
        """

    # Attributes available after fitting
    labels_: ...               # Cluster labels (-1 for noise)
    core_sample_indices_: ...  # Indices of core samples
components_: ... # Core samples

import numpy as np
# Example: Intel-accelerated K-means on synthetic blob data.
import numpy as np
from sklearnex.cluster import KMeans
from sklearn.datasets import make_blobs

# Synthetic data: 4 well-separated 2-D Gaussian blobs, fixed seed.
X, _ = make_blobs(
    n_samples=1000,
    centers=4,
    n_features=2,
    cluster_std=1.0,
    random_state=42,
)

# Fit K-means; n_init restarts guard against poor initializations.
model = KMeans(n_clusters=4, random_state=42, n_init=10)
model.fit(X)

# Fitted attributes: per-sample labels, centroid matrix, total SSE.
assigned = model.labels_
centroids = model.cluster_centers_
sse = model.inertia_
print(f"Inertia: {sse:.2f}")
print(f"Centers shape: {centroids.shape}")

# Assign previously unseen points and measure their centroid distances.
queries = np.array([[1, 2], [3, 4]])
query_labels = model.predict(queries)
distances = model.transform(queries)
print(f"New point labels: {query_labels}")
print(f"Distances to centers: {distances}")

import numpy as np
# Example: DBSCAN density clustering with injected uniform noise.
import numpy as np
from sklearnex.cluster import DBSCAN
from sklearn.datasets import make_blobs

# Generate data with noise
X, _ = make_blobs(n_samples=300, centers=4, n_features=2,
                  random_state=42, cluster_std=0.60)
# Add noise points — seeded generator so the example is reproducible
# (the original used the unseeded global np.random state, which
# contradicted the fixed random_state used everywhere else).
rng = np.random.default_rng(42)
noise = rng.uniform(-6, 6, (50, 2))
X = np.vstack([X, noise])
# Create and fit DBSCAN model
dbscan = DBSCAN(eps=0.3, min_samples=10)
cluster_labels = dbscan.fit_predict(X)
# Analyze results: label -1 marks noise, so exclude it from the cluster count.
n_clusters = len(set(cluster_labels)) - (1 if -1 in cluster_labels else 0)
n_noise = list(cluster_labels).count(-1)
print(f"Estimated number of clusters: {n_clusters}")
print(f"Estimated number of noise points: {n_noise}")
print(f"Core samples: {len(dbscan.core_sample_indices_)}")
# Get core samples
core_samples = dbscan.components_
print(f"Core samples shape: {core_samples.shape}")

import time
# Benchmark: Intel-optimized K-means vs. stock scikit-learn K-means.
import time
import numpy as np
from sklearn.datasets import make_blobs

# Large synthetic dataset: 100k samples, 50 features, 10 clusters.
X, _ = make_blobs(n_samples=100000, centers=10, n_features=50, random_state=42)

# Intel-optimized version
from sklearnex.cluster import KMeans as IntelKMeans

# time.perf_counter() is monotonic and high-resolution; time.time() is
# wall-clock and can jump with NTP/clock adjustments, so it is the wrong
# tool for measuring elapsed benchmark durations.
start_time = time.perf_counter()
intel_kmeans = IntelKMeans(n_clusters=10, random_state=42)
intel_kmeans.fit(X)
intel_time = time.perf_counter() - start_time
print(f"Intel K-means time: {intel_time:.2f} seconds")
print(f"Intel inertia: {intel_kmeans.inertia_:.2f}")

# Standard scikit-learn version (for comparison)
from sklearn.cluster import KMeans as StandardKMeans

start_time = time.perf_counter()
standard_kmeans = StandardKMeans(n_clusters=10, random_state=42)
standard_kmeans.fit(X)
standard_time = time.perf_counter() - start_time
print(f"Standard K-means time: {standard_time:.2f} seconds")
print(f"Standard inertia: {standard_kmeans.inertia_:.2f}")
print(f"Speedup: {standard_time / intel_time:.1f}x")

Install with Tessl CLI
npx tessl i tessl/pypi-scikit-learn-intelex