Intel Extension for Scikit-learn providing hardware-accelerated implementations of scikit-learn algorithms optimized for Intel CPUs and GPUs.
---
High-performance implementations of k-nearest neighbors algorithms with Intel hardware acceleration. These algorithms provide significant speedups for classification, regression, and unsupervised neighbor searches on large datasets.
Intel-accelerated k-nearest neighbors classifier with optimized distance calculations and neighbor search algorithms.
class KNeighborsClassifier:
    """Intel-optimized k-nearest neighbors classifier.

    Accelerated counterpart of the scikit-learn estimator: each query is
    labelled by a majority (or distance-weighted) vote among the k closest
    training samples, using vectorized distance kernels on Intel hardware.
    """

    def __init__(self, n_neighbors=5, weights='uniform', algorithm='auto',
                 leaf_size=30, p=2, metric='minkowski', metric_params=None,
                 n_jobs=None):
        """Configure the classifier.

        Args:
            n_neighbors (int): How many neighbors vote for each query.
            weights (str): 'uniform' or 'distance' vote weighting.
            algorithm (str): Neighbor-search strategy selector.
            leaf_size (int): Leaf size handed to tree-based searches.
            p (int): Power parameter of the Minkowski metric.
            metric (str): Name of the distance metric.
            metric_params (dict): Extra keyword arguments for the metric.
            n_jobs (int): Number of parallel workers.
        """

    def fit(self, X, y):
        """Store the training set and prepare the neighbor-search structure.

        Args:
            X (array-like): Training samples, shape (n_samples, n_features).
            y (array-like): Class labels, shape (n_samples,).

        Returns:
            self: The fitted estimator.
        """

    def predict(self, X):
        """Assign a class label to every query sample.

        Args:
            X (array-like): Query samples, shape (n_samples, n_features).

        Returns:
            array: One predicted label per query sample.
        """

    def predict_proba(self, X):
        """Estimate per-class membership probabilities.

        Args:
            X (array-like): Query samples.

        Returns:
            array: Probabilities, shape (n_samples, n_classes).
        """

    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        """Locate the nearest neighbors of each query point.

        Args:
            X (array-like): Query points; None queries the training data.
            n_neighbors (int): Neighbor count; defaults to the fitted value.
            return_distance (bool): Also return distances when True.

        Returns:
            tuple: (distances, indices), or indices alone when
            return_distance is False.
        """

    def score(self, X, y, sample_weight=None):
        """Compute mean accuracy over a labelled test set.

        Args:
            X (array-like): Test samples.
            y (array-like): Ground-truth labels.
            sample_weight (array-like): Optional per-sample weights.

        Returns:
            float: Mean accuracy.
        """

    # Attributes available after fitting
    classes_: ...  # Unique class labels
    effective_metric_: ...  # Effective distance metric used
    effective_metric_params_: ...  # Effective additional metric parameters
    n_features_in_: ...  # Number of features in training data
    n_samples_fit_: ... # Number of samples in training data

Intel-accelerated k-nearest neighbors regressor for continuous target prediction.
class KNeighborsRegressor:
    """Intel-optimized k-nearest neighbors regressor.

    Predicts a continuous target as the (optionally distance-weighted)
    average of the targets of the k closest training samples, using
    accelerated distance computation.
    """

    def __init__(self, n_neighbors=5, weights='uniform', algorithm='auto',
                 leaf_size=30, p=2, metric='minkowski', metric_params=None,
                 n_jobs=None):
        """Configure the regressor.

        Args:
            n_neighbors (int): How many neighbors are averaged per query.
            weights (str): 'uniform' or 'distance' neighbor weighting.
            algorithm (str): Neighbor-search strategy selector.
            leaf_size (int): Leaf size handed to tree-based searches.
            p (int): Power parameter of the Minkowski metric.
            metric (str): Name of the distance metric.
            metric_params (dict): Extra keyword arguments for the metric.
            n_jobs (int): Number of parallel workers.
        """

    def fit(self, X, y):
        """Store the training set and prepare the neighbor-search structure.

        Args:
            X (array-like): Training samples, shape (n_samples, n_features).
            y (array-like): Targets, shape (n_samples,) or
                (n_samples, n_outputs) for multi-output regression.

        Returns:
            self: The fitted estimator.
        """

    def predict(self, X):
        """Predict target values for query samples.

        Args:
            X (array-like): Query samples, shape (n_samples, n_features).

        Returns:
            array: Predicted target values, one entry (or row) per sample.
        """

    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        """Locate the nearest neighbors of each query point.

        Args:
            X (array-like): Query points; None queries the training data.
            n_neighbors (int): Neighbor count; defaults to the fitted value.
            return_distance (bool): Also return distances when True.

        Returns:
            tuple: (distances, indices), or indices alone when
            return_distance is False.
        """

    def score(self, X, y, sample_weight=None):
        """Compute the coefficient of determination R^2 on test data.

        Args:
            X (array-like): Test samples.
            y (array-like): Ground-truth targets.
            sample_weight (array-like): Optional per-sample weights.

        Returns:
            float: R^2 score.
        """

    # Attributes available after fitting
    effective_metric_: ...  # Effective distance metric used
    effective_metric_params_: ...  # Effective additional metric parameters
    n_features_in_: ...  # Number of features in training data
    n_samples_fit_: ... # Number of samples in training data

Intel-accelerated unsupervised nearest neighbors for neighbor queries without target labels.
class NearestNeighbors:
    """Intel-optimized unsupervised nearest-neighbor search.

    Answers k-nearest and fixed-radius neighbor queries, and builds sparse
    connectivity graphs, without requiring target labels.
    """

    def __init__(self, n_neighbors=5, radius=1.0, algorithm='auto',
                 leaf_size=30, metric='minkowski', p=2, metric_params=None,
                 n_jobs=None):
        """Configure the neighbor searcher.

        Args:
            n_neighbors (int): Default neighbor count for k-NN queries.
            radius (float): Default radius for range queries.
            algorithm (str): Neighbor-search strategy selector.
            leaf_size (int): Leaf size handed to tree-based searches.
            metric (str): Name of the distance metric.
            p (int): Power parameter of the Minkowski metric.
            metric_params (dict): Extra keyword arguments for the metric.
            n_jobs (int): Number of parallel workers.
        """

    def fit(self, X, y=None):
        """Index the data for subsequent neighbor queries.

        Args:
            X (array-like): Samples to index, shape (n_samples, n_features).
            y: Ignored; accepted only for API consistency.

        Returns:
            self: The fitted estimator.
        """

    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        """Locate the nearest neighbors of each query point.

        Args:
            X (array-like): Query points; None queries the training data.
            n_neighbors (int): Neighbor count; defaults to the fitted value.
            return_distance (bool): Also return distances when True.

        Returns:
            tuple: (distances, indices), or indices alone when
            return_distance is False.
        """

    def radius_neighbors(self, X=None, radius=None, return_distance=True, sort_results=False):
        """Collect all neighbors within a fixed radius of each query point.

        Args:
            X (array-like): Query points; None queries the training data.
            radius (float): Query radius; defaults to the constructor value.
            return_distance (bool): Also return distances when True.
            sort_results (bool): Sort each result by distance when True.

        Returns:
            tuple: Per-query (distances, indices) arrays.
        """

    def kneighbors_graph(self, X=None, n_neighbors=None, mode='connectivity'):
        """Build a sparse graph linking samples to their k nearest neighbors.

        Args:
            X (array-like): Query points; None queries the training data.
            n_neighbors (int): Neighbors per sample; defaults to the fitted value.
            mode (str): 'connectivity' for 0/1 edges, 'distance' for weighted edges.

        Returns:
            sparse matrix: k-neighbor connectivity graph.
        """

    def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity', sort_results=False):
        """Build a sparse graph linking samples to all neighbors within a radius.

        Args:
            X (array-like): Query points; None queries the training data.
            radius (float): Neighborhood radius; defaults to the constructor value.
            mode (str): 'connectivity' for 0/1 edges, 'distance' for weighted edges.
            sort_results (bool): Sort each row by distance when True.

        Returns:
            sparse matrix: Radius-neighbor connectivity graph.
        """

    # Attributes available after fitting
    effective_metric_: ...  # Effective distance metric used
    effective_metric_params_: ...  # Effective additional metric parameters
    n_features_in_: ...  # Number of features in training data
    n_samples_fit_: ... # Number of samples in training data

Intel-accelerated Local Outlier Factor for unsupervised outlier detection.
class LocalOutlierFactor:
    """
    Local Outlier Factor with Intel optimization.
    Efficient unsupervised outlier detection using local density
    deviation of a given data point with respect to its neighbors.

    Method availability follows scikit-learn's novelty flag: with
    novelty=False (default), use fit_predict to label the training data;
    with novelty=True, use fit and then predict / decision_function /
    score_samples on new data.
    """

    def __init__(self, n_neighbors=20, algorithm='auto', leaf_size=30,
                 metric='minkowski', p=2, metric_params=None,
                 contamination='auto', novelty=False, n_jobs=None):
        """
        Initialize Local Outlier Factor.
        Parameters:
            n_neighbors (int): Number of neighbors to use
            algorithm (str): Algorithm to compute nearest neighbors
            leaf_size (int): Leaf size for tree algorithms
            metric (str): Distance metric to use
            p (int): Power parameter for Minkowski metric
            metric_params (dict): Additional parameters for distance metric
            contamination (str or float): Proportion of outliers in data set
            novelty (bool): Whether to use LOF for novelty detection
            n_jobs (int): Number of parallel jobs
        """

    def fit(self, X, y=None):
        """
        Fit the Local Outlier Factor detector.
        Parameters:
            X (array-like): Training data of shape (n_samples, n_features)
            y: Ignored, present for API consistency
        Returns:
            self: Fitted estimator
        """

    def fit_predict(self, X, y=None):
        """
        Fit detector and return binary labels (1 for inliers, -1 for outliers).
        Only available when novelty=False (outlier-detection mode).
        Parameters:
            X (array-like): Training data
            y: Ignored
        Returns:
            array: Binary labels for each sample
        """

    def decision_function(self, X):
        """
        Shifted opposite of Local Outlier Factor of X.
        Negative values indicate outliers; only available when novelty=True.
        Parameters:
            X (array-like): Query samples
        Returns:
            array: Shifted opposite of LOF scores
        """

    def score_samples(self, X):
        """
        Opposite of Local Outlier Factor of X.
        Lower values indicate more abnormal samples; only available when
        novelty=True.
        Parameters:
            X (array-like): Query samples
        Returns:
            array: Opposite of LOF scores
        """

    def predict(self, X):
        """
        Predict binary labels (1 for inliers, -1 for outliers) for X.
        Only available when novelty=True.
        Parameters:
            X (array-like): Query samples
        Returns:
            array: 1 for inliers, -1 for outliers, one entry per sample
        """

    def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
        """
        Find k-neighbors of a point.
        Parameters:
            X (array-like): Query points, or None for training data
            n_neighbors (int): Number of neighbors to get
            return_distance (bool): Whether to return distances
        Returns:
            tuple: (distances, indices) or just indices
        """

    # Attributes available after fitting
    negative_outlier_factor_: ...  # Opposite of LOF of training samples
    n_neighbors_: ...  # Actual number of neighbors used
    offset_: ...  # Offset used to obtain binary labels
    effective_metric_: ...  # Effective distance metric used
    effective_metric_params_: ...  # Effective additional metric parameters
    n_features_in_: ...  # Number of features in training data
    n_samples_fit_: ... # Number of samples in training data

import numpy as np
from sklearnex.neighbors import KNeighborsClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Example: multi-class classification with the Intel-accelerated KNN classifier.
# Generate sample data
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10,
                           n_redundant=10, n_classes=3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create and fit KNN classifier
# weights='distance' gives closer neighbors a larger share of the vote
knn = KNeighborsClassifier(n_neighbors=5, weights='distance')
knn.fit(X_train, y_train)
# Make predictions
y_pred = knn.predict(X_test)
y_proba = knn.predict_proba(X_test)  # one probability row per test sample
print(f"Accuracy: {knn.score(X_test, y_test):.3f}")
print(f"Classes: {knn.classes_}")
print(f"Prediction shape: {y_pred.shape}")
print(f"Probability shape: {y_proba.shape}")
# Find neighbors for specific points
distances, indices = knn.kneighbors(X_test[:5], n_neighbors=3)
print(f"Neighbor distances shape: {distances.shape}")
print(f"Neighbor indices shape: {indices.shape}")

import numpy as np
from sklearnex.neighbors import KNeighborsRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
# NOTE(review): r2_score is imported but never used below (score() already
# reports R^2) — confirm whether it should be dropped from this example.
from sklearn.metrics import mean_squared_error, r2_score

# Example: single- and multi-output regression with the Intel-accelerated KNN regressor.
# Generate sample data
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create and fit KNN regressor
knn_reg = KNeighborsRegressor(n_neighbors=5, weights='distance')
knn_reg.fit(X_train, y_train)
# Make predictions
y_pred = knn_reg.predict(X_test)
print(f"R² Score: {knn_reg.score(X_test, y_test):.3f}")
print(f"MSE: {mean_squared_error(y_test, y_pred):.3f}")
print(f"Features in training: {knn_reg.n_features_in_}")
print(f"Samples in training: {knn_reg.n_samples_fit_}")
# Multi-output regression example
# Second target column is a noisy linear function of the first
y_multi = np.column_stack([y, y * 0.5 + np.random.normal(0, 0.1, len(y))])
X_train, X_test, y_train_multi, y_test_multi = train_test_split(X, y_multi, test_size=0.2, random_state=42)
knn_multi = KNeighborsRegressor(n_neighbors=3)
knn_multi.fit(X_train, y_train_multi)
y_pred_multi = knn_multi.predict(X_test)
print(f"Multi-output prediction shape: {y_pred_multi.shape}")

import numpy as np
from sklearnex.neighbors import NearestNeighbors
from sklearn.datasets import make_blobs

# Example: k-NN queries, radius queries, and sparse connectivity graphs with
# the Intel-accelerated unsupervised NearestNeighbors estimator.
# Generate sample data
X, _ = make_blobs(n_samples=500, centers=5, n_features=10, random_state=42)
# Create and fit nearest neighbors model
nn = NearestNeighbors(n_neighbors=10, metric='euclidean')
nn.fit(X)
# Find k-nearest neighbors
distances, indices = nn.kneighbors(X[:5])
print(f"Distances shape: {distances.shape}")
print(f"Indices shape: {indices.shape}")
# Find radius neighbors
radius_distances, radius_indices = nn.radius_neighbors(X[:3], radius=2.0)
print(f"Radius neighbors found for first 3 points:")
# Only the index arrays are needed for the counts; the distance arrays were
# previously unpacked in this loop but never used.
for i, idxs in enumerate(radius_indices):
    print(f" Point {i}: {len(idxs)} neighbors within radius 2.0")
# Create connectivity graphs
knn_graph = nn.kneighbors_graph(X[:10], n_neighbors=3, mode='connectivity')
print(f"KNN graph shape: {knn_graph.shape}")
print(f"KNN graph density: {knn_graph.nnz / (knn_graph.shape[0] * knn_graph.shape[1]):.3f}")
radius_graph = nn.radius_neighbors_graph(X[:10], radius=1.5, mode='distance')
print(f"Radius graph shape: {radius_graph.shape}")
print(f"Radius graph density: {radius_graph.nnz / (radius_graph.shape[0] * radius_graph.shape[1]):.3f}")

import numpy as np
from sklearnex.neighbors import LocalOutlierFactor
from sklearn.datasets import make_blobs

# Example: outlier detection and novelty detection with Intel-accelerated LOF.
# Generate normal data with some outliers
X_inliers, _ = make_blobs(n_samples=200, centers=1, cluster_std=0.5, random_state=42)
X_outliers = np.random.uniform(low=-6, high=6, size=(20, 2))
X = np.concatenate([X_inliers, X_outliers])
# Outlier detection (unsupervised)
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.1)
y_pred = lof.fit_predict(X)  # 1 = inlier, -1 = outlier
# Count inliers and outliers
n_outliers = np.sum(y_pred == -1)
n_inliers = np.sum(y_pred == 1)
print(f"Outliers detected: {n_outliers}")
print(f"Inliers detected: {n_inliers}")
print(f"Outlier factor shape: {lof.negative_outlier_factor_.shape}")
# Get outlier scores (more negative means more anomalous)
outlier_scores = lof.negative_outlier_factor_
print(f"Score range: [{outlier_scores.min():.3f}, {outlier_scores.max():.3f}]")
# Novelty detection example: novelty=True enables predict/decision_function
# on data not seen during fit
X_train = X_inliers # Train only on inliers
X_test = np.concatenate([X_inliers[:50], X_outliers[:10]]) # Test on mix
lof_novelty = LocalOutlierFactor(n_neighbors=20, novelty=True, contamination=0.1)
lof_novelty.fit(X_train)
# Predict on new data
y_pred_novelty = lof_novelty.predict(X_test)
decision_scores = lof_novelty.decision_function(X_test)
print(f"Novelty detection - Outliers: {np.sum(y_pred_novelty == -1)}")
print(f"Decision scores range: [{decision_scores.min():.3f}, {decision_scores.max():.3f}]")

import time
import numpy as np
from sklearn.datasets import make_classification

# Example: wall-clock comparison of Intel-accelerated vs. stock scikit-learn KNN.
# Generate large dataset
X, y = make_classification(n_samples=50000, n_features=20, n_informative=15,
                           n_redundant=5, n_classes=5, random_state=42)
X_train, X_test = X[:40000], X[40000:]
y_train, y_test = y[:40000], y[40000:]
# Intel-optimized version
from sklearnex.neighbors import KNeighborsClassifier as IntelKNN
start_time = time.time()
intel_knn = IntelKNN(n_neighbors=5)
intel_knn.fit(X_train, y_train)
intel_pred = intel_knn.predict(X_test)
intel_time = time.time() - start_time
print(f"Intel KNN time: {intel_time:.2f} seconds")
print(f"Intel KNN accuracy: {intel_knn.score(X_test, y_test):.3f}")
# Standard scikit-learn version (for comparison)
from sklearn.neighbors import KNeighborsClassifier as StandardKNN
start_time = time.time()
standard_knn = StandardKNN(n_neighbors=5)
standard_knn.fit(X_train, y_train)
standard_pred = standard_knn.predict(X_test)
standard_time = time.time() - start_time
print(f"Standard KNN time: {standard_time:.2f} seconds")
print(f"Standard KNN accuracy: {standard_knn.score(X_test, y_test):.3f}")
# NOTE(review): single-run wall-clock timing; actual speedup varies with
# hardware and library warm-up
print(f"Speedup: {standard_time / intel_time:.1f}x")
# Verify results are identical
print(f"Predictions identical: {np.allclose(intel_pred, standard_pred)}")

Install with Tessl CLI
npx tessl i tessl/pypi-scikit-learn-intelex