A comprehensive machine learning library providing supervised and unsupervised learning algorithms with consistent APIs and extensive tools for data preprocessing, model evaluation, and deployment.
K-nearest neighbors algorithms for classification, regression, outlier detection, and manifold learning. These algorithms are based on the principle that similar data points tend to have similar labels or values.
Classifier implementing k-nearest neighbors vote.
from sklearn.neighbors import KNeighborsClassifier
KNeighborsClassifier(
n_neighbors: int = 5,
weights: str | callable = "uniform",
algorithm: str = "auto",
leaf_size: int = 30,
p: int = 2,
metric: str | callable = "minkowski",
metric_params: dict | None = None,
n_jobs: int | None = None
)

Classifier implementing radius-based neighbors vote.
from sklearn.neighbors import RadiusNeighborsClassifier
RadiusNeighborsClassifier(
radius: float = 1.0,
weights: str | callable = "uniform",
algorithm: str = "auto",
leaf_size: int = 30,
p: int = 2,
metric: str | callable = "minkowski",
metric_params: dict | None = None,
outlier_label: int | str | None = None,
n_jobs: int | None = None
)

Regression based on k-nearest neighbors.
from sklearn.neighbors import KNeighborsRegressor
KNeighborsRegressor(
n_neighbors: int = 5,
weights: str | callable = "uniform",
algorithm: str = "auto",
leaf_size: int = 30,
p: int = 2,
metric: str | callable = "minkowski",
metric_params: dict | None = None,
n_jobs: int | None = None
)

Regression based on neighbors within a fixed radius.
from sklearn.neighbors import RadiusNeighborsRegressor
RadiusNeighborsRegressor(
radius: float = 1.0,
weights: str | callable = "uniform",
algorithm: str = "auto",
leaf_size: int = 30,
p: int = 2,
metric: str | callable = "minkowski",
metric_params: dict | None = None,
n_jobs: int | None = None
)

Unsupervised learner for implementing neighbor searches.
from sklearn.neighbors import NearestNeighbors
NearestNeighbors(
n_neighbors: int = 5,
radius: float = 1.0,
algorithm: str = "auto",
leaf_size: int = 30,
metric: str | callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
n_jobs: int | None = None
)

Nearest centroid classifier using class centroids.
from sklearn.neighbors import NearestCentroid
NearestCentroid(
metric: str = "euclidean",
shrink_threshold: float | None = None
)

Transform X into a weighted graph of k nearest neighbors.
from sklearn.neighbors import KNeighborsTransformer
KNeighborsTransformer(
mode: str = "connectivity",
n_neighbors: int = 5,
algorithm: str = "auto",
leaf_size: int = 30,
metric: str | callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
n_jobs: int | None = None
)

Transform X into a weighted graph of neighbors within a fixed radius.
from sklearn.neighbors import RadiusNeighborsTransformer
RadiusNeighborsTransformer(
mode: str = "connectivity",
radius: float = 1.0,
algorithm: str = "auto",
leaf_size: int = 30,
metric: str | callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
n_jobs: int | None = None
)

Unsupervised outlier detection using Local Outlier Factor.
from sklearn.neighbors import LocalOutlierFactor
LocalOutlierFactor(
n_neighbors: int = 20,
algorithm: str = "auto",
leaf_size: int = 30,
metric: str | callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
contamination: str | float = "auto",
novelty: bool = False,
n_jobs: int | None = None
)

Neighborhood Components Analysis for dimensionality reduction.
from sklearn.neighbors import NeighborhoodComponentsAnalysis
NeighborhoodComponentsAnalysis(
n_components: int | None = None,
init: str | ndarray = "auto",
warm_start: bool = False,
max_iter: int = 50,
tol: float = 1e-5,
callback: callable | None = None,
verbose: int = 0,
random_state: int | RandomState | None = None
)

Kernel Density Estimation using various kernel functions.
from sklearn.neighbors import KernelDensity
KernelDensity(
bandwidth: float | str = 1.0,
algorithm: str = "auto",
kernel: str = "gaussian",
metric: str | callable = "euclidean",
atol: float = 0,
rtol: float = 0,
breadth_first: bool = True,
leaf_size: int = 40,
metric_params: dict | None = None
)

K-dimensional tree for fast nearest neighbor queries.
from sklearn.neighbors import KDTree
KDTree(
X: ArrayLike,
leaf_size: int = 40,
metric: str | callable = "minkowski",
**kwargs
)

Ball tree for fast nearest neighbor queries in high dimensions.
from sklearn.neighbors import BallTree
BallTree(
X: ArrayLike,
leaf_size: int = 40,
metric: str | callable = "minkowski",
**kwargs
)

Compute k-neighbors graph of points.
from sklearn.neighbors import kneighbors_graph
def kneighbors_graph(
X: ArrayLike,
n_neighbors: int,
mode: str = "connectivity",
metric: str | callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
include_self: bool = False,
n_jobs: int | None = None
) -> csr_matrix: ...

Compute radius-based neighbors graph of points.
from sklearn.neighbors import radius_neighbors_graph
def radius_neighbors_graph(
X: ArrayLike,
radius: float,
mode: str = "connectivity",
metric: str | callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
include_self: bool = False,
n_jobs: int | None = None
) -> csr_matrix: ...

Sort a sparse neighbors graph by row values, in-place by default.
from sklearn.neighbors import sort_graph_by_row_values
def sort_graph_by_row_values(
graph: csr_matrix,
copy: bool = False,
warn_when_not_sorted: bool = True
) -> csr_matrix: ...

Classify the iris dataset with a k-NN vote.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# Load data
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)
# Train k-NN classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
# Make predictions
y_pred = knn.predict(X_test)
accuracy = knn.score(X_test, y_test)
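
NearestCentroid follows the same fit/predict pattern, assigning each sample to the class with the closest centroid. A minimal sketch reusing the iris split above:

from sklearn.neighbors import NearestCentroid
# Classify by distance to per-class centroids
nc = NearestCentroid()
nc.fit(X_train, y_train)
centroid_pred = nc.predict(X_test)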

Fit a distance-weighted k-NN regressor on the diabetes dataset.
from sklearn.neighbors import KNeighborsRegressor
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
# Load data
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(diabetes.data, diabetes.target)
# Train k-NN regressor
knn_reg = KNeighborsRegressor(n_neighbors=5, weights='distance')
knn_reg.fit(X_train, y_train)
# Make predictions
y_pred = knn_reg.predict(X_test)
score = knn_reg.score(X_test, y_test)
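
RadiusNeighborsRegressor averages targets over all neighbors within a fixed radius rather than a fixed count. A sketch on the diabetes split above; the radius value is illustrative, not tuned, and test points with no neighbors in range yield NaN predictions:

from sklearn.neighbors import RadiusNeighborsRegressor
# radius=0.3 is an illustrative value for the pre-scaled diabetes
# features; too small a radius leaves some points with no neighbors
radius_reg = RadiusNeighborsRegressor(radius=0.3, weights='distance')
radius_reg.fit(X_train, y_train)
y_pred_radius = radius_reg.predict(X_test)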

Detect outliers with Local Outlier Factor.
from sklearn.neighbors import LocalOutlierFactor
import numpy as np
# Generate sample data with outliers
np.random.seed(42)
X = np.random.randn(100, 2)
X_outliers = np.random.uniform(low=-4, high=4, size=(20, 2))
X = np.r_[X, X_outliers]
# Detect outliers
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.1)
y_pred = lof.fit_predict(X)
outlier_scores = lof.negative_outlier_factor_
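
KernelDensity offers another route to flagging low-density points: fit on the same sample and score log-density. A sketch with an illustrative, untuned bandwidth:

from sklearn.neighbors import KernelDensity
# Gaussian KDE on the same data; bandwidth=0.5 is illustrative
# (cross-validate the bandwidth in practice)
kde = KernelDensity(kernel='gaussian', bandwidth=0.5)
kde.fit(X)
log_density = kde.score_samples(X)  # log-density per sample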

Query nearest neighbors directly with the unsupervised learner.
from sklearn.neighbors import NearestNeighbors
# Fit nearest neighbors
nbrs = NearestNeighbors(n_neighbors=5, algorithm='ball_tree')
nbrs.fit(X_train)
# Find neighbors
distances, indices = nbrs.kneighbors(X_test)
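
The KDTree and BallTree structures behind these estimators can also be queried directly, which is useful when one tree serves many queries. A minimal sketch with KDTree:

from sklearn.neighbors import KDTree
# Build the tree once, then query repeatedly
tree = KDTree(X_train, leaf_size=40)
dist, ind = tree.query(X_test[:5], k=3)  # 3 nearest training points for 5 queries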

Build sparse neighbor graphs.
from sklearn.neighbors import kneighbors_graph, radius_neighbors_graph
# Create k-neighbors graph
knn_graph = kneighbors_graph(X, n_neighbors=5, mode='connectivity')
# Create radius neighbors graph
radius_graph = radius_neighbors_graph(X, radius=1.0, mode='distance')
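
The transformer classes let a pipeline precompute the neighbor graph once and hand it to a downstream estimator via metric='precomputed'. A sketch assuming a classification split such as the iris data from the first example; the transformer requests one extra neighbor because each training sample counts itself:

from sklearn.neighbors import KNeighborsTransformer, KNeighborsClassifier
from sklearn.pipeline import make_pipeline
# Precompute the distance graph once; the classifier consumes it
# as a precomputed metric
graph_pipe = make_pipeline(
    KNeighborsTransformer(mode='distance', n_neighbors=6),
    KNeighborsClassifier(n_neighbors=5, metric='precomputed'),
)
graph_pipe.fit(X_train, y_train)
graph_pred = graph_pipe.predict(X_test)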

Learn a metric with Neighborhood Components Analysis, then classify in the reduced space.
from sklearn.neighbors import NeighborhoodComponentsAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train)
# Apply NCA
nca = NeighborhoodComponentsAnalysis(n_components=2, random_state=42)
X_nca = nca.fit_transform(X_scaled, y_train)
# Use with k-NN classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_nca, y_train)
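
Estimators that consume precomputed sparse graphs expect each row's entries sorted by distance; sort_graph_by_row_values enforces that ordering. A sketch on the distance graph built above:

from sklearn.neighbors import sort_graph_by_row_values
# copy=True returns a sorted copy and leaves radius_graph untouched
sorted_graph = sort_graph_by_row_values(radius_graph, copy=True)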

from sklearn.neighbors import VALID_METRICS, VALID_METRICS_SPARSE
VALID_METRICS: dict  # Valid metrics for dense matrices
VALID_METRICS_SPARSE: dict  # Valid metrics for sparse matrices

Install with Tessl CLI
npx tessl i tessl/pypi-scikit-learn