CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-scikit-learn

A comprehensive machine learning library providing supervised and unsupervised learning algorithms with consistent APIs and extensive tools for data preprocessing, model evaluation, and deployment.

87

0.98x
Overview
Eval results
Files

docs/neighbors.md

Nearest Neighbors

K-nearest neighbors algorithms for classification, regression, outlier detection, and manifold learning. These algorithms are based on the principle that similar data points tend to have similar labels or values.

Classification

KNeighborsClassifier

Classifier implementing k-nearest neighbors vote.

from sklearn.neighbors import KNeighborsClassifier

KNeighborsClassifier(
    n_neighbors: int = 5,
    weights: str | callable = "uniform",
    algorithm: str = "auto",
    leaf_size: int = 30,
    p: int = 2,
    metric: str | callable = "minkowski",
    metric_params: dict | None = None,
    n_jobs: int | None = None
)

RadiusNeighborsClassifier

Classifier implementing radius-based neighbors vote.

from sklearn.neighbors import RadiusNeighborsClassifier

RadiusNeighborsClassifier(
    radius: float = 1.0,
    weights: str | callable = "uniform",
    algorithm: str = "auto",
    leaf_size: int = 30,
    p: int = 2,
    metric: str | callable = "minkowski",
    metric_params: dict | None = None,
    outlier_label: int | str | None = None,
    n_jobs: int | None = None
)

Regression

KNeighborsRegressor

Regression based on k-nearest neighbors.

from sklearn.neighbors import KNeighborsRegressor

KNeighborsRegressor(
    n_neighbors: int = 5,
    weights: str | callable = "uniform",
    algorithm: str = "auto",
    leaf_size: int = 30,
    p: int = 2,
    metric: str | callable = "minkowski",
    metric_params: dict | None = None,
    n_jobs: int | None = None
)

RadiusNeighborsRegressor

Regression based on neighbors within a fixed radius.

from sklearn.neighbors import RadiusNeighborsRegressor

RadiusNeighborsRegressor(
    radius: float = 1.0,
    weights: str | callable = "uniform",
    algorithm: str = "auto",
    leaf_size: int = 30,
    p: int = 2,
    metric: str | callable = "minkowski",
    metric_params: dict | None = None,
    n_jobs: int | None = None
)

Unsupervised Learning

NearestNeighbors

Unsupervised learner for implementing neighbor searches.

from sklearn.neighbors import NearestNeighbors

NearestNeighbors(
    n_neighbors: int = 5,
    radius: float = 1.0,
    algorithm: str = "auto",
    leaf_size: int = 30,
    metric: str | callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    n_jobs: int | None = None
)

NearestCentroid

Nearest centroid classifier using class centroids.

from sklearn.neighbors import NearestCentroid

NearestCentroid(
    metric: str = "euclidean",
    shrink_threshold: float | None = None
)

Transformers

KNeighborsTransformer

Transform X into a weighted graph of k nearest neighbors.

from sklearn.neighbors import KNeighborsTransformer

KNeighborsTransformer(
    mode: str = "connectivity",
    n_neighbors: int = 5,
    algorithm: str = "auto",
    leaf_size: int = 30,
    metric: str | callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    n_jobs: int | None = None
)

RadiusNeighborsTransformer

Transform X into a weighted graph of neighbors within a fixed radius.

from sklearn.neighbors import RadiusNeighborsTransformer

RadiusNeighborsTransformer(
    mode: str = "connectivity",
    radius: float = 1.0,
    algorithm: str = "auto",
    leaf_size: int = 30,
    metric: str | callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    n_jobs: int | None = None
)

Outlier Detection

LocalOutlierFactor

Unsupervised outlier detection using Local Outlier Factor.

from sklearn.neighbors import LocalOutlierFactor

LocalOutlierFactor(
    n_neighbors: int = 20,
    algorithm: str = "auto",
    leaf_size: int = 30,
    metric: str | callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    contamination: str | float = "auto",
    novelty: bool = False,
    n_jobs: int | None = None
)

Dimensionality Reduction

NeighborhoodComponentsAnalysis

Neighborhood Components Analysis for dimensionality reduction.

from sklearn.neighbors import NeighborhoodComponentsAnalysis

NeighborhoodComponentsAnalysis(
    n_components: int | None = None,
    init: str | ndarray = "auto",
    warm_start: bool = False,
    max_iter: int = 50,
    tol: float = 1e-5,
    callback: callable | None = None,
    verbose: int = 0,
    random_state: int | RandomState | None = None
)

Density Estimation

KernelDensity

Kernel Density Estimation using various kernel functions.

from sklearn.neighbors import KernelDensity

KernelDensity(
    bandwidth: float | str = 1.0,
    algorithm: str = "auto",
    kernel: str = "gaussian",
    metric: str | callable = "euclidean",
    atol: float = 0,
    rtol: float = 0,
    breadth_first: bool = True,
    leaf_size: int = 40,
    metric_params: dict | None = None
)

Tree Data Structures

KDTree

K-dimensional tree for fast nearest neighbor queries.

from sklearn.neighbors import KDTree

KDTree(
    X: ArrayLike,
    leaf_size: int = 40,
    metric: str | callable = "minkowski",
    **kwargs
)

BallTree

Ball tree for fast nearest neighbor queries in high dimensions.

from sklearn.neighbors import BallTree

BallTree(
    X: ArrayLike,
    leaf_size: int = 40,
    metric: str | callable = "minkowski",
    **kwargs
)

Graph Construction Functions

kneighbors_graph

Compute k-neighbors graph of points.

from sklearn.neighbors import kneighbors_graph

def kneighbors_graph(
    X: ArrayLike,
    n_neighbors: int,
    mode: str = "connectivity",
    metric: str | callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    include_self: bool = False,
    n_jobs: int | None = None
) -> csr_matrix: ...

radius_neighbors_graph

Compute radius-based neighbors graph of points.

from sklearn.neighbors import radius_neighbors_graph

def radius_neighbors_graph(
    X: ArrayLike,
    radius: float,
    mode: str = "connectivity",
    metric: str | callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    include_self: bool = False,
    n_jobs: int | None = None
) -> csr_matrix: ...

Utility Functions

sort_graph_by_row_values

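Sort a sparse graph so that each row is stored with increasing values. The sort is performed in-place by default; pass copy=True to sort a copy and leave the input unchanged.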

from sklearn.neighbors import sort_graph_by_row_values

def sort_graph_by_row_values(
    graph: csr_matrix,
    copy: bool = False,
    warn_when_not_sorted: bool = True
) -> csr_matrix: ...

Usage Examples

Classification

from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load data
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)

# Train k-NN classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Make predictions
y_pred = knn.predict(X_test)
accuracy = knn.score(X_test, y_test)

Regression

from sklearn.neighbors import KNeighborsRegressor
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Load data
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(diabetes.data, diabetes.target)

# Train k-NN regressor
knn_reg = KNeighborsRegressor(n_neighbors=5, weights='distance')
knn_reg.fit(X_train, y_train)

# Make predictions
y_pred = knn_reg.predict(X_test)
score = knn_reg.score(X_test, y_test)

Outlier Detection

from sklearn.neighbors import LocalOutlierFactor
import numpy as np

# Generate sample data with outliers
np.random.seed(42)
X = np.random.randn(100, 2)
X_outliers = np.random.uniform(low=-4, high=4, size=(20, 2))
X = np.r_[X, X_outliers]

# Detect outliers
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.1)
y_pred = lof.fit_predict(X)
outlier_scores = lof.negative_outlier_factor_

Nearest Neighbor Search

from sklearn.neighbors import NearestNeighbors

# Fit nearest neighbors
nbrs = NearestNeighbors(n_neighbors=5, algorithm='ball_tree')
nbrs.fit(X_train)

# Find neighbors
distances, indices = nbrs.kneighbors(X_test)

Graph Construction

from sklearn.neighbors import kneighbors_graph, radius_neighbors_graph

# Create k-neighbors graph
knn_graph = kneighbors_graph(X, n_neighbors=5, mode='connectivity')

# Create radius neighbors graph  
radius_graph = radius_neighbors_graph(X, radius=1.0, mode='distance')

Dimensionality Reduction with NCA

from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis
from sklearn.preprocessing import StandardScaler

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train)

# Apply NCA
nca = NeighborhoodComponentsAnalysis(n_components=2, random_state=42)
X_nca = nca.fit_transform(X_scaled, y_train)

# Use with k-NN classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_nca, y_train)

Constants

from sklearn.neighbors import VALID_METRICS, VALID_METRICS_SPARSE

VALID_METRICS: dict  # Valid metrics for dense matrices
VALID_METRICS_SPARSE: dict  # Valid metrics for sparse matrices

Install with Tessl CLI

npx tessl i tessl/pypi-scikit-learn

docs

datasets.md

feature-extraction.md

index.md

metrics.md

model-selection.md

neighbors.md

pipelines.md

preprocessing.md

supervised-learning.md

unsupervised-learning.md

utilities.md

tile.json