CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-scikit-learn

A comprehensive machine learning library providing supervised and unsupervised learning algorithms with consistent APIs and extensive tools for data preprocessing, model evaluation, and deployment.

87

0.98x
Overview
Eval results
Files

utilities.mddocs/

Utilities and Core Functions

This document covers core utilities, configuration functions, pipelines, composition tools, and other utility functions in scikit-learn.

Core Utilities

Base Functions

clone { .api }

from sklearn.base import clone

clone(
    estimator: BaseEstimator,
    safe: bool = True
) -> BaseEstimator

Construct a new unfitted estimator with the same parameters.

Configuration Functions

get_config { .api }

from sklearn import get_config

get_config() -> dict

Retrieve current scikit-learn configuration.

set_config { .api }

from sklearn import set_config

set_config(
    assume_finite: bool | None = None,
    working_memory: int | None = None,
    print_changed_only: bool | None = None,
    display: str | None = None,
    pairwise_distances_chunk_size: int | None = None,
    enable_cython_pairwise_dist: bool | None = None,
    array_api_dispatch: bool | None = None,
    transform_output: str | None = None,
    enable_metadata_routing: bool | None = None,
    skip_parameter_validation: bool | None = None
) -> dict

Set global scikit-learn configuration.

config_context { .api }

from sklearn import config_context

config_context(**new_config) -> ContextManager

Temporarily change global configuration.

Version Information

show_versions { .api }

from sklearn import show_versions

show_versions() -> None

Print system and dependency version information.

version { .api }

import sklearn
sklearn.__version__  # "1.7.1"

Current scikit-learn version string.

Pipeline

Pipeline Classes

Pipeline { .api }

from sklearn.pipeline import Pipeline

Pipeline(
    steps: list[tuple[str, BaseEstimator]],
    memory: str | object | None = None,
    verbose: bool = False
)

Pipeline of transforms with a final estimator.

FeatureUnion { .api }

from sklearn.pipeline import FeatureUnion

FeatureUnion(
    transformer_list: list[tuple[str, BaseTransformer]],
    n_jobs: int | None = None,
    transformer_weights: dict | None = None,
    verbose: bool = False,
    verbose_feature_names_out: bool = True
)

Concatenates results of multiple transformer objects.

Pipeline Functions

make_pipeline { .api }

from sklearn.pipeline import make_pipeline

make_pipeline(
    *steps: BaseEstimator,
    memory: str | object | None = None,
    verbose: bool = False
) -> Pipeline

Construct a Pipeline from the given estimators.

make_union { .api }

from sklearn.pipeline import make_union

make_union(
    *transformers: BaseTransformer,
    n_jobs: int | None = None,
    verbose: bool = False
) -> FeatureUnion

Construct a FeatureUnion from the given transformers.

Compose

Column Transformer

ColumnTransformer { .api }

from sklearn.compose import ColumnTransformer

ColumnTransformer(
    transformers: list[tuple[str, BaseTransformer, ArrayLike | str | Callable]],
    remainder: str | BaseTransformer = "drop",
    sparse_threshold: float = 0.3,
    n_jobs: int | None = None,
    transformer_weights: dict | None = None,
    verbose: bool = False,
    verbose_feature_names_out: bool = True,
    force_int_remainder_cols: bool = True
)

Applies transformers to columns of an array or pandas DataFrame.

TransformedTargetRegressor { .api }

from sklearn.compose import TransformedTargetRegressor

TransformedTargetRegressor(
    regressor: BaseRegressor | None = None,
    transformer: BaseTransformer | None = None,
    func: Callable | None = None,
    inverse_func: Callable | None = None,
    check_inverse: bool = True
)

Meta-estimator to regress on a transformed target.

Compose Functions

make_column_transformer { .api }

from sklearn.compose import make_column_transformer

make_column_transformer(
    *transformers: tuple[BaseTransformer, ArrayLike | str | Callable],
    remainder: str | BaseTransformer = "drop",
    sparse_threshold: float = 0.3,
    n_jobs: int | None = None,
    verbose: bool = False,
    verbose_feature_names_out: bool = True,
    force_int_remainder_cols: bool = True
) -> ColumnTransformer

Construct a ColumnTransformer from the given transformers.

make_column_selector { .api }

from sklearn.compose import make_column_selector

make_column_selector(
    pattern: str | None = None,
    dtype_include: type | str | list | None = None,
    dtype_exclude: type | str | list | None = None
) -> Callable

Create a callable to select columns to be used with ColumnTransformer.

Inspection

Partial Dependence

partial_dependence { .api }

from sklearn.inspection import partial_dependence

partial_dependence(
    estimator: BaseEstimator,
    X: ArrayLike,
    features: int | str | ArrayLike | list,
    sample_weight: ArrayLike | None = None,
    categorical_features: ArrayLike | None = None,
    feature_names: ArrayLike | None = None,
    response_method: str = "auto",
    percentiles: tuple[float, float] = (0.05, 0.95),
    grid_resolution: int = 100,
    method: str = "auto",
    kind: str = "average"
) -> dict

Partial dependence of features.

permutation_importance { .api }

from sklearn.inspection import permutation_importance

permutation_importance(
    estimator: BaseEstimator,
    X: ArrayLike,
    y: ArrayLike,
    scoring: str | Callable | list | tuple | dict | None = None,
    n_repeats: int = 5,
    n_jobs: int | None = None,
    random_state: int | RandomState | None = None,
    sample_weight: ArrayLike | None = None,
    max_samples: int | float = 1.0
) -> dict

Permutation importance for feature evaluation.

Display Classes

PartialDependenceDisplay { .api }

from sklearn.inspection import PartialDependenceDisplay

PartialDependenceDisplay(
    pd_results: list[dict],
    features: list,
    feature_names: ArrayLike | None = None,
    target_idx: int | None = None,
    deciles: dict | None = None
)

Partial Dependence Plot (PDP).

DecisionBoundaryDisplay { .api }

from sklearn.inspection import DecisionBoundaryDisplay

DecisionBoundaryDisplay(
    xx0: ArrayLike,
    xx1: ArrayLike,
    response: ArrayLike
)

Visualization of decision boundaries of a classifier.

Isotonic Regression Utilities

Isotonic Functions

check_increasing { .api }

from sklearn.isotonic import check_increasing

check_increasing(
    x: ArrayLike,
    y: ArrayLike
) -> bool

Determine whether y is monotonically correlated with x.

isotonic_regression { .api }

from sklearn.isotonic import isotonic_regression

isotonic_regression(
    y: ArrayLike,
    sample_weight: ArrayLike | None = None,
    y_min: float | None = None,
    y_max: float | None = None,
    increasing: bool = True
) -> ArrayLike

Solve the isotonic regression model.

Neighbors Utilities

Neighbor Functions

kneighbors_graph { .api }

from sklearn.neighbors import kneighbors_graph

kneighbors_graph(
    X: ArrayLike,
    n_neighbors: int,
    mode: str = "connectivity",
    metric: str | Callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    include_self: bool | str = False,
    n_jobs: int | None = None
) -> ArrayLike

Compute the (weighted) graph of k-Neighbors for points in X.

radius_neighbors_graph { .api }

from sklearn.neighbors import radius_neighbors_graph

radius_neighbors_graph(
    X: ArrayLike,
    radius: float,
    mode: str = "connectivity",
    metric: str | Callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    include_self: bool | str = False,
    n_jobs: int | None = None
) -> ArrayLike

Compute the (weighted) graph of Neighbors for points in X.

sort_graph_by_row_values { .api }

from sklearn.neighbors import sort_graph_by_row_values

sort_graph_by_row_values(
    graph: ArrayLike,
    copy: bool = True,
    warn_when_not_sorted: bool = True
) -> ArrayLike

Sort a sparse graph such that each row has its data sorted by value.

Neighbor Data Structures

BallTree { .api }

from sklearn.neighbors import BallTree

BallTree(
    X: ArrayLike,
    leaf_size: int = 40,
    metric: str | DistanceMetric = "minkowski",
    **kwargs
)

BallTree for fast generalized N-point problems.

KDTree { .api }

from sklearn.neighbors import KDTree

KDTree(
    X: ArrayLike,
    leaf_size: int = 40,
    metric: str = "minkowski",
    **kwargs
)

KDTree for fast generalized N-point problems.

KernelDensity { .api }

from sklearn.neighbors import KernelDensity

KernelDensity(
    bandwidth: float | str = 1.0,
    algorithm: str = "auto",
    kernel: str = "gaussian",
    metric: str = "euclidean",
    atol: float = 0,
    rtol: float = 0,
    breadth_first: bool = True,
    leaf_size: int = 40,
    metric_params: dict | None = None
)

Kernel Density Estimation.

NearestNeighbors { .api }

from sklearn.neighbors import NearestNeighbors

NearestNeighbors(
    n_neighbors: int = 5,
    radius: float = 1.0,
    algorithm: str = "auto",
    leaf_size: int = 30,
    metric: str | Callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    n_jobs: int | None = None
)

Unsupervised learner for implementing neighbor searches.

KNeighborsTransformer { .api }

from sklearn.neighbors import KNeighborsTransformer

KNeighborsTransformer(
    mode: str = "distance",
    n_neighbors: int = 5,
    algorithm: str = "auto",
    leaf_size: int = 30,
    metric: str | Callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    n_jobs: int | None = None
)

Transform X into a (weighted) graph of k nearest neighbors.

RadiusNeighborsTransformer { .api }

from sklearn.neighbors import RadiusNeighborsTransformer

RadiusNeighborsTransformer(
    mode: str = "distance",
    radius: float = 1.0,
    algorithm: str = "auto",
    leaf_size: int = 30,
    metric: str | Callable = "minkowski",
    p: int = 2,
    metric_params: dict | None = None,
    n_jobs: int | None = None
)

Transform X into a (weighted) graph of neighbors nearer than a radius.

NeighborhoodComponentsAnalysis { .api }

from sklearn.neighbors import NeighborhoodComponentsAnalysis

NeighborhoodComponentsAnalysis(
    n_components: int | None = None,
    init: str | ArrayLike = "auto",
    warm_start: bool = False,
    max_iter: int = 50,
    tol: float = 1e-05,
    callback: Callable | None = None,
    verbose: int = 0,
    random_state: int | RandomState | None = None
)

Neighborhood Components Analysis.

Neighbor Constants

VALID_METRICS { .api }

from sklearn.neighbors import VALID_METRICS

# Dictionary mapping algorithm names to valid metrics
VALID_METRICS: dict[str, list[str]]

Valid metrics for neighbor algorithms.

VALID_METRICS_SPARSE { .api }

from sklearn.neighbors import VALID_METRICS_SPARSE

# Dictionary mapping algorithm names to valid metrics for sparse matrices  
VALID_METRICS_SPARSE: dict[str, list[str]]

Valid metrics for neighbor algorithms with sparse matrices.

Exception Classes

NotFittedError { .api }

from sklearn.exceptions import NotFittedError

class NotFittedError(ValueError, AttributeError):
    """Exception class to raise if estimator is used before fitting."""
    pass

Exception class to raise if estimator is used before fitting.

ConvergenceWarning { .api }

from sklearn.exceptions import ConvergenceWarning

class ConvergenceWarning(UserWarning):
    """Custom warning to capture convergence problems."""
    pass

Custom warning to capture convergence problems.

DataConversionWarning { .api }

from sklearn.exceptions import DataConversionWarning

class DataConversionWarning(UserWarning):
    """Warning used to notify implicit data conversions happening in the code."""
    pass

Warning used to notify implicit data conversions happening in the code.

DataDimensionalityWarning { .api }

from sklearn.exceptions import DataDimensionalityWarning

class DataDimensionalityWarning(UserWarning):
    """Custom warning to capture data dimensionality problems."""
    pass

Custom warning to capture data dimensionality problems.

EfficiencyWarning { .api }

from sklearn.exceptions import EfficiencyWarning

class EfficiencyWarning(UserWarning):
    """Warning used to notify the user of inefficient computation."""
    pass

Warning used to notify the user of inefficient computation.

EstimatorCheckFailedWarning { .api }

from sklearn.exceptions import EstimatorCheckFailedWarning

class EstimatorCheckFailedWarning(UserWarning):
    """Warning used when an estimator check fails."""
    pass

Warning used when an estimator check fails.

FitFailedWarning { .api }

from sklearn.exceptions import FitFailedWarning

class FitFailedWarning(RuntimeWarning):
    """Warning class used if there is an error while fitting the estimator."""
    pass

Warning class used if there is an error while fitting the estimator.

PositiveSpectrumWarning { .api }

from sklearn.exceptions import PositiveSpectrumWarning

class PositiveSpectrumWarning(UserWarning):
    """Warning raised when the eigenvalues of a PSD matrix have issues."""
    pass

Warning raised when the eigenvalues of a PSD matrix have issues.

SkipTestWarning { .api }

from sklearn.exceptions import SkipTestWarning

class SkipTestWarning(UserWarning):
    """Warning class used to notify the user of a test that was skipped."""
    pass

Warning class used to notify the user of a test that was skipped.

UndefinedMetricWarning { .api }

from sklearn.exceptions import UndefinedMetricWarning

class UndefinedMetricWarning(UserWarning):
    """Warning used when the metric is invalid."""
    pass

Warning used when the metric is invalid.

UnsetMetadataPassedError { .api }

from sklearn.exceptions import UnsetMetadataPassedError

class UnsetMetadataPassedError(ValueError):
    """Exception when metadata is passed which is not explicitly requested."""
    pass

Exception when metadata is passed which is not explicitly requested.

Frozen Estimators

FrozenEstimator { .api }

from sklearn.frozen import FrozenEstimator

FrozenEstimator(
    estimator: BaseEstimator
)

Meta-estimator that wraps a fitted estimator and freezes it, so that calling fit (or fit-related methods) does not re-fit the underlying estimator.

Examples

Basic Pipeline Example

from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris

# Load data
X, y = load_iris(return_X_y=True)

# Method 1: Using Pipeline class
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression())
])

# Method 2: Using make_pipeline function
pipeline = make_pipeline(
    StandardScaler(),
    LogisticRegression()
)

# Fit and predict
pipeline.fit(X, y)
predictions = pipeline.predict(X)

Column Transformer Example

from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pandas as pd

# Example with mixed data types
data = pd.DataFrame({
    'age': [25, 30, 35],
    'income': [50000, 60000, 70000], 
    'city': ['NYC', 'LA', 'Chicago'],
    'gender': ['M', 'F', 'M']
})

# Method 1: Using ColumnTransformer class
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), ['age', 'income']),
    ('cat', OneHotEncoder(), ['city', 'gender'])
])

# Method 2: Using make_column_transformer function
preprocessor = make_column_transformer(
    (StandardScaler(), ['age', 'income']),
    (OneHotEncoder(), ['city', 'gender'])
)

# Transform data
transformed = preprocessor.fit_transform(data)

Feature Union Example

from sklearn.pipeline import FeatureUnion, make_union
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

# Combine PCA and feature selection
feature_union = FeatureUnion([
    ('pca', PCA(n_components=2)),
    ('select_k_best', SelectKBest(k=2))
])

# Or using make_union
feature_union = make_union(
    PCA(n_components=2),
    SelectKBest(k=2)
)

# Transform features
X_combined = feature_union.fit_transform(X, y)

Configuration Example

from sklearn import set_config, get_config, config_context
from sklearn.linear_model import LinearRegression

# Get current config
current_config = get_config()
print(current_config)

# Set global configuration
set_config(display='diagram', print_changed_only=True)

# Use configuration context
with config_context(assume_finite=True):
    # Operations within this block use assume_finite=True
    model = LinearRegression()
    model.fit(X, y)

# Configuration reverts to previous state outside the context

Partial Dependence Example

from sklearn.inspection import partial_dependence, PartialDependenceDisplay
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)

# Compute partial dependence
pd_result = partial_dependence(
    model, X, features=[0, 1], 
    grid_resolution=20
)

# Create display
display = PartialDependenceDisplay.from_estimator(
    model, X, features=[0, 1]
)
display.plot()
plt.show()

Permutation Importance Example

from sklearn.inspection import permutation_importance

# Calculate permutation importance
result = permutation_importance(
    model, X, y, n_repeats=10, random_state=42
)

# Get importance scores
importance_scores = result.importances_mean
importance_std = result.importances_std

# Print results
for i, (score, std) in enumerate(zip(importance_scores, importance_std)):
    print(f"Feature {i}: {score:.3f} +/- {std:.3f}")

Install with Tessl CLI

npx tessl i tessl/pypi-scikit-learn

docs

datasets.md

feature-extraction.md

index.md

metrics.md

model-selection.md

neighbors.md

pipelines.md

preprocessing.md

supervised-learning.md

unsupervised-learning.md

utilities.md

tile.json