A comprehensive machine learning library providing supervised and unsupervised learning algorithms with consistent APIs and extensive tools for data preprocessing, model evaluation, and deployment.
87
This document covers core utilities, configuration functions, pipelines, composition tools, and other utility functions in scikit-learn.
from sklearn.base import clone
clone(
estimator: BaseEstimator,
safe: bool = True
) -> BaseEstimator
Construct a new unfitted estimator with the same parameters.
from sklearn import get_config
get_config() -> dict
Retrieve current scikit-learn configuration.
from sklearn import set_config
set_config(
assume_finite: bool | None = None,
working_memory: int | None = None,
print_changed_only: bool | None = None,
display: str | None = None,
pairwise_distances_chunk_size: int | None = None,
enable_cython_pairwise_dist: bool | None = None,
array_api_dispatch: bool | None = None,
transform_output: str | None = None,
enable_metadata_routing: bool | None = None,
skip_parameter_validation: bool | None = None
) -> dict
Set global scikit-learn configuration.
from sklearn import config_context
config_context(**new_config) -> ContextManager
Temporarily change global configuration.
from sklearn import show_versions
show_versions() -> None
Print system and dependency version information.
import sklearn
sklearn.__version__  # "1.7.1"
Current scikit-learn version string.
from sklearn.pipeline import Pipeline
Pipeline(
steps: list[tuple[str, BaseEstimator]],
memory: str | object | None = None,
verbose: bool = False
)
Pipeline of transforms with a final estimator.
from sklearn.pipeline import FeatureUnion
FeatureUnion(
transformer_list: list[tuple[str, BaseTransformer]],
n_jobs: int | None = None,
transformer_weights: dict | None = None,
verbose: bool = False,
verbose_feature_names_out: bool = True
)
Concatenates results of multiple transformer objects.
from sklearn.pipeline import make_pipeline
make_pipeline(
*steps: BaseEstimator,
memory: str | object | None = None,
verbose: bool = False
) -> Pipeline
Construct a Pipeline from the given estimators.
from sklearn.pipeline import make_union
make_union(
*transformers: BaseTransformer,
n_jobs: int | None = None,
verbose: bool = False
) -> FeatureUnion
Construct a FeatureUnion from the given transformers.
from sklearn.compose import ColumnTransformer
ColumnTransformer(
transformers: list[tuple[str, BaseTransformer, ArrayLike | str | Callable]],
remainder: str | BaseTransformer = "drop",
sparse_threshold: float = 0.3,
n_jobs: int | None = None,
transformer_weights: dict | None = None,
verbose: bool = False,
verbose_feature_names_out: bool = True,
force_int_remainder_cols: bool = True
)
Applies transformers to columns of an array or pandas DataFrame.
from sklearn.compose import TransformedTargetRegressor
TransformedTargetRegressor(
regressor: BaseRegressor | None = None,
transformer: BaseTransformer | None = None,
func: Callable | None = None,
inverse_func: Callable | None = None,
check_inverse: bool = True
)
Meta-estimator to regress on a transformed target.
from sklearn.compose import make_column_transformer
make_column_transformer(
*transformers: tuple[BaseTransformer, ArrayLike | str | Callable],
remainder: str | BaseTransformer = "drop",
sparse_threshold: float = 0.3,
n_jobs: int | None = None,
verbose: bool = False,
verbose_feature_names_out: bool = True,
force_int_remainder_cols: bool = True
) -> ColumnTransformer
Construct a ColumnTransformer from the given transformers.
from sklearn.compose import make_column_selector
make_column_selector(
pattern: str | None = None,
dtype_include: type | str | list | None = None,
dtype_exclude: type | str | list | None = None
) -> Callable
Create a callable to select columns to be used with ColumnTransformer.
from sklearn.inspection import partial_dependence
partial_dependence(
estimator: BaseEstimator,
X: ArrayLike,
features: int | str | ArrayLike | list,
response_method: str = "auto",
percentiles: tuple[float, float] = (0.05, 0.95),
grid_resolution: int = 100,
method: str = "auto",
kind: str = "average",
subsample: int | float | None = 1000,
n_jobs: int | None = None,
verbose: int = 0,
feature_names: ArrayLike | None = None,
categorical_features: ArrayLike | None = None
) -> dict
Partial dependence of features.
from sklearn.inspection import permutation_importance
permutation_importance(
estimator: BaseEstimator,
X: ArrayLike,
y: ArrayLike,
scoring: str | Callable | list | tuple | dict | None = None,
n_repeats: int = 5,
n_jobs: int | None = None,
random_state: int | RandomState | None = None,
sample_weight: ArrayLike | None = None,
max_samples: int | float = 1.0
) -> dict
Permutation importance for feature evaluation.
from sklearn.inspection import PartialDependenceDisplay
PartialDependenceDisplay(
pd_results: list[dict],
features: list,
feature_names: ArrayLike | None = None,
target_idx: int | None = None,
deciles: dict | None = None
)
Partial Dependence Plot (PDP) visualization.
from sklearn.inspection import DecisionBoundaryDisplay
DecisionBoundaryDisplay(
xx0: ArrayLike,
xx1: ArrayLike,
response: ArrayLike
)
Visualization of decision boundaries of a classifier.
from sklearn.isotonic import check_increasing
check_increasing(
x: ArrayLike,
y: ArrayLike
) -> bool
Determine whether y is monotonically correlated with x.
from sklearn.isotonic import isotonic_regression
isotonic_regression(
y: ArrayLike,
sample_weight: ArrayLike | None = None,
y_min: float | None = None,
y_max: float | None = None,
increasing: bool = True
) -> ArrayLike
Solve the isotonic regression model.
from sklearn.neighbors import kneighbors_graph
kneighbors_graph(
X: ArrayLike,
n_neighbors: int,
mode: str = "connectivity",
metric: str | Callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
include_self: bool | str = "auto",
n_jobs: int | None = None
) -> ArrayLike
Compute the (weighted) graph of k-Neighbors for points in X.
from sklearn.neighbors import radius_neighbors_graph
radius_neighbors_graph(
X: ArrayLike,
radius: float,
mode: str = "connectivity",
metric: str | Callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
include_self: bool | str = "auto",
n_jobs: int | None = None
) -> ArrayLike
Compute the (weighted) graph of Neighbors for points in X.
from sklearn.neighbors import sort_graph_by_row_values
sort_graph_by_row_values(
graph: ArrayLike,
copy: bool = True,
warn_when_not_sorted: bool = True
) -> ArrayLike
Sort a sparse graph such that each row has its data sorted by value.
from sklearn.neighbors import BallTree
BallTree(
X: ArrayLike,
leaf_size: int = 40,
metric: str | DistanceMetric = "minkowski",
**kwargs
)
BallTree for fast generalized N-point problems.
from sklearn.neighbors import KDTree
KDTree(
X: ArrayLike,
leaf_size: int = 40,
metric: str = "minkowski",
**kwargs
)
KDTree for fast generalized N-point problems.
from sklearn.neighbors import KernelDensity
KernelDensity(
bandwidth: float | str = 1.0,
algorithm: str = "auto",
kernel: str = "gaussian",
metric: str = "euclidean",
atol: float = 0,
rtol: float = 0,
breadth_first: bool = True,
leaf_size: int = 40,
metric_params: dict | None = None
)
Kernel Density Estimation.
from sklearn.neighbors import NearestNeighbors
NearestNeighbors(
n_neighbors: int = 5,
radius: float = 1.0,
algorithm: str = "auto",
leaf_size: int = 30,
metric: str | Callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
n_jobs: int | None = None
)
Unsupervised learner for implementing neighbor searches.
from sklearn.neighbors import KNeighborsTransformer
KNeighborsTransformer(
mode: str = "distance",
n_neighbors: int = 5,
algorithm: str = "auto",
leaf_size: int = 30,
metric: str | Callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
n_jobs: int | None = None
)
Transform X into a (weighted) graph of k nearest neighbors.
from sklearn.neighbors import RadiusNeighborsTransformer
RadiusNeighborsTransformer(
mode: str = "distance",
radius: float = 1.0,
algorithm: str = "auto",
leaf_size: int = 30,
metric: str | Callable = "minkowski",
p: int = 2,
metric_params: dict | None = None,
n_jobs: int | None = None
)
Transform X into a (weighted) graph of neighbors nearer than a radius.
from sklearn.neighbors import NeighborhoodComponentsAnalysis
NeighborhoodComponentsAnalysis(
n_components: int | None = None,
init: str | ArrayLike = "auto",
warm_start: bool = False,
max_iter: int = 50,
tol: float = 1e-05,
callback: Callable | None = None,
verbose: int = 0,
random_state: int | RandomState | None = None
)
Neighborhood Components Analysis.
from sklearn.neighbors import VALID_METRICS
# Dictionary mapping algorithm names to valid metrics
VALID_METRICS: dict[str, list[str]]
Valid metrics for neighbor algorithms.
from sklearn.neighbors import VALID_METRICS_SPARSE
# Dictionary mapping algorithm names to valid metrics for sparse matrices
VALID_METRICS_SPARSE: dict[str, list[str]]
Valid metrics for neighbor algorithms with sparse matrices.
from sklearn.exceptions import NotFittedError
class NotFittedError(ValueError, AttributeError):
"""Exception class to raise if estimator is used before fitting."""
pass
Exception class to raise if estimator is used before fitting.
from sklearn.exceptions import ConvergenceWarning
class ConvergenceWarning(UserWarning):
"""Custom warning to capture convergence problems."""
pass
Custom warning to capture convergence problems.
from sklearn.exceptions import DataConversionWarning
class DataConversionWarning(UserWarning):
"""Warning used to notify implicit data conversions happening in the code."""
pass
Warning used to notify implicit data conversions happening in the code.
from sklearn.exceptions import DataDimensionalityWarning
class DataDimensionalityWarning(UserWarning):
"""Custom warning to capture data dimensionality problems."""
pass
Custom warning to capture data dimensionality problems.
from sklearn.exceptions import EfficiencyWarning
class EfficiencyWarning(UserWarning):
"""Warning used to notify the user of inefficient computation."""
pass
Warning used to notify the user of inefficient computation.
from sklearn.exceptions import EstimatorCheckFailedWarning
class EstimatorCheckFailedWarning(UserWarning):
"""Warning used when an estimator check fails."""
pass
Warning used when an estimator check fails.
from sklearn.exceptions import FitFailedWarning
class FitFailedWarning(RuntimeWarning):
"""Warning class used if there is an error while fitting the estimator."""
pass
Warning class used if there is an error while fitting the estimator.
from sklearn.exceptions import PositiveSpectrumWarning
class PositiveSpectrumWarning(UserWarning):
"""Warning raised when the eigenvalues of a PSD matrix have issues."""
pass
Warning raised when the eigenvalues of a PSD matrix have issues.
from sklearn.exceptions import SkipTestWarning
class SkipTestWarning(UserWarning):
"""Warning class used to notify the user of a test that was skipped."""
pass
Warning class used to notify the user of a test that was skipped.
from sklearn.exceptions import UndefinedMetricWarning
class UndefinedMetricWarning(UserWarning):
"""Warning used when the metric is invalid."""
pass
Warning used when the metric is invalid.
from sklearn.exceptions import UnsetMetadataPassedError
class UnsetMetadataPassedError(ValueError):
"""Exception when metadata is passed which is not explicitly requested."""
pass
Exception when metadata is passed which is not explicitly requested.
from sklearn.frozen import FrozenEstimator
FrozenEstimator(
estimator: BaseEstimator
)
Estimator wrapper that prevents re-fitting, so a pre-fitted estimator can be used inside pipelines and meta-estimators.
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
# Load data
X, y = load_iris(return_X_y=True)
# Method 1: Using Pipeline class
pipeline = Pipeline([
('scaler', StandardScaler()),
('classifier', LogisticRegression())
])
# Method 2: Using make_pipeline function
pipeline = make_pipeline(
StandardScaler(),
LogisticRegression()
)
# Fit and predict
pipeline.fit(X, y)
predictions = pipeline.predict(X)
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import pandas as pd
# Example with mixed data types
data = pd.DataFrame({
'age': [25, 30, 35],
'income': [50000, 60000, 70000],
'city': ['NYC', 'LA', 'Chicago'],
'gender': ['M', 'F', 'M']
})
# Method 1: Using ColumnTransformer class
preprocessor = ColumnTransformer([
('num', StandardScaler(), ['age', 'income']),
('cat', OneHotEncoder(), ['city', 'gender'])
])
# Method 2: Using make_column_transformer function
preprocessor = make_column_transformer(
(StandardScaler(), ['age', 'income']),
(OneHotEncoder(), ['city', 'gender'])
)
# Transform data
transformed = preprocessor.fit_transform(data)
from sklearn.pipeline import FeatureUnion, make_union
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
# Combine PCA and feature selection
feature_union = FeatureUnion([
('pca', PCA(n_components=2)),
('select_k_best', SelectKBest(k=2))
])
# Or using make_union
feature_union = make_union(
PCA(n_components=2),
SelectKBest(k=2)
)
# Transform features
X_combined = feature_union.fit_transform(X, y)
from sklearn import set_config, get_config, config_context
from sklearn.linear_model import LinearRegression
# Get current config
current_config = get_config()
print(current_config)
# Set global configuration
set_config(display='diagram', print_changed_only=True)
# Use configuration context
with config_context(assume_finite=True):
# Operations within this block use assume_finite=True
model = LinearRegression()
model.fit(X, y)
# Configuration reverts to previous state outside the context
from sklearn.inspection import partial_dependence, PartialDependenceDisplay
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)
# Compute partial dependence
pd_result = partial_dependence(
model, X, features=[0, 1],
grid_resolution=20
)
# Create display
display = PartialDependenceDisplay.from_estimator(
model, X, features=[0, 1]
)
display.plot()
plt.show()
from sklearn.inspection import permutation_importance
# Calculate permutation importance
result = permutation_importance(
model, X, y, n_repeats=10, random_state=42
)
# Get importance scores
importance_scores = result.importances_mean
importance_std = result.importances_std
# Print results
for i, (score, std) in enumerate(zip(importance_scores, importance_std)):
print(f"Feature {i}: {score:.3f} +/- {std:.3f}")
Install with Tessl CLI
npx tessl i tessl/pypi-scikit-learndocs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10