CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-scikit-learn

A comprehensive machine learning library providing supervised and unsupervised learning algorithms with consistent APIs and extensive tools for data preprocessing, model evaluation, and deployment.

87

0.98x
Overview
Eval results
Files

docs/pipelines.md

Pipelines and Composition

Pipeline utilities for building composite estimators that chain together preprocessing steps and learning algorithms. These tools enable creating robust, reproducible machine learning workflows.

Core Pipeline Classes

Pipeline

Chain transformers and estimators together in a single workflow.

from sklearn.pipeline import Pipeline

Pipeline(
    steps: list[tuple[str, estimator]],
    memory: str | object | None = None,
    verbose: bool = False
)

FeatureUnion

Combine multiple transformer objects into a single transformer.

from sklearn.pipeline import FeatureUnion

FeatureUnion(
    transformer_list: list[tuple[str, transformer]],
    n_jobs: int | None = None,
    transformer_weights: dict | None = None,
    verbose: bool = False
)

ColumnTransformer

Apply different transformers to different columns of the data.

from sklearn.compose import ColumnTransformer

ColumnTransformer(
    transformers: list[tuple[str, transformer, columns]],
    remainder: str | transformer = "drop",
    sparse_threshold: float = 0.3,
    n_jobs: int | None = None,
    transformer_weights: dict | None = None,
    verbose: bool = False,
    verbose_feature_names_out: bool = True
)

TransformedTargetRegressor

Meta-estimator to regress on a transformed target.

from sklearn.compose import TransformedTargetRegressor

TransformedTargetRegressor(
    regressor: estimator | None = None,
    transformer: transformer | None = None,
    func: callable | None = None,
    inverse_func: callable | None = None,
    check_inverse: bool = True
)

Convenience Functions

make_pipeline

Create a Pipeline using abbreviated syntax.

from sklearn.pipeline import make_pipeline

def make_pipeline(
    *steps: estimator,
    memory: str | object | None = None,
    verbose: bool = False
) -> Pipeline: ...

make_union

Create a FeatureUnion using abbreviated syntax.

from sklearn.pipeline import make_union

def make_union(
    *transformers: transformer,
    n_jobs: int | None = None,
    verbose: bool = False
) -> FeatureUnion: ...

make_column_transformer

Create a ColumnTransformer using abbreviated syntax.

from sklearn.compose import make_column_transformer

def make_column_transformer(
    *transformers: tuple[transformer, columns],
    remainder: str | transformer = "drop",
    sparse_threshold: float = 0.3,
    n_jobs: int | None = None,
    verbose: bool = False,
    verbose_feature_names_out: bool = True
) -> ColumnTransformer: ...

make_column_selector

Create a callable to select columns based on column properties.

from sklearn.compose import make_column_selector

def make_column_selector(
    pattern: str | None = None,
    dtype_include: type | list[type] | None = None,
    dtype_exclude: type | list[type] | None = None
) -> callable: ...

Usage Examples

Basic Pipeline

from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Explicit pipeline creation
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression())
])

# Abbreviated syntax
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression()
)

# Fit and predict
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

Column-wise Transformations

from sklearn.compose import ColumnTransformer, make_column_transformer, make_column_selector
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
import numpy as np  # required for the dtype_include=np.number selector below

# Explicit column transformer
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), ['age', 'income']),
    ('cat', OneHotEncoder(), ['category', 'region'])
])

# Using make_column_transformer
preprocessor = make_column_transformer(
    (StandardScaler(), ['age', 'income']),
    (OneHotEncoder(), ['category', 'region'])
)

# Using column selectors
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), make_column_selector(dtype_include=np.number)),
    ('cat', OneHotEncoder(), make_column_selector(dtype_include='object'))
])

Feature Union

from sklearn.pipeline import FeatureUnion, make_union
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

# Combine multiple feature transformations
feature_union = FeatureUnion([
    ('pca', PCA(n_components=2)),
    ('select_best', SelectKBest(k=3))
])

# Abbreviated syntax
feature_union = make_union(
    PCA(n_components=2),
    SelectKBest(k=3)
)

Complex Pipeline with Column Transformer

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier

# Preprocessing for numerical columns
numeric_features = ['age', 'income', 'score']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Preprocessing for categorical columns  
categorical_features = ['category', 'region', 'type']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine preprocessing steps
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)

# Create full pipeline
clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier())
])

# Train the pipeline
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)

Target Transformation

from sklearn.compose import TransformedTargetRegressor
from sklearn.preprocessing import QuantileTransformer
from sklearn.linear_model import LinearRegression
import numpy as np

# Apply log transformation to target variable
regressor = TransformedTargetRegressor(
    regressor=LinearRegression(),
    func=np.log,
    inverse_func=np.exp
)

# Or use a transformer
regressor = TransformedTargetRegressor(
    regressor=LinearRegression(),
    transformer=QuantileTransformer()
)

regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

Pipeline with Memory Caching

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from tempfile import mkdtemp

# Cache intermediate results for faster re-fitting
cachedir = mkdtemp()
pipe = Pipeline([
    ('scale', StandardScaler()),
    ('reduce_dim', PCA()),
    ('classify', LogisticRegression())
], memory=cachedir)

# First fit will cache intermediate results
pipe.fit(X_train, y_train)

# Subsequent fits with same early steps will use cache
pipe.set_params(classify__C=0.1)
pipe.fit(X_train, y_train)  # Only refits the classifier

Pipeline Properties and Methods

Accessing Pipeline Steps

# Access steps by name
pipe['scaler']  # Returns the scaler step
pipe[0]         # Returns first step
pipe[:-1]       # Returns all steps except the last

# Get step names
pipe.named_steps.keys()

# Set parameters for specific steps
pipe.set_params(scaler__with_mean=False)

Feature Names and Selection

# Get feature names from transformers
preprocessor.get_feature_names_out()

# Get transformed feature names
pipe[:-1].get_feature_names_out()

# Feature selection with pipelines
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import Pipeline

pipe = Pipeline([
    ('select', SelectKBest(k=10)),
    ('classify', LogisticRegression())
])

Install with Tessl CLI

npx tessl i tessl/pypi-scikit-learn

docs

datasets.md

feature-extraction.md

index.md

metrics.md

model-selection.md

neighbors.md

pipelines.md

preprocessing.md

supervised-learning.md

unsupervised-learning.md

utilities.md

tile.json