Machine Learning Library Extensions providing essential tools for day-to-day data science tasks
—
Data transformation utilities including scaling, encoding, and array manipulation functions compatible with scikit-learn pipelines.
Center data around the mean for normalization.
class MeanCenterer:
def __init__(self):
"""Mean centering transformer"""
def fit(self, X, y=None):
"""Compute the mean to be used for centering"""
def transform(self, X):
"""Center data around the mean"""
def fit_transform(self, X, y=None):
"""Fit and transform data"""
mean_: # Computed mean values

Encode transaction data for frequent pattern mining algorithms.
class TransactionEncoder:
def __init__(self):
"""Encode transaction data to binary matrix format"""
def fit(self, X):
"""Learn the unique items in the transaction dataset"""
def transform(self, X):
"""Transform transactions to binary matrix"""
def fit_transform(self, X):
"""Fit and transform transactions"""
columns_: # Column names (unique items)

Scaling and standardization utilities for feature normalization.
def standardize(array, columns=None, ddof=0):
"""
Z-score standardization of features.
Parameters:
- array: array-like, input data
- columns: list, columns to standardize (all if None)
- ddof: int, degrees of freedom for standard deviation
Returns:
- standardized_array: array-like, standardized data
"""
def minmax_scaling(array, columns=None, min_val=0, max_val=1):
"""
Min-max feature scaling to specified range.
Parameters:
- array: array-like, input data
- columns: list, columns to scale (all if None)
- min_val: float, minimum value of scaled range
- max_val: float, maximum value of scaled range
Returns:
- scaled_array: array-like, scaled data
"""

Utility transformers for data pipeline integration.
class CopyTransformer:
def __init__(self):
"""Identity transformer that copies input data"""
def fit(self, X, y=None):
"""Fit transformer (no-op)"""
def transform(self, X):
"""Return copy of input data"""
class DenseTransformer:
def __init__(self):
"""Convert sparse matrices to dense format"""
def fit(self, X, y=None):
"""Fit transformer (no-op)"""
def transform(self, X):
"""Convert sparse matrix to dense"""
def one_hot(y, dtype=int):
"""
One-hot encode categorical labels.
Parameters:
- y: array-like, categorical labels
- dtype: data type for output array
Returns:
- encoded: array, one-hot encoded matrix
"""
def shuffle_arrays_unison(*arrays, random_seed=None):
"""
Shuffle multiple arrays in unison.
Parameters:
- arrays: array-like objects to shuffle together
- random_seed: int, random seed for reproducibility
Returns:
- shuffled_arrays: tuple of shuffled arrays
"""

from mlxtend.preprocessing import TransactionEncoder, MeanCenterer, standardize
import pandas as pd
import numpy as np
# Transaction encoding example
transactions = [['bread', 'milk'], ['bread', 'beer'], ['milk', 'beer']]
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)
# Mean centering example
X = np.random.randn(100, 5)
mc = MeanCenterer()
X_centered = mc.fit_transform(X)
# Standardization example
X_std = standardize(X)

Install with Tessl CLI
npx tessl i tessl/pypi-mlxtend