Machine Learning Library Extensions providing essential tools for day-to-day data science tasks
```bash
npx @tessl/cli install tessl/pypi-mlxtend@0.23.0
```

MLxtend (Machine Learning Extensions) is a comprehensive Python library that provides essential tools for day-to-day data science tasks, extending scikit-learn and other scientific computing libraries. The package offers advanced machine learning algorithms including ensemble methods, frequent pattern mining algorithms, feature selection and extraction techniques, model evaluation utilities, and specialized plotting functions for visualization of decision regions and model performance.
```bash
pip install mlxtend
```

```python
import mlxtend
```

Common import patterns for specific modules:

```python
from mlxtend.classifier import EnsembleVoteClassifier, StackingClassifier
from mlxtend.feature_selection import SequentialFeatureSelector
from mlxtend.plotting import plot_decision_regions, plot_learning_curves
from mlxtend.evaluate import mcnemar, bootstrap_point632_score
from mlxtend.frequent_patterns import apriori, association_rules
```

A quick-start example that combines an ensemble classifier with decision-region plotting:

```python
from mlxtend.classifier import EnsembleVoteClassifier
from mlxtend.plotting import plot_decision_regions
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
# Create sample data
X, y = make_classification(n_samples=1000, n_features=2, n_redundant=0,
                           n_informative=2, random_state=42, n_clusters_per_class=1)
# Create ensemble classifier
clf1 = LogisticRegression(random_state=42)
clf2 = RandomForestClassifier(random_state=42)
clf3 = SVC(probability=True, random_state=42)
ensemble = EnsembleVoteClassifier(clfs=[clf1, clf2, clf3], voting='soft')
ensemble.fit(X, y)
# Visualize decision regions
plot_decision_regions(X, y, clf=ensemble, legend=2)
plt.title('Ensemble Classifier Decision Regions')
plt.show()
```

MLxtend is organized into 14 specialized modules, each focusing on a specific aspect of machine learning; the main modules are outlined below.
This modular design allows users to import only the functionality they need while maintaining compatibility with the broader Python scientific ecosystem, particularly scikit-learn.
Advanced classification methods including ensemble voting, stacking, neural networks, and classic algorithms like perceptron and logistic regression.

```python
class EnsembleVoteClassifier:
    def __init__(self, clfs, voting='hard', weights=None): ...
    def fit(self, X, y): ...
    def predict(self, X): ...
    def predict_proba(self, X): ...

class StackingClassifier:
    def __init__(self, classifiers, meta_classifier): ...
    def fit(self, X, y): ...
    def predict(self, X): ...

class MultiLayerPerceptron:
    def __init__(self, eta=0.5, epochs=50, hidden_layers=[50]): ...
    def fit(self, X, y): ...
    def predict(self, X): ...
```
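For instance, a stacked ensemble can be built much like the voting ensemble in the quick-start example. The sketch below is illustrative only; the base learners, synthetic dataset, and cross-validation settings are arbitrary choices, not anything prescribed by mlxtend.

```python
from mlxtend.classifier import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

# Synthetic data purely for illustration
X, y = make_classification(n_samples=500, n_features=10, random_state=42)

# Level-0 classifiers feed their predictions into the meta-classifier
stack = StackingClassifier(
    classifiers=[KNeighborsClassifier(n_neighbors=5),
                 DecisionTreeClassifier(random_state=42)],
    meta_classifier=LogisticRegression())

print(cross_val_score(stack, X, y, cv=5).mean())
```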
Tools for selecting optimal feature subsets and extracting new features through dimensionality reduction techniques.

```python
class SequentialFeatureSelector:
    def __init__(self, estimator, k_features=1, forward=True, scoring=None): ...
    def fit(self, X, y): ...
    def transform(self, X): ...

class PrincipalComponentAnalysis:
    def __init__(self, n_components=None): ...
    def fit(self, X, y=None): ...
    def transform(self, X): ...

class LinearDiscriminantAnalysis:
    def __init__(self, n_discriminants=None): ...
    def fit(self, X, y): ...
    def transform(self, X): ...
```
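A minimal forward-selection sketch, using the Iris loader from `mlxtend.data` and a k-nearest-neighbors estimator as arbitrary choices; the fitted selector exposes the selected indices and cross-validated score via `k_feature_idx_` and `k_score_`.

```python
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.neighbors import KNeighborsClassifier
from mlxtend.data import iris_data

X, y = iris_data()

# Forward selection of a 3-feature subset, scored by 5-fold CV accuracy
sfs = SequentialFeatureSelector(KNeighborsClassifier(n_neighbors=3),
                                k_features=3,
                                forward=True,
                                scoring='accuracy',
                                cv=5)
sfs = sfs.fit(X, y)

print(sfs.k_feature_idx_)  # indices of the selected features
print(sfs.k_score_)        # cross-validated score of that subset
X_reduced = sfs.transform(X)
```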
Comprehensive model evaluation tools including statistical tests, bootstrap methods, and cross-validation utilities.

```python
def mcnemar(ary, corrected=True, exact=False):
    """McNemar test for classifier comparison"""

def bootstrap_point632_score(estimator, X, y, n_splits=200, method='.632+'):
    """Bootstrap .632 and .632+ error estimation"""

def paired_ttest_5x2cv(estimator1, estimator2, X, y, scoring=None):
    """5x2cv paired t-test for comparing classifiers"""

class BootstrapOutOfBag:
    def __init__(self, n_splits=200, random_state=None): ...
    def split(self, X, y=None): ...
```
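As a sketch of the bootstrap estimator, the snippet below scores a decision tree with the .632+ method on the Iris data; the classifier, the number of rounds, and the `random_seed` value are arbitrary illustrative choices.

```python
import numpy as np
from mlxtend.evaluate import bootstrap_point632_score
from mlxtend.data import iris_data
from sklearn.tree import DecisionTreeClassifier

X, y = iris_data()
tree = DecisionTreeClassifier(random_state=123)

# 200 bootstrap rounds; returns one accuracy estimate per round
scores = bootstrap_point632_score(tree, X, y, n_splits=200,
                                  method='.632+', random_seed=123)
print(np.mean(scores))
print(np.percentile(scores, [2.5, 97.5]))  # rough 95% interval over the rounds
```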
Specialized plotting functions for machine learning model analysis including decision regions, learning curves, and confusion matrices.

```python
def plot_decision_regions(X, y, clf, feature_idx=None, filler_feature_values=None):
    """Plot decision regions for 2D datasets"""

def plot_learning_curves(X_train, y_train, X_test, y_test, clf, scoring='misclassification error'):
    """Plot learning curves"""

def plot_confusion_matrix(conf_mat, hide_spines=False, hide_ticks=False, figsize=None):
    """Plot confusion matrix"""

def plot_sequential_feature_selection(metric_dict, kind='std_dev', color='blue'):
    """Plot sequential feature selection results"""
```
Association rule mining and frequent pattern discovery algorithms for transaction data analysis.

```python
def apriori(df, min_support=0.5, use_colnames=False, max_len=None):
    """Apriori algorithm for frequent itemset mining"""

def association_rules(df, metric="confidence", min_threshold=0.8):
    """Generate association rules from frequent itemsets"""

def fpgrowth(df, min_support=0.5, use_colnames=False, max_len=None):
    """FP-Growth algorithm for frequent itemset mining"""

def fpmax(df, min_support=0.5, use_colnames=False):
    """FPMax algorithm for maximal frequent itemsets"""
```
Data transformation utilities including scaling, encoding, and array manipulation functions.

```python
class MeanCenterer:
    def fit(self, X): ...
    def transform(self, X): ...

class TransactionEncoder:
    def fit(self, X): ...
    def transform(self, X): ...

def standardize(array, columns=None, ddof=0):
    """Standardize features by removing mean and scaling to unit variance"""

def minmax_scaling(array, columns=None, min_val=0, max_val=1):
    """Min-max feature scaling"""
```
Unsupervised learning algorithms for data clustering and pattern discovery.

```python
class Kmeans:
    def __init__(self, k, max_iter=100, convergence_tolerance=1e-05): ...
    def fit(self, X): ...
    def predict(self, X): ...
```
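A minimal sketch using the Iris features (any numeric feature matrix works); the `random_seed` keyword and the fitted `centroids_` attribute are assumed here and are not part of the signature summary above.

```python
from mlxtend.cluster import Kmeans
from mlxtend.data import iris_data

X, _ = iris_data()

# Cluster the four Iris features into k=3 groups
km = Kmeans(k=3, max_iter=100, random_seed=0)
km.fit(X)

labels = km.predict(X)
print(labels[:10])
print(km.centroids_)  # assumed attribute holding the learned cluster centers
```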
Utilities for loading common machine learning datasets and generating synthetic data.

```python
def iris_data():
    """Load the Iris dataset"""

def wine_data():
    """Load the Wine dataset"""

def mnist_data():
    """Load the MNIST dataset"""

def boston_housing_data():
    """Load the Boston Housing dataset"""
```
Ensemble regression methods including stacking for improved prediction performance.

```python
class LinearRegression:
    def __init__(self, eta=0.01, epochs=50): ...
    def fit(self, X, y): ...
    def predict(self, X): ...

class StackingRegressor:
    def __init__(self, regressors, meta_regressor): ...
    def fit(self, X, y): ...
    def predict(self, X): ...
```
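A minimal stacking-regression sketch; the base regressors, meta-regressor, and synthetic data are arbitrary illustrative choices.

```python
from mlxtend.regressor import StackingRegressor
from sklearn.linear_model import Lasso, Ridge
from sklearn.svm import SVR
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=300, n_features=5, noise=10.0, random_state=0)

# Base regressors whose predictions are combined by the meta-regressor
stack = StackingRegressor(regressors=[Lasso(), Ridge()],
                          meta_regressor=SVR(kernel='rbf'))

print(cross_val_score(stack, X, y, cv=5, scoring='r2').mean())
```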
Mathematical functions and utilities commonly used in machine learning computations.

```python
def num_combinations(n, r):
    """Calculate number of combinations"""

def num_permutations(n, r):
    """Calculate number of permutations"""

def factorial(n):
    """Calculate factorial"""

def vectorspace_orthonormalization(ary):
    """Orthonormalize vectors using Gram-Schmidt process"""
```
Text processing utilities for natural language processing tasks.

```python
def generalize_names(name):
    """Generalize person names for consistency"""

def tokenizer_words_and_emoticons(text):
    """Tokenize text including emoticons"""

def tokenizer_emoticons(text):
    """Extract emoticons from text"""
```
File system utilities for finding and organizing files.

```python
def find_files(substring, path, recursive=True, check_ext=None, ignore_invisible=True):
    """Find files matching criteria"""

def find_filegroups(paths, substring='', extensions=None, ignore_invisible=True):
    """Group files by specified criteria"""
```
General-purpose utilities for testing, data validation, and parameter handling.

```python
class Counter:
    def __init__(self, iterable=None): ...
    def update(self, iterable): ...
    def most_common(self, n=None): ...

def check_Xy(X, y, y_int=True):
    """Validate input data format"""

def assert_raises(exception_type, callable_obj, *args, **kwargs):
    """Test utility for verifying exceptions"""
```

```python
# Core types used across multiple modules
from typing import Union, Optional, List, Tuple, Dict, Any
from numpy import ndarray
from pandas import DataFrame
# Common type aliases
ArrayLike = Union[ndarray, List, Tuple]
DataFrameLike = Union[DataFrame, ndarray]
ClassifierLike = object # sklearn-compatible classifier
RegressorLike = object # sklearn-compatible regressor
```