Machine Learning Library Extensions (mlxtend): essential tools for day-to-day data science tasks.

This module provides general-purpose utilities for testing, data validation, and parameter handling in machine learning workflows, including a counting utility for collections and iterables.
class Counter:
    """Counter for counting hashable objects.

    Maintains a mapping from element to occurrence count, mirroring the
    subset of the ``collections.Counter`` interface documented below.
    """

    def __init__(self, iterable=None):
        """
        Counter for counting hashable objects.

        Parameters:
        - iterable: iterable, initial data to count (optional)
        """
        # Internal element -> count mapping.
        self._counts = {}
        if iterable is not None:
            self.update(iterable)

    def update(self, iterable):
        """Update counts from iterable."""
        for element in iterable:
            self._counts[element] = self._counts.get(element, 0) + 1

    def most_common(self, n=None):
        """
        Return list of (element, count) tuples for most common elements.

        Parameters:
        - n: int, number of most common elements to return (all if None)

        Returns:
        - common_elements: list, tuples of (element, count) sorted by
          descending count
        """
        ranked = sorted(self._counts.items(), key=lambda kv: kv[1], reverse=True)
        return ranked if n is None else ranked[:n]

    def keys(self):
        """Return iterator over counter keys"""
        return iter(self._counts.keys())

    def values(self):
        """Return iterator over counter values"""
        return iter(self._counts.values())

    def items(self):
        """Return iterator over (key, value) pairs"""
        return iter(self._counts.items())

# Functions for validating input data format and consistency.
def check_Xy(X, y, y_int=True):
    """
    Validate input data format for machine learning algorithms.

    Parameters:
    - X: array-like, feature matrix (must be 2-dimensional)
    - y: array-like, target labels/values (must be 1-dimensional,
      same number of samples as X)
    - y_int: bool, whether y should contain integers

    Returns:
    - X_validated: array, validated feature matrix
    - y_validated: array, validated target array

    Raises:
    - ValueError: if data format is invalid
    """
    # Local import so the utility works without a module-level numpy import.
    import numpy as np

    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim != 2:
        raise ValueError("X must be a 2D array. Got %d dimensions" % X.ndim)
    if y.ndim != 1:
        raise ValueError("y must be a 1D array. Got %d dimensions" % y.ndim)
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            "X and y must contain the same number of samples. "
            "Got X: %d, y: %d" % (X.shape[0], y.shape[0])
        )
    if y_int and not np.issubdtype(y.dtype, np.integer):
        raise ValueError("y must be an integer array. Got %s" % y.dtype)
    return X, y

# Utilities for testing and exception handling.
def assert_raises(exception_type, callable_obj, *args, **kwargs):
    """
    Test utility for verifying that a function raises expected exception.

    Parameters:
    - exception_type: Exception class, expected exception type
    - callable_obj: callable, function to test
    - args: arguments to pass to callable
    - kwargs: keyword arguments to pass to callable

    Raises:
    - AssertionError: if expected exception is not raised.
      Any exception of a different type propagates unchanged.
    """
    try:
        callable_obj(*args, **kwargs)
    except exception_type:
        # Expected exception observed: assertion holds.
        return
    raise AssertionError("%s not raised" % exception_type.__name__)

# Utilities for formatting and handling parameters.
def format_kwarg_dictionaries(**kwargs):
    """
    Format keyword argument dictionaries for display or logging.

    Parameters:
    - kwargs: keyword arguments to format

    Returns:
    - formatted_dict: dict, formatted parameter dictionary (a fresh
      dict so callers can mutate it without affecting the input)
    """
    return dict(kwargs)
# Example: count elements in a list with Counter.
from mlxtend.utils import Counter

data = ['apple', 'banana', 'apple', 'cherry', 'banana', 'apple']
counter = Counter(data)
print("Element counts:")
for item, count in counter.items():
    print(f" {item}: {count}")
print(f"\nMost common: {counter.most_common(2)}")
# Update with more data
counter.update(['banana', 'date', 'apple'])
print(f"After update: {counter.most_common()}")
# Example: validate feature/target arrays with check_Xy.
from mlxtend.utils import check_Xy
import numpy as np

# Valid data
X = np.random.randn(100, 5)
y = np.random.randint(0, 3, 100)
try:
    X_val, y_val = check_Xy(X, y, y_int=True)
    print("Data validation passed")
    print(f"X shape: {X_val.shape}, y shape: {y_val.shape}")
except ValueError as e:
    print(f"Validation error: {e}")

# Invalid data (mismatched samples)
X_invalid = np.random.randn(100, 5)
y_invalid = np.random.randint(0, 3, 90)  # Wrong number of samples
try:
    X_val, y_val = check_Xy(X_invalid, y_invalid)
except ValueError as e:
    print(f"Expected validation error: {e}")
# Example: verify expected exceptions with assert_raises.
from mlxtend.utils import assert_raises

def divide_by_zero():
    return 1 / 0

def safe_divide(a, b):
    if b == 0:
        raise ValueError("Cannot divide by zero")
    return a / b

# Test that functions raise expected exceptions
try:
    assert_raises(ZeroDivisionError, divide_by_zero)
    print("✓ ZeroDivisionError assertion passed")
except AssertionError:
    print("✗ Expected ZeroDivisionError not raised")
try:
    assert_raises(ValueError, safe_divide, 10, 0)
    print("✓ ValueError assertion passed")
except AssertionError:
    print("✗ Expected ValueError not raised")
# Example: format parameters for logging with format_kwarg_dictionaries.
from mlxtend.utils import format_kwarg_dictionaries

# Format parameters for logging
params = {
    'learning_rate': 0.01,
    'epochs': 100,
    'batch_size': 32,
    'optimizer': 'adam'
}
formatted = format_kwarg_dictionaries(**params)
print("Formatted parameters:")
for key, value in formatted.items():
    print(f" {key}: {value}")
# Install with Tessl CLI
Install with the Tessl CLI: `npx tessl i tessl/pypi-mlxtend`