tessl/pypi-flaml

A fast library for automated machine learning and tuning

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Automated Machine Learning

Name: tessl/pypi-flaml
Author: tessl

FLAML provides a complete automated machine learning (AutoML) pipeline that supports classification, regression, forecasting, ranking, and NLP tasks. Within a specified time budget, AutoML automatically selects the best model and hyperparameters, offering an efficient solution for a wide range of machine learning problems.

Capabilities

AutoML Class

The main AutoML class provides automated machine learning with intelligent model selection, hyperparameter optimization, and ensemble methods.

class AutoML:
    """Entry point for automated model search, training, prediction, and scoring."""

    def __init__(self):
        """Create a new AutoML instance."""

    def fit(
        self,
        X_train,
        y_train,
        task="classification",
        time_budget=60,
        metric="auto",
        estimator_list="auto",
        eval_method="auto",
        split_ratio=0.1,
        n_splits=5,
        ensemble=False,
        n_jobs=1,
        verbose=0,
        **kwargs,
    ):
        """
        Search for and train a model on the given data.

        Args:
            X_train: Features to train on (pandas DataFrame, numpy array, or sparse matrix)
            y_train: Targets to train on (pandas Series or numpy array)
            task (str): Kind of problem - 'classification', 'regression', 'ts_forecast', 'rank', 'nlp'
            time_budget (float): Seconds allotted to training
            metric (str or callable): Metric used for evaluation ('accuracy', 'roc_auc', 'rmse', 'mae', etc.)
            estimator_list (list): Names of the estimators to try ('auto' for default selection)
            eval_method (str): How candidates are evaluated - 'auto', 'cv', 'holdout'
            split_ratio (float): Validation split ratio for the holdout method
            n_splits (int): Number of cross-validation folds
            ensemble (bool): Whether to perform ensemble learning
            n_jobs (int): Number of parallel jobs (-1 for all processors)
            verbose (int): Verbosity level (0-5+)
            **kwargs: Additional training parameters

        Returns:
            self: Fitted AutoML instance
        """

    def predict(self, X, **kwargs):
        """
        Predict targets for new data.

        Args:
            X: Features to predict on (same format as the training data)
            **kwargs: Additional prediction parameters

        Returns:
            numpy.ndarray: Predictions
        """

    def predict_proba(self, X, **kwargs):
        """
        Predict class probabilities (classification only).

        Args:
            X: Features to predict on
            **kwargs: Additional prediction parameters

        Returns:
            numpy.ndarray: Prediction probabilities
        """

    def score(self, X, y, **kwargs):
        """
        Measure model performance on labelled data.

        Args:
            X: Features to evaluate on
            y: True target values
            **kwargs: Additional scoring parameters

        Returns:
            float: Score based on the specified metric
        """

    def add_learner(self, learner_name, learner_class):
        """
        Register a custom learner so it can be used in the estimator list.

        Args:
            learner_name (str): Name under which the custom learner is registered
            learner_class: Learner class implementing the fit/predict interface
        """

Model Properties and Results

Access to the best model, configuration, and training results.

class AutoML:
    # NOTE(review): upstream FLAML documents `best_estimator` as the *name* (str)
    # of the best estimator and `model` as the trained object - confirm the
    # "instance" / "alias" wording below against the installed version.
    @property
    def best_estimator(self):
        """The best trained estimator instance."""

    @property
    def best_config(self):
        """The best hyperparameter configuration that was found."""

    @property
    def best_loss(self):
        """The best validation loss that was achieved."""

    @property
    def model(self):
        """The trained model object (alias for best_estimator)."""

    @property
    def feature_importances_(self):
        """Feature importance values taken from the best model."""

    @property
    def classes_(self):
        """Class labels, for classification tasks."""

    @property
    def best_config_per_estimator(self):
        """The best configuration for each estimator type that was tried."""

    @property
    def time_to_find_best_model(self):
        """Seconds taken to find the best model."""

    @property
    def feature_transformer(self):
        """The feature preprocessing pipeline."""

    @property
    def label_transformer(self):
        """The label preprocessing pipeline."""

Model Management and Persistence

Save the best configuration, recover estimators from a training log, and retrain models from logged configurations.

class AutoML:
    def save_best_config(self, filename):
        """
        Write the best configuration to a file.

        Args:
            filename (str): Path the configuration is saved to
        """

    def get_estimator_from_log(self, log_file_name, record_id, task):
        """
        Obtain an estimator from a training log.

        Args:
            log_file_name (str): Path to the log file
            record_id (int): Identifier of the log record
            task (str): Task type

        Returns:
            Trained estimator instance
        """

    def retrain_from_log(
        self,
        log_file_name,
        X_train,
        y_train,
        task,
        record_id=-1,
        **kwargs,
    ):
        """
        Retrain a model using a configuration recorded in a log.

        Args:
            log_file_name (str): Path to the training log
            X_train: Training features
            y_train: Training targets
            task (str): Task type
            record_id (int): Record ID (-1 for best)
            **kwargs: Additional training parameters
        """

Utility Functions

Helper functions for model analysis and configuration.

def size(learner_classes, config):
    """
    Estimate the memory footprint of a model configuration.

    Args:
        learner_classes (dict): Learner classes, as a dictionary
        config (dict): The model configuration to size

    Returns:
        float: Estimated memory size in bytes
    """

Usage Examples

Basic Classification

from flaml import AutoML
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the iris data and keep 20% of it aside for testing
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train an AutoML classifier within a 30-second budget
fit_settings = {"task": "classification", "time_budget": 30}
automl = AutoML()
automl.fit(X_train, y_train, **fit_settings)

# Predict labels and class probabilities for the held-out rows
predictions = automl.predict(X_test)
probabilities = automl.predict_proba(X_test)

# Report the selected model and its score on the test split
print(f"Best model: {automl.best_estimator}")
print(f"Accuracy: {automl.score(X_test, y_test)}")

Regression with Custom Settings

from flaml import AutoML
import pandas as pd

# Read the regression data and separate the "target" column from the features
df = pd.read_csv("regression_data.csv")
y = df["target"]
X = df.drop(columns="target")

# Collect the AutoML settings in one place
settings = dict(
    task="regression",
    time_budget=300,
    metric="rmse",
    estimator_list=["lgbm", "xgboost", "rf"],
    ensemble=True,
    n_jobs=-1,
    verbose=1,
)

# Train with those settings, then report the results
automl = AutoML()
automl.fit(X, y, **settings)
print(f"Best RMSE: {automl.best_loss}")
print(f"Feature importance: {automl.feature_importances_}")

Time Series Forecasting

from flaml import AutoML
import pandas as pd

# Load time series data; the "ds" column holds the timestamps
df = pd.read_csv("timeseries.csv")
df["ds"] = pd.to_datetime(df["ds"])

# Hold out the final `horizon` rows so there are known timestamps to forecast
horizon = 12
target_col = "y"  # assumed name of the value column in timeseries.csv - adjust to your data
train_df, test_df = df.iloc[:-horizon], df.iloc[-horizon:]

# Configure for forecasting. fit() takes features and targets separately
# (X_train, y_train); the timestamp column is the first column of X_train.
automl = AutoML()
automl.fit(
    train_df[["ds"]],
    train_df[target_col],
    task="ts_forecast",
    time_budget=600,
    metric="mape",
    period=horizon,  # forecast horizon: number of steps predicted per forecast
    verbose=2,
)

# Generate forecasts; predict() requires X, here the held-out timestamps
forecasts = automl.predict(test_df[["ds"]])

Custom Learner Integration

from flaml import AutoML
from sklearn.svm import SVC

# Register the custom learner under the name used in estimator_list below.
# NOTE(review): upstream FLAML documents learner_class as a subclass of its own
# estimator base class - confirm a bare scikit-learn SVC is accepted as-is.
automl = AutoML()
automl.add_learner("custom_svm", SVC)

# Train with the custom learner among the candidates.
# X_train / y_train are not defined in this snippet; prepare them beforehand.
search_settings = {
    "task": "classification",
    "estimator_list": ["lgbm", "custom_svm"],
    "time_budget": 120,
}
automl.fit(X_train, y_train, **search_settings)

State Management Classes

Classes for managing AutoML training state and search configuration.

class AutoMLState:
    """Holds AutoML training state and handles sample data preparation."""

    def prepare_sample_train_data(self, sample_size):
        """
        Build a sample of the training data for efficient search.

        Args:
            sample_size (int): Size of the sample to create
        """

class SearchState:
    """Holds hyperparameter search state and validation."""

    @property
    def search_space(self):
        """The current search space configuration."""

    @property
    def estimated_cost4improvement(self):
        """The estimated cost of improving the model."""

Supported Tasks and Metrics

Task Types

classification: Binary and multi-class classification
regression: Continuous target prediction
ts_forecast: Time series forecasting
rank: Learning to rank
nlp: Natural language processing tasks

Metrics

Classification: accuracy, roc_auc, roc_auc_ovr, f1, log_loss, precision, recall
Regression: rmse, mae, mse, r2, mape
Forecasting: mape, smape, mae, rmse
Ranking: ndcg, ap