A fast library for automated machine learning and tuning
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Complete automated machine learning pipeline that supports classification, regression, forecasting, ranking, and NLP tasks. AutoML automatically selects the best model and hyperparameters within a specified time budget, providing an efficient solution for various machine learning problems.
The main AutoML class provides automated machine learning with intelligent model selection, hyperparameter optimization, and ensemble methods.
class AutoML:
    """Core training and prediction interface of AutoML (signatures only)."""

    def __init__(self):
        """Initialize AutoML instance."""

    def fit(self, X_train, y_train, task="classification", time_budget=60,
            metric="auto", estimator_list="auto", eval_method="auto",
            split_ratio=0.1, n_splits=5, ensemble=False,
            n_jobs=1, verbose=0, **kwargs):
        """
        Train AutoML model.

        Args:
            X_train: Training feature data (pandas DataFrame, numpy array, or sparse matrix)
            y_train: Training target data (pandas Series or numpy array)
            task (str): Task type - 'classification', 'regression', 'ts_forecast', 'rank', 'nlp'
            time_budget (float): Time budget in seconds for training
            metric (str or callable): Evaluation metric ('accuracy', 'roc_auc', 'rmse', 'mae', etc.)
            estimator_list (list): List of estimator names to try ('auto' for default selection)
            eval_method (str): Evaluation method - 'auto', 'cv', 'holdout'
            split_ratio (float): Validation split ratio for holdout method
            n_splits (int): Number of cross-validation folds
            ensemble (bool): Whether to perform ensemble learning
            n_jobs (int): Number of parallel jobs (-1 for all processors)
            verbose (int): Verbosity level (0-5+)
            **kwargs: Additional training parameters

        Returns:
            self: Fitted AutoML instance
        """

    def predict(self, X, **kwargs):
        """
        Make predictions on new data.

        Args:
            X: Feature data for prediction (same format as training data)
            **kwargs: Additional prediction parameters

        Returns:
            numpy.ndarray: Predictions
        """

    def predict_proba(self, X, **kwargs):
        """
        Get prediction probabilities (classification only).

        Args:
            X: Feature data for prediction
            **kwargs: Additional prediction parameters

        Returns:
            numpy.ndarray: Prediction probabilities
        """

    def score(self, X, y, **kwargs):
        """
        Evaluate model performance.

        Args:
            X: Feature data for evaluation
            y: True target values
            **kwargs: Additional scoring parameters

        Returns:
            float: Score based on the specified metric
        """

    def add_learner(self, learner_name, learner_class):
        """
        Add custom learner to estimator list.

        Args:
            learner_name (str): Name for the custom learner
            learner_class: Learner class implementing fit/predict interface
        """


# Access to the best model, configuration, and training results.
class AutoML:
    """Read-only results exposed by AutoML after training (signatures only)."""

    @property
    def best_estimator(self):
        """Best trained estimator instance."""
        # NOTE(review): the usage example prints this value as the "Best model"
        # label — confirm whether it is the estimator's name or the fitted object.

    @property
    def best_config(self):
        """Best hyperparameter configuration found."""

    @property
    def best_loss(self):
        """Best validation loss achieved."""

    @property
    def model(self):
        """Trained model object (alias for best_estimator)."""

    @property
    def feature_importances_(self):
        """Feature importance values from the best model."""

    @property
    def classes_(self):
        """Class labels for classification tasks."""

    @property
    def best_config_per_estimator(self):
        """Best configuration for each estimator type tried."""

    @property
    def time_to_find_best_model(self):
        """Time taken to find the best model in seconds."""

    @property
    def feature_transformer(self):
        """Feature preprocessing pipeline."""

    @property
    def label_transformer(self):
        """Label preprocessing pipeline."""


# Save, load, and retrain models with configuration management.
class AutoML:
    """Configuration persistence and log-based retraining API (signatures only)."""

    def save_best_config(self, filename):
        """
        Save best configuration to file.

        Args:
            filename (str): Path to save configuration
        """

    def get_estimator_from_log(self, log_file_name, record_id, task):
        """
        Extract estimator from training log.

        Args:
            log_file_name (str): Path to log file
            record_id (int): Record identifier
            task (str): Task type

        Returns:
            Trained estimator instance
        """

    def retrain_from_log(self, log_file_name, X_train, y_train,
                         task, record_id=-1, **kwargs):
        """
        Retrain model from logged configuration.

        Args:
            log_file_name (str): Path to training log
            X_train: Training features
            y_train: Training targets
            task (str): Task type
            record_id (int): Record ID (-1 for best)
            **kwargs: Additional training parameters
        """


# Helper functions for model analysis and configuration.
def size(learner_classes, config):
    """
    Calculate memory size for a model configuration.

    Args:
        learner_classes (dict): Dictionary of learner classes
        config (dict): Model configuration

    Returns:
        float: Estimated memory size in bytes
    """


from flaml import AutoML
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load data
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train AutoML model
automl = AutoML()
automl.fit(X_train, y_train, task="classification", time_budget=30)

# Make predictions
predictions = automl.predict(X_test)
probabilities = automl.predict_proba(X_test)
print(f"Best model: {automl.best_estimator}")
print(f"Accuracy: {automl.score(X_test, y_test)}")

from flaml import AutoML
import pandas as pd

# Load regression data
df = pd.read_csv("regression_data.csv")
X = df.drop("target", axis=1)
y = df["target"]

# Configure AutoML
automl = AutoML()
settings = {
    "task": "regression",
    "time_budget": 300,
    "metric": "rmse",
    "estimator_list": ["lgbm", "xgboost", "rf"],
    "ensemble": True,
    "n_jobs": -1,
    "verbose": 1,
}

# Train and evaluate
automl.fit(X, y, **settings)
print(f"Best RMSE: {automl.best_loss}")
print(f"Feature importance: {automl.feature_importances_}")

from flaml import AutoML
import pandas as pd

# Load time series data
df = pd.read_csv("timeseries.csv")
df["ds"] = pd.to_datetime(df["ds"])

# Hold out the last `horizon` timestamps so there is feature data to predict on.
# NOTE(review): the target column is assumed to be named "y" — adjust to the
# actual column name in the dataset.
horizon = 24
train_df = df.iloc[:-horizon]
future_df = df.iloc[-horizon:]

# Configure for forecasting
automl = AutoML()
automl.fit(
    train_df[["ds"]],  # X_train: timestamp column
    train_df["y"],  # y_train: fit() requires the target as its second argument
    task="ts_forecast",
    time_budget=600,
    metric="mape",
    period=horizon,  # forecast horizon (steps per prediction), not a seasonal period
    verbose=2,
)

# Generate forecasts
# predict() requires X in the same format as the training features.
forecasts = automl.predict(future_df[["ds"]])  # 24 steps ahead

from flaml import AutoML
from sklearn.svm import SVC

# Add custom learner
# NOTE(review): confirm that a plain scikit-learn class is accepted here; the
# learner may need to wrap SVC in the library's own estimator interface.
automl = AutoML()
automl.add_learner("custom_svm", SVC)

# Use custom learner in training
# (X_train / y_train are assumed to be prepared as in the classification example)
automl.fit(
    X_train, y_train,
    task="classification",
    estimator_list=["lgbm", "custom_svm"],
    time_budget=120,
)

# Classes for managing AutoML training state and search configuration.
class AutoMLState:
    """Manages AutoML training state and sample data preparation."""

    def prepare_sample_train_data(self, sample_size):
        """
        Prepare sampled training data for efficient search.

        Args:
            sample_size (int): Size of sample to create
        """
class SearchState:
    """Manages hyperparameter search state and validation."""

    @property
    def search_space(self):
        """Current search space configuration."""

    @property
    def estimated_cost4improvement(self):
        """Estimated cost for model improvement."""


# Install with Tessl CLI
npx tessl i tessl/pypi-flaml