AutoGluon TabularPredictor for automated machine learning on tabular datasets
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
AutoGluon provides experimental scikit-learn compatible interfaces for seamless integration with existing scikit-learn workflows, pipelines, and ecosystem tools. These classes provide familiar fit/predict APIs while leveraging AutoGluon's automated machine learning capabilities.
Scikit-learn compatible classifier interface that wraps AutoGluon's TabularPredictor for classification tasks with standard sklearn API conventions.
class TabularClassifier:
"""
Scikit-learn compatible classifier using AutoGluon's automated ML.
Provides standard sklearn interface (fit, predict, predict_proba, score)
while leveraging AutoGluon's model selection and ensemble capabilities.
"""
def __init__(
self,
eval_metric: str = None,
time_limit: float = None,
presets: list[str] | str = None,
hyperparameters: dict | str = None,
path: str = None,
verbosity: int = 2,
init_args: dict = None,
fit_args: dict = None
):
"""
Initialize TabularClassifier.
Parameters:
- eval_metric: Evaluation metric for model selection
- time_limit: Maximum training time in seconds
- presets: Preset configurations for training
- hyperparameters: Custom hyperparameter configurations
- path: Directory to save models
- verbosity: Logging level (0-4)
- init_args: Additional initialization arguments
- fit_args: Additional fitting arguments
"""
def fit(
self,
X: pd.DataFrame | np.ndarray,
y: pd.Series | np.ndarray,
**kwargs
) -> 'TabularClassifier':
"""
Train the classifier on the provided data.
Parameters:
- X: Training features
- y: Training labels
- kwargs: Additional arguments passed to TabularPredictor.fit()
Returns:
Self (fitted TabularClassifier)
"""
def predict(
self,
X: pd.DataFrame | np.ndarray
) -> np.ndarray:
"""
Generate class predictions for input data.
Parameters:
- X: Input features
Returns:
Predicted class labels as numpy array
"""
def predict_proba(
self,
X: pd.DataFrame | np.ndarray
) -> np.ndarray:
"""
Generate class probabilities for input data.
Parameters:
- X: Input features
Returns:
Class probabilities as numpy array
"""
def score(
self,
X: pd.DataFrame | np.ndarray,
y: pd.Series | np.ndarray,
sample_weight: np.ndarray = None
) -> float:
"""
Calculate accuracy score on the given test data and labels.
Parameters:
- X: Test features
- y: True labels
- sample_weight: Sample weights for scoring
Returns:
Mean accuracy score
"""Scikit-learn compatible regressor interface that wraps AutoGluon's TabularPredictor for regression tasks with standard sklearn API conventions.
class TabularRegressor:
    """
    Scikit-learn compatible regressor using AutoGluon's automated ML.

    Provides standard sklearn interface (fit, predict, score)
    while leveraging AutoGluon's model selection and ensemble capabilities.
    """

    def __init__(
        self,
        eval_metric: str | None = None,
        time_limit: float | None = None,
        presets: list[str] | str | None = None,
        hyperparameters: dict | str | None = None,
        path: str | None = None,
        verbosity: int = 2,
        init_args: dict | None = None,
        fit_args: dict | None = None
    ):
        """
        Initialize TabularRegressor.

        Every parameter is optional; all except verbosity default to None.

        Parameters:
        - eval_metric: Evaluation metric for model selection
        - time_limit: Maximum training time in seconds
        - presets: Preset configurations for training (one name or a list of names)
        - hyperparameters: Custom hyperparameter configurations (dict or named config)
        - path: Directory to save models
        - verbosity: Logging level (0-4), defaults to 2
        - init_args: Additional initialization arguments
        - fit_args: Additional fitting arguments
        """

    def fit(
        self,
        X: pd.DataFrame | np.ndarray,
        y: pd.Series | np.ndarray,
        **kwargs
    ) -> 'TabularRegressor':
        """
        Train the regressor on the provided data.

        Parameters:
        - X: Training features
        - y: Training target values
        - kwargs: Additional arguments passed to TabularPredictor.fit()

        Returns:
        Self (fitted TabularRegressor)
        """

    def predict(
        self,
        X: pd.DataFrame | np.ndarray
    ) -> np.ndarray:
        """
        Generate predictions for input data.

        Parameters:
        - X: Input features

        Returns:
        Predicted values as numpy array
        """

    def score(
        self,
        X: pd.DataFrame | np.ndarray,
        y: pd.Series | np.ndarray,
        sample_weight: np.ndarray | None = None
    ) -> float:
        """
        Calculate R² coefficient of determination on test data.

        Parameters:
        - X: Test features
        - y: True target values
        - sample_weight: Sample weights for scoring (optional)

        Returns:
        R² score
        """

from autogluon.tabular.experimental import TabularClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
import pandas as pd
# Load the training and test splits from CSV files
X_train = pd.read_csv('X_train.csv')
y_train = pd.read_csv('y_train.csv').squeeze()  # single-column frame -> Series
X_test = pd.read_csv('X_test.csv')

# Create sklearn-compatible classifier
classifier = TabularClassifier(
    eval_metric='roc_auc',
    verbosity=1,
)

# Use in sklearn pipeline: scaling first, then the AutoGluon estimator
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', classifier),
])

# Cross-validation with sklearn (5 folds, scored by ROC AUC)
cv_scores = cross_val_score(
    pipeline,
    X_train,
    y_train,
    cv=5,
    scoring='roc_auc',
)
# Report the mean score with a two-standard-deviation spread
print(f"Cross-validation AUC: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

# Fit on the full training set and predict
pipeline.fit(X_train, y_train)
predictions = pipeline.predict(X_test)
probabilities = pipeline.predict_proba(X_test)

from autogluon.tabular.experimental import TabularRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
# Load regression data
X_train = pd.read_csv('X_train.csv')
y_train = pd.read_csv('y_train.csv').squeeze()  # single-column frame -> Series
X_test = pd.read_csv('X_test.csv')
y_test = pd.read_csv('y_test.csv').squeeze()

# Create regressor
regressor = TabularRegressor(verbosity=1)

# Grid search over AutoGluon parameters
# (2 metrics x 2 time limits x 2 presets = 8 candidates, each evaluated on 3 folds)
param_grid = {
    'eval_metric': ['mean_squared_error', 'mean_absolute_error'],
    'time_limit': [300, 600],
    'presets': ['good_quality', 'best_quality'],
}

# Perform grid search
grid_search = GridSearchCV(
    regressor,
    param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=1,  # AutoGluon handles parallelization internally
)

# Fit with grid search
grid_search.fit(X_train, y_train)

# Best model predictions
best_model = grid_search.best_estimator_
predictions = best_model.predict(X_test)

# Evaluate on the held-out test set
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
print(f"Best parameters: {grid_search.best_params_}")
print(f"Test RMSE: {rmse:.4f}")
print(f"Test R²: {best_model.score(X_test, y_test):.4f}")

from autogluon.tabular.experimental import TabularClassifier, TabularRegressor
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import pandas as pd
# Prepare data: hold out 20% of the rows for validation
X = pd.read_csv('features.csv')
y = pd.read_csv('target.csv').squeeze()  # single-column frame -> Series
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Compare AutoGluon with sklearn models
models = {
    'AutoGluon': TabularClassifier(time_limit=300, verbosity=0),
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'LogisticRegression': LogisticRegression(random_state=42),
}

results = {}
for name, model in models.items():
    # Fit model
    model.fit(X_train, y_train)
    # Predictions
    predictions = model.predict(X_val)
    # Store results
    results[name] = {
        'accuracy': model.score(X_val, y_val),
        'predictions': predictions,
    }
    # Per-model report, printed inside the loop so every model is covered
    print(f"\n{name} Results:")
    print(f"Accuracy: {results[name]['accuracy']:.4f}")
    print(classification_report(y_val, predictions))

from autogluon.tabular.experimental import TabularClassifier
# Custom hyperparameters for AutoGluon models
hyperparameters = {
'LGB': {'num_leaves': [26, 66, 176]},
'XGB': {'n_estimators': [50, 100, 200]},
'CAT': {'iterations': [100, 200, 500]}
}
# Advanced classifier with custom settings
classifier = TabularClassifier(
problem_type='multiclass',
eval_metric='f1_macro',
path='./sklearn_compatible_models/',
verbosity=2
)
# Fit with custom hyperparameters and advanced options
classifier.fit(
X_train,
y_train,
time_limit=900,
hyperparameters=hyperparameters,
num_bag_folds=5,
presets='best_quality'
)
# Access underlying AutoGluon predictor for advanced functionality
autogluon_predictor = classifier.predictor
leaderboard = autogluon_predictor.leaderboard(extra_info=True)
print(leaderboard)
# Standard sklearn predictions
predictions = classifier.predict(X_test)
probabilities = classifier.predict_proba(X_test)Install with Tessl CLI
npx tessl i tessl/pypi-autogluon--tabular