AutoGluon TabularPredictor for automated machine learning on tabular datasets
npx @tessl/cli install tessl/pypi-autogluon--tabular@1.4.0

AutoGluon Tabular is a comprehensive automated machine learning library designed for tabular data analysis that enables developers to build high-accuracy predictive models for classification and regression tasks with minimal code. The package provides the TabularPredictor class, which automatically handles feature engineering, model selection, hyperparameter optimization, and ensemble creation across a wide range of algorithms including gradient boosting (LightGBM, XGBoost, CatBoost), neural networks (FastAI, TabPFN), and traditional machine learning models.
pip install autogluon.tabular

from autogluon.tabular import TabularPredictor

For advanced usage:
from autogluon.tabular import TabularPredictor, TabularDataset, FeatureMetadata

from autogluon.tabular import TabularPredictor
import pandas as pd
# Load the training and test tables from CSV files
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
# Create a predictor for the label column, then train it on the training table
predictor = TabularPredictor(label='target_column')
predictor.fit(train_data)
# Make predictions on the test table: predicted values, then probabilities
# NOTE(review): predict_proba presumably only applies to classification tasks — confirm
predictions = predictor.predict(test_data)
probabilities = predictor.predict_proba(test_data)
# Evaluate performance on the test table
performance = predictor.evaluate(test_data)
leaderboard = predictor.leaderboard(test_data)

AutoGluon Tabular uses a multi-layered architecture for automated machine learning:
This design enables AutoGluon to automatically handle complex ML workflows while providing flexibility for advanced users to customize components and strategies.
The primary TabularPredictor class provides automated machine learning capabilities including model training, prediction, evaluation, and model management with minimal code required.
class TabularPredictor:
def __init__(
self,
label: str,
problem_type: str = None,
eval_metric: str = None,
path: str = None,
verbosity: int = 2,
sample_weight: str = None,
weight_evaluation: bool = False,
groups: str = None,
positive_class: str | int = None,
**kwargs
): ...
def fit(
self,
train_data: pd.DataFrame,
tuning_data: pd.DataFrame = None,
time_limit: int = None,
presets: str = None,
hyperparameters: dict = None,
**kwargs
) -> 'TabularPredictor': ...
def predict(
self,
data: pd.DataFrame | str,
model: str = None,
as_pandas: bool = True,
transform_features: bool = True,
**kwargs
) -> pd.Series | np.ndarray: ...
def predict_proba(
self,
data: pd.DataFrame | str,
model: str = None,
as_pandas: bool = True,
as_multiclass: bool = True,
**kwargs
) -> pd.DataFrame | np.ndarray: ...
def evaluate(
self,
data: pd.DataFrame | str,
model: str = None,
**kwargs
) -> dict: ...
def leaderboard(
self,
data: pd.DataFrame | str = None,
extra_info: bool = False,
**kwargs
) -> pd.DataFrame: ...
class InterpretableTabularPredictor(TabularPredictor):
"""
Experimental predictor limited to interpretable models only.
Same interface as TabularPredictor but restricted to simple models.
"""

Scikit-learn compatible wrappers providing familiar fit/predict interfaces for integration with existing scikit-learn workflows and pipelines.
class TabularClassifier:
    """Scikit-learn style classifier wrapper (API signature stub).

    Exposes the familiar fit/predict/predict_proba/score interface; method
    bodies are elided (``...``).
    """

    def fit(self, X: pd.DataFrame, y: pd.Series, **kwargs):
        """Fit on feature table ``X`` and label series ``y``."""
        ...

    def predict(self, X: pd.DataFrame) -> np.ndarray:
        """Return predictions for ``X`` as an ndarray."""
        ...

    def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
        """Return predicted probabilities for ``X`` as an ndarray."""
        ...

    def score(self, X: pd.DataFrame, y: pd.Series) -> float:
        """Return a float score for ``X`` against ``y``."""
        ...
class TabularRegressor:
def fit(self, X: pd.DataFrame, y: pd.Series, **kwargs): ...
def predict(self, X: pd.DataFrame) -> np.ndarray: ...
def score(self, X: pd.DataFrame, y: pd.Series) -> float: ...

Comprehensive collection of machine learning models with unified interfaces, a model registry for extensibility, and access to 30+ different algorithms, from traditional ML to deep learning approaches.
# Core Models
class LGBModel:
    """LightGBM gradient boosting."""


class XGBoostModel:
    """XGBoost gradient boosting."""


class CatBoostModel:
    """CatBoost gradient boosting."""


class RFModel:
    """Random Forest."""


class LinearModel:
    """Linear/Logistic Regression."""


class KNNModel:
    """K-Nearest Neighbors."""
# Neural Network Models
class NNFastAiTabularModel:
    """FastAI neural networks."""


class TabularNeuralNetTorchModel:
    """PyTorch neural networks."""


class TabPFNV2Model:
    """TabPFN v2."""


class FTTransformerModel:
    """Feature Tokenizer Transformer."""
# Model Registry
class ModelRegistry:
    """Name-to-model-class registry (API signature stub).

    Method bodies are elided (``...``); only the signatures are shown.
    """

    def register_model(self, name: str, model_class: type):
        """Register ``model_class`` under ``name``."""
        ...

    def get_model(self, name: str) -> type:
        """Return the model class for ``name``."""
        ...
ag_model_registry: ModelRegistry

Pre-configured settings for different use cases, a hyperparameter configuration system, and extensive customization options for advanced users.
# Available presets
PRESETS = [
    "best_quality",             # Maximum accuracy, longer training
    "high_quality",             # High accuracy with fast inference
    "good_quality",             # Good accuracy with very fast inference
    "medium_quality",           # Medium accuracy, very fast training (default)
    "optimize_for_deployment",  # Optimizes for deployment by cleaning up models
    "interpretable",            # Interpretable models only
]
# Hyperparameter configuration functions
def get_hyperparameter_config(preset: str) -> dict:
    """Signature stub: takes a preset name and is annotated to return a dict."""
    ...


def get_default_feature_generator(preset: str = "auto"):
    """Signature stub: takes a preset name, defaulting to ``"auto"``."""
    ...


# Core data structures
TabularDataset = pd.DataFrame  # Enhanced DataFrame for tabular data; declared here as an alias of pd.DataFrame
class FeatureMetadata:
    """Metadata container for feature information (API signature stub).

    Method bodies are elided (``...``); only the signatures are shown.
    """

    def __init__(self, type_map_raw: dict = None):
        """Create the container from an optional ``type_map_raw`` dict.

        NOTE(review): presumably maps feature name to raw type name — confirm.
        """
        ...

    def get_features(self, valid_raw_types: list = None) -> list:
        """Return a list of features; ``valid_raw_types`` is an optional filter list."""
        ...

    def get_feature_type_raw(self, feature: str) -> str:
        """Return the raw type of ``feature`` as a string."""
        ...
# Problem types
PROBLEM_TYPES = Literal[
    "binary",
    "multiclass",
    "regression",
    "quantile",
    "softclass",
]
# Evaluation metrics
CLASSIFICATION_METRICS = [
    "accuracy",
    "balanced_accuracy",
    "log_loss",
    "f1",
    "f1_macro",
    "f1_micro",
    "f1_weighted",
    "roc_auc",
    "roc_auc_ovo",
    "roc_auc_ovr",
    "precision",
    "precision_macro",
    "recall",
    "recall_macro",
    "mcc",
    "pac_score",
]
REGRESSION_METRICS = [
    "root_mean_squared_error",
    "mean_squared_error",
    "mean_absolute_error",
    "median_absolute_error",
    "mean_absolute_percentage_error",
    "r2",
    "symmetric_mean_absolute_percentage_error",
]
# Weight strategies
WEIGHT_STRATEGIES = Literal[
    "auto_weight",
    "balance_weight",
]