Search (Ctrl+K)
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-autogluon--tabular

AutoGluon TabularPredictor for automated machine learning on tabular datasets

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/models.md

Models and Registry

AutoGluon Tabular provides a comprehensive collection of machine learning models with unified interfaces, spanning from traditional algorithms to modern deep learning approaches. The model registry system enables extensibility and customization of the available model portfolio.

Capabilities

Core Machine Learning Models

Traditional and gradient boosting models that form the backbone of AutoGluon's automated machine learning capabilities, providing robust performance across diverse tabular datasets.

# NOTE(review): the classes below are docstring-only interface stubs that
# document AutoGluon's model portfolio; no implementations appear here.

# Gradient Boosting Models
class LGBModel:
    """LightGBM gradient boosting model optimized for speed and memory efficiency."""

class XGBoostModel:
    """XGBoost gradient boosting model with advanced regularization and handling of missing values."""

class CatBoostModel:
    """CatBoost gradient boosting model with native categorical feature support."""

# Tree-based Models
class RFModel:
    """Random Forest model providing ensemble of decision trees with feature bagging."""

class XTModel:
    """Extra Trees (Extremely Randomized Trees) model with increased randomization."""

# Linear Models
class LinearModel:
    """Linear/Logistic Regression with automatic regularization and feature scaling."""

# Instance-based Models
class KNNModel:
    """K-Nearest Neighbors model for both classification and regression tasks."""

Neural Network Models

Deep learning models optimized for tabular data with automatic architecture selection, hyperparameter optimization, and specialized architectures for structured data.

# NOTE(review): docstring-only interface stubs documenting the neural-network
# model portfolio; implementations live in the actual package.

# Traditional Neural Networks
class NNFastAiTabularModel:
    """FastAI-based neural network with automated preprocessing and training."""

class TabularNeuralNetTorchModel:
    """PyTorch-based neural network with custom architecture for tabular data."""

# Transformer-based Models
class FTTransformerModel:
    """Feature Tokenizer Transformer - specialized transformer architecture for tabular data."""

# Pre-trained Foundation Models
class TabPFNV2Model:
    """TabPFN v2 - pre-trained transformer model fine-tuned for tabular prediction."""

class TabPFNMixModel:
    """TabPFN Mix - ensemble of pre-trained transformers for improved performance."""

class MitraModel:
    """Mitra - advanced transformer architecture optimized for tabular classification."""

# Specialized Neural Networks
class TabMModel:
    """TabM - neural network with attention mechanisms for tabular data."""

class RealMLPModel:
    """Real-valued MLP with specialized training procedures for tabular prediction."""

class TabICLModel:
    """TabICL - in-context learning model for few-shot tabular prediction."""

Multi-Modal Models

Models capable of handling mixed data types including text, images, and structured features within the same prediction task.

# NOTE(review): docstring-only interface stubs for multi-modal models.
class MultiModalPredictorModel:
    """
    AutoMM-based multi-modal model handling tabular, text, and image features.
    Automatically detects and processes different data modalities.
    """

class TextPredictorModel:
    """Specialized model for tabular data containing text features."""

# NOTE(review): FastTextModel is also listed under "Interpretable Models"
# later in this document with a different docstring — duplicate entry.
class FastTextModel:
    """FastText model for efficient text classification and representation learning."""

class ImagePredictorModel:
    """Model for tabular data with image features or image-based prediction."""

Interpretable Models

Models designed for interpretability and explainability, providing transparent decision-making processes suitable for regulated industries and high-stakes applications.

# NOTE(review): docstring-only interface stubs for the interpretable models.

# Base Interpretable Model
class _IModelsModel:
    """Base class for interpretable machine learning models."""

# Rule-based Models
class BoostedRulesModel:
    """Gradient-boosted rule ensemble providing interpretable decision rules."""

class RuleFitModel:
    """RuleFit model combining linear regression with decision rules."""

class FigsModel:
    """FIGS (Fast Interpretable Greedy-tree Sums) model for rule-based predictions."""

# Tree-based Interpretable Models
class GreedyTreeModel:
    """Greedy decision tree optimized for interpretability over accuracy."""

class HSTreeModel:
    """Hierarchical Shrinkage Tree with built-in regularization."""

# Text Models
# NOTE(review): FastTextModel already appears under "Multi-Modal Models"
# earlier in this document with a different docstring — duplicate definition;
# in a real module the later one would shadow the earlier. Consider merging.
class FastTextModel:
    """FastText model for text classification in tabular datasets."""

Model Registry System

Extensible registry system for managing, registering, and accessing machine learning models within AutoGluon's framework.

class ModelRegistry:
    """
    Registry for managing available machine learning models.
    Enables custom model registration and retrieval.

    NOTE(review): the methods below are docstring-only stubs documenting the
    intended interface; no behavior is implemented in this document.
    """
    
    def __init__(self):
        """Initialize empty model registry."""
    
    def register_model(
        self,
        name: str,
        model_class: type,
        tags: list[str] | None = None  # annotation fixed: default is None
    ) -> None:
        """
        Register a new model class in the registry.
        
        Parameters:
        - name: Unique identifier for the model
        - model_class: Model class to register
        - tags: Optional tags for categorization
        """
    
    def get_model(self, name: str) -> type:
        """
        Retrieve a registered model class by name.
        
        Parameters:
        - name: Name of the registered model
        
        Returns:
        Model class
        """
    
    def list_models(self, tags: list[str] | None = None) -> list[str]:
        """
        List all registered model names.
        
        Parameters:
        - tags: Filter by tags (optional)
        
        Returns:
        List of model names
        """
    
    def unregister_model(self, name: str) -> None:
        """
        Remove a model from the registry.
        
        Parameters:
        - name: Name of the model to remove
        """

# Global model registry instance (module-level singleton, declared by annotation only).
ag_model_registry: ModelRegistry

Base Model Interface

Abstract base class defining the common interface that all AutoGluon models must implement for consistent behavior and integration.

class AbstractModel:
    """
    Abstract base class for all AutoGluon tabular models.
    Defines the standard interface and common functionality.

    NOTE(review): docstring-only interface stubs; the concrete implementations
    live in the package itself, not in this document.
    """
    
    def __init__(
        self,
        problem_type: str,
        objective: str | None = None,  # annotation fixed: default is None
        **kwargs
    ):
        """
        Initialize model with problem configuration.
        
        Parameters:
        - problem_type: Type of ML problem ('binary', 'multiclass', 'regression')
        - objective: Optimization objective/metric
        - kwargs: Model-specific parameters
        """
    
    def fit(
        self,
        X_train: pd.DataFrame,
        y_train: pd.Series,
        X_val: pd.DataFrame | None = None,  # annotation fixed: default is None
        y_val: pd.Series | None = None,     # annotation fixed: default is None
        **kwargs
    ) -> None:
        """
        Train the model on provided data.
        
        Parameters:
        - X_train: Training features
        - y_train: Training labels
        - X_val: Validation features (optional)
        - y_val: Validation labels (optional)
        """
    
    def predict(self, X: pd.DataFrame, **kwargs) -> np.ndarray:
        """
        Generate predictions for input data.
        
        Parameters:
        - X: Input features
        
        Returns:
        Predictions as numpy array
        """
    
    def predict_proba(self, X: pd.DataFrame, **kwargs) -> np.ndarray:
        """
        Generate prediction probabilities (classification only).
        
        Parameters:
        - X: Input features
        
        Returns:
        Prediction probabilities as numpy array
        """
    
    def get_memory_size(self) -> int:
        """
        Get approximate memory usage of the model in bytes.
        
        Returns:
        Memory usage in bytes
        """
    
    def save(self, path: str) -> None:
        """
        Save model to disk.
        
        Parameters:
        - path: File path for saving
        """
    
    def load(self, path: str) -> None:
        """
        Load model from disk.
        
        Parameters:
        - path: File path for loading
        """

Usage Examples

Custom Model Registration

from autogluon.tabular.models import AbstractModel
from autogluon.tabular.registry import ag_model_registry
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier

class CustomGBModel(AbstractModel):
    """Custom Gradient Boosting model wrapper around scikit-learn's
    GradientBoostingClassifier, implementing AutoGluon's AbstractModel interface.

    Hyperparameters (via **kwargs):
    - n_estimators: number of boosting stages (default 100)
    - learning_rate: shrinkage applied to each tree (default 0.1)
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # random_state fixed so example runs are reproducible.
        self.model = GradientBoostingClassifier(
            n_estimators=kwargs.get('n_estimators', 100),
            learning_rate=kwargs.get('learning_rate', 0.1),
            random_state=42
        )

    def fit(self, X_train, y_train, **kwargs):
        """Train on the provided data; extra kwargs (e.g. X_val/y_val) are ignored."""
        self.model.fit(X_train, y_train)

    def predict(self, X, **kwargs):
        """Return class predictions.

        **kwargs added for parity with the AbstractModel.predict signature
        documented above, so framework callers passing keyword options work.
        """
        return self.model.predict(X)

    def predict_proba(self, X, **kwargs):
        """Return class probabilities (same **kwargs parity fix as predict)."""
        return self.model.predict_proba(X)

# Register custom model under the name 'CustomGB'; tags allow later filtering
# via ag_model_registry.list_models(tags=...).
ag_model_registry.register_model(
    name='CustomGB',
    model_class=CustomGBModel,
    tags=['tree', 'gradient_boosting', 'custom']
)

# Use in TabularPredictor
from autogluon.tabular import TabularPredictor

# NOTE(review): train_data is assumed to be defined elsewhere (a DataFrame
# containing a 'target' column) — confirm before running this example.
predictor = TabularPredictor(label='target')
predictor.fit(
    train_data,
    hyperparameters={'CustomGB': {'n_estimators': [50, 100, 200]}}
)

Model-Specific Hyperparameter Tuning

from autogluon.tabular import TabularPredictor

# Define model-specific hyperparameter search spaces, one per model family,
# then assemble them into the single mapping AutoGluon expects.

# LightGBM search space
lgb_space = {
    'num_leaves': [31, 127, 255],
    'learning_rate': [0.01, 0.05, 0.1],
    'feature_fraction': [0.8, 0.9, 1.0],
    'bagging_fraction': [0.8, 0.9, 1.0],
    'min_data_in_leaf': [10, 20, 50],
}

# XGBoost search space
xgb_space = {
    'n_estimators': [100, 300, 500],
    'max_depth': [3, 6, 10],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0],
}

# CatBoost search space
cat_space = {
    'iterations': [100, 500, 1000],
    'depth': [4, 6, 8],
    'learning_rate': [0.01, 0.1, 0.2],
    'l2_leaf_reg': [1, 3, 5, 7, 9],
}

# PyTorch neural-network search space
nn_torch_space = {
    'num_epochs': [10, 50, 100],
    'learning_rate': [1e-4, 1e-3, 1e-2],
    'weight_decay': [1e-6, 1e-4, 1e-2],
    'dropout_prob': [0.0, 0.1, 0.2, 0.5],
}

# Model key -> search space, as consumed by TabularPredictor.fit(hyperparameters=...)
hyperparameters = {
    'LGB': lgb_space,
    'XGB': xgb_space,
    'CAT': cat_space,
    'NN_TORCH': nn_torch_space,
}

# Train using the per-model search spaces defined above.
# NOTE(review): train_data is assumed to be defined elsewhere — confirm.
predictor = TabularPredictor(label='target')
predictor.fit(
    train_data,
    hyperparameters=hyperparameters,
    time_limit=1800  # 30 minutes
)

# Check which models were trained
leaderboard = predictor.leaderboard()
print("Trained models:")
print(leaderboard[['model', 'score_val']].head(10))

Model Selection and Filtering

from autogluon.tabular import TabularPredictor

# Include only specific model types
# NOTE(review): train_data is assumed to be defined elsewhere — confirm.
predictor = TabularPredictor(label='target')
predictor.fit(
    train_data,
    included_model_types=['LGB', 'XGB', 'CAT'],  # Only gradient boosting
    time_limit=600
)

# Exclude interpretable models for best performance
predictor_performance = TabularPredictor(label='target')
predictor_performance.fit(
    train_data,
    excluded_model_types=['LR', 'KNN'],  # Exclude simpler models
    presets='best_quality'
)

# Include only interpretable models
predictor_interpretable = TabularPredictor(label='target')
predictor_interpretable.fit(
    train_data,
    included_model_types=['LR', 'RF', 'XGB'],  # More interpretable options
    presets='interpretable'
)

Advanced Model Configuration

from autogluon.tabular import TabularPredictor

# Advanced configuration with model-specific arguments.
# Per-model training resource limits, passed to fit() as ag_args_fit.
ag_args_fit = dict(
    num_cpus=8,          # CPU cores for training
    num_gpus=1,          # GPU devices
    memory_limit=16000,  # Memory limit in MB
)

# Ensembling/bagging behavior, passed to fit() as ag_args_ensemble.
ag_args_ensemble = dict(
    fold_fitting_strategy='sequential_local',
    auto_stack=True,
    bagging_mode='oob',  # Out-of-bag validation
)

predictor = TabularPredictor(
    label='target',
    eval_metric='roc_auc'
)

# NOTE(review): train_data is assumed to be defined elsewhere — confirm.
predictor.fit(
    train_data,
    time_limit=3600,  # 1 hour
    presets='best_quality',
    num_bag_folds=10,
    num_stack_levels=3,
    ag_args_fit=ag_args_fit,
    ag_args_ensemble=ag_args_ensemble,
    
    # Model-specific advanced arguments
    # 'ag_args' controls per-model metadata: name_suffix labels the trained
    # model, priority orders training.
    hyperparameters={
        'LGB': {'ag_args': {'name_suffix': '_Large', 'priority': 1}},
        'XGB': {'ag_args': {'name_suffix': '_XL', 'priority': 2}},
        'CAT': {'ag_args': {'name_suffix': '_Balanced', 'priority': 3}}
    }
)

# Analyze model performance and resource usage
leaderboard = predictor.leaderboard(extra_info=True)
print(leaderboard[['model', 'score_val', 'fit_time', 'pred_time_val']].head())

Working with Model Registry

from autogluon.tabular.registry import ag_model_registry

# List all available models registered in the global registry.
all_models = ag_model_registry.list_models()
print(f"Available models: {len(all_models)}")
print(all_models[:10])  # First 10 models

# Get specific model class
lgb_class = ag_model_registry.get_model('LGBModel')
print(f"LightGBM model class: {lgb_class}")

# Check if model is registered before retrieving it.
if 'XGBModel' in all_models:
    xgb_class = ag_model_registry.get_model('XGBModel')
    print(f"XGBoost available: {xgb_class is not None}")

# Custom model usage
from autogluon.tabular.models import RFModel

# Instantiate model directly (advanced usage — models are normally created
# and managed by TabularPredictor, not by hand).
rf_model = RFModel(
    problem_type='binary',
    objective='binary_logloss'
)

# This would typically be done within TabularPredictor
# rf_model.fit(X_train, y_train)
# predictions = rf_model.predict(X_test)

Install with Tessl CLI

npx tessl i tessl/pypi-autogluon--tabular

docs

configurations.md

experimental.md

index.md

models.md

predictor.md

tile.json