AutoGluon automates machine learning tasks enabling you to easily achieve strong predictive performance in your applications.
Quality: Pending — does it follow best practices?
Impact: Pending — no eval scenarios have been run.
Automated machine learning for heterogeneous data combining text, images, and tabular features. MultiModalPredictor supports diverse tasks including classification, regression, object detection, named entity recognition, semantic matching, and feature extraction using state-of-the-art foundation models.
Main predictor class for multimodal data that automatically handles different data modalities and task types with minimal configuration.
class MultiModalPredictor:
    """Main predictor class for multimodal data.

    Automatically handles different data modalities (text, images, tabular
    features) and task types with minimal configuration, using
    state-of-the-art foundation models.

    NOTE(review): this listing is an API-reference stub — method bodies are
    intentionally empty (docstring only) and document the public interface
    of ``autogluon.multimodal.MultiModalPredictor``; see the real package
    for the implementation.
    """

    def __init__(
        self,
        label: str = None,
        problem_type: str = None,
        query: str = None,
        response: str = None,
        match_label=None,
        presets: str = None,
        eval_metric=None,
        hyperparameters: dict = None,
        path: str = None,
        verbosity: int = 2,
        num_classes: int = None,
        classes: list = None,
        warn_if_exist: bool = True,
        enable_progress_bar: bool = None,
        pretrained: bool = True,
        validation_metric: str = None,
        sample_data_path: str = None,
        use_ensemble: bool = False,
        ensemble_size: int = 2,
        ensemble_mode: str = "one_shot",
    ):
        """
        Initialize MultiModalPredictor for automated multimodal machine learning.

        Parameters:
        - label: Name of target column to predict
        - problem_type: Problem type ('binary', 'multiclass', 'regression', 'object_detection',
          'ner', 'text_similarity', 'image_similarity', 'image_text_similarity',
          'feature_extraction', 'zero_shot_image_classification', 'few_shot_classification',
          'semantic_segmentation')
        - query: Column name for query data in semantic matching tasks
        - response: Column name for response data in semantic matching tasks
        - match_label: Label indicating positive matches in semantic matching
        - presets: Quality presets ('best_quality', 'high_quality', 'medium_quality')
        - eval_metric: Evaluation metric for model selection
        - hyperparameters: Custom hyperparameter configurations
        - path: Directory to save models and artifacts
        - verbosity: Logging verbosity level (0-4)
        - num_classes: Number of classes for object detection
        - classes: Class names for object detection
        - warn_if_exist: Whether to warn if save path exists
        - enable_progress_bar: Show training progress bars
        - pretrained: Use pretrained model weights
        - validation_metric: Metric for validation and early stopping
        - sample_data_path: Path to sample data for inference shape
        - use_ensemble: Enable ensemble learning
        - ensemble_size: Number of models in ensemble
        - ensemble_mode: Ensemble construction mode ('one_shot', 'sequential')
        """

    # --- Training -------------------------------------------------------
    # Train multimodal models on heterogeneous data with automatic
    # preprocessing and model selection.

    def fit(
        self,
        train_data,
        presets: str = None,
        tuning_data=None,
        max_num_tuning_data: int = None,
        id_mappings: dict = None,
        time_limit: int = None,
        save_path: str = None,
        hyperparameters=None,
        column_types: dict = None,
        holdout_frac: float = None,
        teacher_predictor=None,
        seed: int = 0,
        standalone: bool = True,
        hyperparameter_tune_kwargs: dict = None,
        clean_ckpts: bool = True,
        predictions: list = None,
        labels=None,
        predictors: list = None,
    ):
        """
        Fit MultiModalPredictor on multimodal training data.

        Parameters:
        - train_data: Training data (DataFrame with text, images, tabular columns)
        - presets: Quality/speed presets
        - tuning_data: Validation data for hyperparameter tuning
        - max_num_tuning_data: Maximum tuning samples for object detection
        - id_mappings: ID-to-content mappings for semantic matching
        - time_limit: Maximum training time in seconds
        - save_path: Directory to save models
        - hyperparameters: Custom hyperparameter configurations
        - column_types: Manual column type specifications
        - holdout_frac: Fraction of data for validation
        - teacher_predictor: Teacher model for knowledge distillation
        - seed: Random seed for reproducibility
        - standalone: Save complete model for offline deployment
        - hyperparameter_tune_kwargs: HPO configuration
        - clean_ckpts: Clean intermediate checkpoints
        - predictions: Pre-computed predictions for ensemble
        - labels: Pre-computed labels for ensemble
        - predictors: Pre-trained predictors for ensemble

        Returns:
        MultiModalPredictor: Fitted predictor instance
        """

    # --- Inference ------------------------------------------------------
    # Generate predictions for multimodal data across different task types.

    def predict(
        self,
        data,
        candidate_data=None,
        id_mappings: dict = None,
        as_pandas: bool = None,
        realtime: bool = False,
        save_results: bool = None,
        **kwargs,
    ):
        """
        Generate predictions for multimodal data.

        Parameters:
        - data: Input data (DataFrame, dict, list, or file path)
        - candidate_data: Candidate data for semantic matching/retrieval
        - id_mappings: ID-to-content mappings
        - as_pandas: Return results as pandas DataFrame/Series
        - realtime: Use realtime inference optimization
        - save_results: Save prediction results to disk
        - **kwargs: Additional arguments (e.g., as_coco for object detection)

        Returns:
        Predictions in format appropriate for the task type
        """

    def predict_proba(
        self,
        data,
        candidate_data=None,
        id_mappings: dict = None,
        as_pandas: bool = None,
        as_multiclass: bool = True,
        realtime: bool = False,
    ):
        """
        Generate prediction probabilities for classification tasks.

        Parameters:
        - data: Input data
        - candidate_data: Candidate data for retrieval tasks
        - id_mappings: ID-to-content mappings
        - as_pandas: Return results as pandas DataFrame
        - as_multiclass: Return all class probabilities vs positive class only
        - realtime: Use realtime inference optimization

        Returns:
        Prediction probabilities as DataFrame or numpy array
        """

    # --- Embeddings -----------------------------------------------------
    # Extract embeddings and features from multimodal data for downstream
    # tasks.

    def extract_embedding(
        self,
        data,
        id_mappings: dict = None,
        return_masks: bool = False,
        as_tensor: bool = False,
        as_pandas: bool = False,
        realtime: bool = False,
        signature: str = None,
    ):
        """
        Extract feature embeddings from multimodal data.

        Parameters:
        - data: Input data (DataFrame, dict, or list)
        - id_mappings: ID-to-content mappings
        - return_masks: Return attention masks for missing data
        - as_tensor: Return PyTorch tensors
        - as_pandas: Return pandas DataFrame
        - realtime: Use realtime inference optimization
        - signature: Signature type for semantic matching ('query' or 'response')

        Returns:
        Feature embeddings as numpy array, tensor, or DataFrame
        """

    # --- Evaluation -----------------------------------------------------
    # Evaluate multimodal model performance with task-specific metrics.

    def evaluate(
        self,
        data,
        query_data: list = None,
        response_data: list = None,
        id_mappings: dict = None,
        metrics: list = None,
        chunk_size: int = 1024,
        similarity_type: str = "cosine",
        cutoffs: list = [1, 5, 10],  # NOTE(review): mutable default mirrors the upstream API signature; stub never mutates it
        label: str = None,
        return_pred: bool = False,
        realtime: bool = False,
        eval_tool: str = None,
        predictions: list = None,
        labels=None,
    ):
        """
        Evaluate multimodal model performance.

        Parameters:
        - data: Test data (DataFrame, dict, list, or annotation file path)
        - query_data: Query data for ranking evaluation
        - response_data: Response data for ranking evaluation
        - id_mappings: ID-to-content mappings
        - metrics: List of evaluation metrics
        - chunk_size: Batch size for similarity computation
        - similarity_type: Similarity function ('cosine', 'dot_prod')
        - cutoffs: Cutoff values for ranking metrics
        - label: Label column name
        - return_pred: Return individual predictions
        - realtime: Use realtime inference
        - eval_tool: Evaluation tool for object detection ('pycocotools', 'torchmetrics')
        - predictions: Pre-computed predictions
        - labels: Pre-computed labels

        Returns:
        dict: Evaluation metrics and optionally predictions
        """

    # --- Persistence & deployment ---------------------------------------
    # Save, load, and export multimodal models for deployment.

    def save(self, path: str, standalone: bool = True):
        """
        Save trained predictor to disk.

        Parameters:
        - path: Directory to save predictor
        - standalone: Save complete model for offline deployment
        """

    @classmethod
    def load(
        cls,
        path: str,
        resume: bool = False,
        verbosity: int = 3,
    ):
        """
        Load saved predictor from disk.

        Parameters:
        - path: Directory containing saved predictor
        - resume: Resume training from checkpoint
        - verbosity: Logging verbosity level

        Returns:
        MultiModalPredictor: Loaded predictor instance
        """

    def export_onnx(
        self,
        data,
        path: str = None,
        batch_size: int = None,
        verbose: bool = False,
        opset_version: int = 16,
        truncate_long_and_double: bool = False,
    ):
        """
        Export model to ONNX format for deployment.

        Parameters:
        - data: Sample data for tracing
        - path: Export path (if None, returns bytes)
        - batch_size: Batch size for export
        - verbose: Verbose export logging
        - opset_version: ONNX opset version
        - truncate_long_and_double: Truncate precision for compatibility

        Returns:
        Export path or ONNX model bytes
        """

    def optimize_for_inference(self, providers: list = None):
        """
        Optimize model for faster inference using ONNX runtime.

        Parameters:
        - providers: ONNX execution providers

        Returns:
        Optimized ONNX module for inference
        """

    # --- Advanced -------------------------------------------------------
    # Advanced functionality for specialized use cases and model analysis.

    def fit_summary(self, verbosity: int = 0, show_plot: bool = False):
        """
        Display training summary and model information.

        Parameters:
        - verbosity: Detail level (0-4)
        - show_plot: Show training plots

        Returns:
        dict: Training summary information
        """

    def list_supported_models(self, pretrained: bool = True):
        """
        List supported models for the current problem type.

        Parameters:
        - pretrained: Show only models with pretrained weights

        Returns:
        list: Available model names
        """

    def dump_model(self, save_path: str = None):
        """
        Export model weights and configs to local directory.

        Parameters:
        - save_path: Directory to save model files
        """

    def set_num_gpus(self, num_gpus: int):
        """
        Set number of GPUs for training/inference.

        Parameters:
        - num_gpus: Number of GPUs to use
        """

    # --- Properties -----------------------------------------------------
    # Access model and training information through properties.

    @property
    def problem_type(self) -> str:
        """Type of ML problem (classification, object_detection, etc.)"""

    @property
    def label(self) -> str:
        """Name of target label column"""

    @property
    def eval_metric(self) -> str:
        """Evaluation metric used for model selection"""

    @property
    def class_labels(self) -> list:
        """Original class label names for classification"""

    @property
    def positive_class(self):
        """Positive class label for binary classification"""

    @property
    def total_parameters(self) -> int:
        """Total number of model parameters"""

    @property
    def trainable_parameters(self) -> int:
        """Number of trainable model parameters"""

    @property
    def model_size(self) -> float:
        """Model size in megabytes"""


# In real usage the class is imported from the installed package:
# from autogluon.multimodal import MultiModalPredictor
import pandas as pd

# Example 1: multimodal classification over image, text, and tabular columns.
data = pd.DataFrame({
    'image_path': ['img1.jpg', 'img2.jpg', 'img3.jpg'],
    'text_content': ['Product description 1', 'Product description 2', 'Product description 3'],
    'price': [10.5, 25.0, 15.5],
    'category': ['A', 'B', 'A'],
})

# Build and train the multimodal classifier.
predictor = MultiModalPredictor(
    label='category',
    problem_type='multiclass',
    presets='high_quality',
)
predictor.fit(
    data,
    time_limit=3600,
    column_types={
        'image_path': 'image_path',
        'text_content': 'text',
        'price': 'numerical',
    },
)

# Inference — `test_data` is assumed to hold the same feature columns
# (not shown in this listing).
predictions = predictor.predict(test_data)
probabilities = predictor.predict_proba(test_data)

# Extract feature embeddings for downstream use.
embeddings = predictor.extract_embedding(test_data)
print(f"Embedding shape: {embeddings.shape}")

# Object detection on image data
# Example 2: object detection trained on COCO-format annotations.
detector = MultiModalPredictor(
    problem_type='object_detection',
    presets='medium_quality',
    classes=['person', 'car', 'bicycle'],  # Target classes
    path='./detection_models',
)

# Train on COCO-format annotation data.
detector.fit(
    train_data='train_annotations.json',  # COCO format
    time_limit=7200,
)

# Predict bounding boxes for a directory of images.
detections = detector.predict(
    'test_images/',
    save_results=True,
    as_coco=True,  # Return COCO format results
)

# Evaluate with COCO metrics.
metrics = detector.evaluate(
    'test_annotations.json',
    eval_tool='pycocotools',
)
print(f"mAP: {metrics['map']:.3f}")

# Text similarity for semantic matching
# Example 3: text similarity (semantic matching) between queries and responses.
matcher = MultiModalPredictor(
    problem_type='text_similarity',
    query='question',
    response='answer',
    match_label='relevant',
)

# Train on query-response pairs — `qa_data` is assumed to hold the
# 'question'/'answer'/'relevant' columns (not shown in this listing).
matcher.fit(qa_data, time_limit=1800)

# Find similar documents for a query among candidate texts.
query_data = ['What is machine learning?']
candidate_data = [
    'ML is a subset of AI',
    'Python is a programming language',
    'Deep learning uses neural networks',
]
similarities = matcher.predict(
    data=query_data,
    candidate_data=candidate_data,
)
print("Most similar:", candidate_data[similarities.argmax()])

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-autogluon