CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-mlflow

MLflow is an open source platform for the complete machine learning lifecycle

Pending
Overview
Eval results
Files

docs/frameworks.md

ML Framework Integrations

MLflow provides comprehensive integrations with popular machine learning and deep learning frameworks, enabling seamless model logging, loading, and deployment across different ML ecosystems. Each integration offers framework-specific optimizations and native model format support.

Capabilities

Scikit-learn Integration

Native integration for scikit-learn models with automatic dependency management and preprocessing pipeline support.

import mlflow.sklearn

# NOTE(review): the defs below are documentation stubs (signature + docstring,
# no body) describing the mlflow.sklearn API surface. The defaults
# DEFAULT_AWAIT_MAX_SLEEP_SECONDS and SERIALIZATION_FORMAT_PICKLE are
# referenced but never defined in this document, so this section is not
# importable as-is.

def log_model(sk_model, artifact_path, conda_env=None, code_paths=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, serialization_format=SERIALIZATION_FORMAT_PICKLE, metadata=None, **kwargs):
    """
    Log scikit-learn model as MLflow artifact.
    
    Parameters:
    - sk_model: Trained scikit-learn model object
    - artifact_path: str - Run-relative artifact path
    - conda_env: str or dict, optional - Conda environment specification
    - code_paths: list, optional - List of local code paths to include
    - registered_model_name: str, optional - Name for model registry
    - signature: ModelSignature, optional - Model input/output schema
    - input_example: Any, optional - Example input for inference
    - await_registration_for: int - Seconds to wait for registration
    - pip_requirements: list, optional - List of pip package requirements
    - extra_pip_requirements: list, optional - Additional pip requirements
    - serialization_format: str - Serialization format (pickle, cloudpickle)
    - metadata: dict, optional - Custom model metadata
    
    Returns:
    ModelInfo object with logged model details
    """

def load_model(model_uri, dst_path=None):
    """
    Load scikit-learn model from MLflow.
    
    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - dst_path: str, optional - Local destination path
    
    Returns:
    Loaded scikit-learn model object
    """

# save_model writes to a local path directly, without creating a run artifact.
def save_model(sk_model, path, conda_env=None, code_paths=None, mlflow_model=None, signature=None, input_example=None, pip_requirements=None, extra_pip_requirements=None, serialization_format=SERIALIZATION_FORMAT_PICKLE, metadata=None):
    """
    Save scikit-learn model to local path.
    
    Parameters:
    - sk_model: Trained scikit-learn model object
    - path: str - Local path to save model
    - conda_env: str or dict, optional - Conda environment
    - code_paths: list, optional - Code dependencies to include
    - mlflow_model: Model, optional - MLflow model configuration
    - signature: ModelSignature, optional - Model signature
    - input_example: Any, optional - Example input
    - pip_requirements: list, optional - Pip package requirements
    - extra_pip_requirements: list, optional - Additional pip requirements
    - serialization_format: str - Serialization format
    - metadata: dict, optional - Custom metadata
    """

PyTorch Integration

Comprehensive PyTorch support including standard models, PyTorch Lightning, and TorchScript compilation.

import mlflow.pytorch

# NOTE(review): documentation stubs for the mlflow.pytorch API surface.
# DEFAULT_AWAIT_MAX_SLEEP_SECONDS is referenced but not defined here.

def log_model(pytorch_model, artifact_path, conda_env=None, code_paths=None, pickle_module=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, requirements_file=None, extra_files=None, pip_requirements=None, extra_pip_requirements=None, metadata=None, **kwargs):
    """
    Log PyTorch model as MLflow artifact.
    
    Parameters:
    - pytorch_model: PyTorch model object or state_dict
    - artifact_path: str - Run-relative artifact path
    - conda_env: str or dict, optional - Conda environment
    - code_paths: list, optional - Local code paths to include
    - pickle_module: module, optional - Module for model serialization
    - registered_model_name: str, optional - Registry model name
    - signature: ModelSignature, optional - Model schema
    - input_example: Any, optional - Example model input
    - await_registration_for: int - Registration wait time
    - requirements_file: str, optional - Path to requirements file
    - extra_files: list, optional - Additional files to include
    - pip_requirements: list, optional - Pip requirements
    - extra_pip_requirements: list, optional - Additional pip requirements
    - metadata: dict, optional - Custom metadata
    
    Returns:
    ModelInfo object
    """

def load_model(model_uri, map_location=None, dst_path=None):
    """
    Load PyTorch model from MLflow.
    
    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - map_location: str or torch.device, optional - Device mapping for loading
    - dst_path: str, optional - Local destination path
    
    Returns:
    Loaded PyTorch model object
    """

# State-dict helpers log/load raw parameter dicts rather than full models.
def log_state_dict(state_dict, artifact_path, **kwargs):
    """
    Log PyTorch model state dictionary.
    
    Parameters:
    - state_dict: dict - PyTorch model state dictionary
    - artifact_path: str - Artifact path for state dict
    - kwargs: Additional logging arguments
    """

def load_state_dict(model_uri, map_location=None):
    """
    Load PyTorch state dictionary from MLflow.
    
    Parameters:
    - model_uri: str - URI pointing to saved state dict
    - map_location: str or device, optional - Device for loading
    
    Returns:
    PyTorch state dictionary
    """

TensorFlow Integration

Full TensorFlow support including Keras models, SavedModel format, and TensorFlow Serving compatibility.

import mlflow.tensorflow

# NOTE(review): documentation stubs for the mlflow.tensorflow API surface.
# artifact_path defaults to None in this stub although callers must supply it —
# presumably a quirk of the docs rendering; verify against the real signature.

def log_model(tf_saved_model_dir=None, tf_meta_graph_tags=None, tf_signature_def_key=None, artifact_path=None, conda_env=None, code_paths=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, metadata=None, **kwargs):
    """
    Log TensorFlow model as MLflow artifact.
    
    Parameters:
    - tf_saved_model_dir: str - Path to TensorFlow SavedModel directory
    - tf_meta_graph_tags: list, optional - MetaGraph tags to load
    - tf_signature_def_key: str, optional - SignatureDef key for inference
    - artifact_path: str - Run-relative artifact path
    - conda_env: str or dict, optional - Conda environment
    - code_paths: list, optional - Code dependencies
    - registered_model_name: str, optional - Registry model name
    - signature: ModelSignature, optional - Model schema
    - input_example: Any, optional - Example input
    - await_registration_for: int - Registration wait time
    - pip_requirements: list, optional - Pip requirements
    - extra_pip_requirements: list, optional - Additional pip requirements
    - metadata: dict, optional - Custom metadata
    
    Returns:
    ModelInfo object
    """

def load_model(model_uri, dst_path=None):
    """
    Load TensorFlow model from MLflow.
    
    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - dst_path: str, optional - Local destination path
    
    Returns:
    Loaded TensorFlow model object
    """

import mlflow.keras

# NOTE(review): documentation stub for the mlflow.keras API surface.

def log_model(keras_model, artifact_path, conda_env=None, code_paths=None, custom_objects=None, keras_module=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, metadata=None, **kwargs):
    """
    Log Keras model as MLflow artifact.
    
    Parameters:
    - keras_model: Compiled Keras model object
    - artifact_path: str - Run-relative artifact path
    - conda_env: str or dict, optional - Conda environment
    - code_paths: list, optional - Code dependencies
    - custom_objects: dict, optional - Custom objects for model loading
    - keras_module: module, optional - Keras module for compatibility
    - registered_model_name: str, optional - Registry model name
    - signature: ModelSignature, optional - Model schema
    - input_example: Any, optional - Example input
    - await_registration_for: int - Registration wait time
    - pip_requirements: list, optional - Pip requirements
    - extra_pip_requirements: list, optional - Additional pip requirements
    - metadata: dict, optional - Custom metadata
    
    Returns:
    ModelInfo object
    """

XGBoost Integration

Native XGBoost model support with automatic hyperparameter tracking and feature importance logging.

import mlflow.xgboost

# NOTE(review): documentation stubs for the mlflow.xgboost API surface.

def log_model(xgb_model, artifact_path, conda_env=None, code_paths=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, model_format="xgb", metadata=None, **kwargs):
    """
    Log XGBoost model as MLflow artifact.
    
    Parameters:
    - xgb_model: Trained XGBoost model (Booster, XGBClassifier, XGBRegressor)
    - artifact_path: str - Run-relative artifact path
    - conda_env: str or dict, optional - Conda environment
    - code_paths: list, optional - Code dependencies
    - registered_model_name: str, optional - Registry model name
    - signature: ModelSignature, optional - Model schema
    - input_example: Any, optional - Example input
    - await_registration_for: int - Registration wait time
    - pip_requirements: list, optional - Pip requirements
    - extra_pip_requirements: list, optional - Additional requirements
    - model_format: str - Save format ("xgb", "json", "ubj")
    - metadata: dict, optional - Custom metadata
    
    Returns:
    ModelInfo object
    """

def load_model(model_uri, dst_path=None):
    """
    Load XGBoost model from MLflow.
    
    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - dst_path: str, optional - Local destination path
    
    Returns:
    Loaded XGBoost model object
    """

# autolog hooks XGBoost training so params/metrics/models are logged without
# explicit mlflow.log_* calls.
def autolog(importance_type="weight", log_input_examples=False, log_model_signatures=True, log_models=True, disable=False, exclusive=False, disable_for_unsupported_versions=False, silent=False, registered_model_name=None):
    """
    Enable automatic logging for XGBoost training.
    
    Parameters:
    - importance_type: str - Feature importance type to log
    - log_input_examples: bool - Whether to log input examples
    - log_model_signatures: bool - Whether to log model signatures
    - log_models: bool - Whether to log trained models
    - disable: bool - Disable autologging if True
    - exclusive: bool - Exclusive autologging mode
    - disable_for_unsupported_versions: bool - Skip for unsupported versions
    - silent: bool - Suppress autolog warnings
    - registered_model_name: str, optional - Auto-register model name
    """

LightGBM Integration

Comprehensive LightGBM support with early stopping integration and automatic metric logging.

import mlflow.lightgbm

# NOTE(review): documentation stubs for the mlflow.lightgbm API surface.

def log_model(lgb_model, artifact_path, conda_env=None, code_paths=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, metadata=None, **kwargs):
    """
    Log LightGBM model as MLflow artifact.
    
    Parameters:
    - lgb_model: Trained LightGBM model (Booster, LGBMClassifier, LGBMRegressor)
    - artifact_path: str - Run-relative artifact path
    - conda_env: str or dict, optional - Conda environment
    - code_paths: list, optional - Code dependencies
    - registered_model_name: str, optional - Registry model name
    - signature: ModelSignature, optional - Model schema
    - input_example: Any, optional - Example input
    - await_registration_for: int - Registration wait time
    - pip_requirements: list, optional - Pip requirements
    - extra_pip_requirements: list, optional - Additional requirements
    - metadata: dict, optional - Custom metadata
    
    Returns:
    ModelInfo object
    """

# Note the different importance_type default ("split") vs. XGBoost ("weight").
def autolog(importance_type="split", log_input_examples=False, log_model_signatures=True, log_models=True, disable=False, exclusive=False, disable_for_unsupported_versions=False, silent=False, registered_model_name=None):
    """
    Enable automatic logging for LightGBM training.
    
    Parameters:
    - importance_type: str - Feature importance type ("split", "gain")
    - log_input_examples: bool - Log input examples
    - log_model_signatures: bool - Log model signatures
    - log_models: bool - Log trained models
    - disable: bool - Disable autologging
    - exclusive: bool - Exclusive autologging mode
    - disable_for_unsupported_versions: bool - Skip unsupported versions
    - silent: bool - Suppress warnings
    - registered_model_name: str, optional - Auto-register model name
    """

Transformers Integration

Hugging Face Transformers integration with support for various model types and tokenizers.

import mlflow.transformers

# NOTE(review): documentation stubs for the mlflow.transformers API surface.
# tokenizer/feature_extractor/processor let callers log pipeline components
# individually when transformers_model is a bare model rather than a pipeline.

def log_model(transformers_model, artifact_path, task=None, conda_env=None, code_paths=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, metadata=None, tokenizer=None, feature_extractor=None, processor=None, model_config=None, **kwargs):
    """
    Log Transformers model as MLflow artifact.
    
    Parameters:
    - transformers_model: Transformers model or pipeline object
    - artifact_path: str - Run-relative artifact path
    - task: str, optional - Task type for the model
    - conda_env: str or dict, optional - Conda environment
    - code_paths: list, optional - Code dependencies
    - registered_model_name: str, optional - Registry model name
    - signature: ModelSignature, optional - Model schema
    - input_example: Any, optional - Example input
    - await_registration_for: int - Registration wait time
    - pip_requirements: list, optional - Pip requirements
    - extra_pip_requirements: list, optional - Additional requirements
    - metadata: dict, optional - Custom metadata
    - tokenizer: Tokenizer, optional - Associated tokenizer
    - feature_extractor: FeatureExtractor, optional - Feature extractor
    - processor: Processor, optional - Processor object
    - model_config: dict, optional - Model configuration
    
    Returns:
    ModelInfo object
    """

def load_model(model_uri, dst_path=None, device=None):
    """
    Load Transformers model from MLflow.
    
    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - dst_path: str, optional - Local destination path
    - device: str or int, optional - Device for model loading
    
    Returns:
    Loaded Transformers model or pipeline
    """

Spark MLlib Integration

Apache Spark MLlib integration for distributed machine learning model logging and serving.

import mlflow.spark

# NOTE(review): documentation stubs for the mlflow.spark API surface.
# Spark ML *autologging* lives in a separate module, mlflow.pyspark.ml (below).

def log_model(spark_model, artifact_path, conda_env=None, code_paths=None, dfs_tmpdir=None, sample_input=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, metadata=None, **kwargs):
    """
    Log Spark MLlib model as MLflow artifact.
    
    Parameters:
    - spark_model: Fitted Spark MLlib model or pipeline
    - artifact_path: str - Run-relative artifact path
    - conda_env: str or dict, optional - Conda environment
    - code_paths: list, optional - Code dependencies
    - dfs_tmpdir: str, optional - Temporary directory for DFS operations
    - sample_input: DataFrame, optional - Sample input for schema inference
    - registered_model_name: str, optional - Registry model name
    - signature: ModelSignature, optional - Model schema
    - input_example: Any, optional - Example input
    - await_registration_for: int - Registration wait time
    - pip_requirements: list, optional - Pip requirements
    - extra_pip_requirements: list, optional - Additional requirements
    - metadata: dict, optional - Custom metadata
    
    Returns:
    ModelInfo object
    """

def load_model(model_uri, dfs_tmpdir=None):
    """
    Load Spark MLlib model from MLflow.
    
    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - dfs_tmpdir: str, optional - Temporary directory for DFS
    
    Returns:
    Loaded Spark MLlib model or pipeline
    """

import mlflow.pyspark.ml

def autolog(disable=False, exclusive=False, disable_for_unsupported_versions=False, silent=False, log_models=True, log_input_examples=False, log_model_signatures=True, log_post_training_metrics=True, registered_model_name=None):
    """
    Enable automatic logging for PySpark ML training.
    
    Parameters:
    - disable: bool - Disable autologging
    - exclusive: bool - Exclusive autologging mode
    - disable_for_unsupported_versions: bool - Skip unsupported versions
    - silent: bool - Suppress warnings
    - log_models: bool - Log trained models
    - log_input_examples: bool - Log input examples
    - log_model_signatures: bool - Log model signatures
    - log_post_training_metrics: bool - Log evaluation metrics
    - registered_model_name: str, optional - Auto-register model name
    """

AG2 (AutoGen) Integration

Multi-agent conversation framework integration with automatic conversation logging and observability (experimental in MLflow 3.0.0).

import mlflow.ag2

# NOTE(review): documentation stub; trace-centric autolog for AG2 (AutoGen).
def autolog(disable=False, log_traces=True, log_models=False, log_input_examples=False, log_model_signatures=True, silent=False):
    """
    Enable automatic logging for AG2 (AutoGen) conversations.
    
    Parameters:
    - disable: bool - Disable AG2 autologging
    - log_traces: bool - Log conversation traces
    - log_models: bool - Log agent models
    - log_input_examples: bool - Log conversation examples
    - log_model_signatures: bool - Log model signatures
    - silent: bool - Suppress autolog warnings
    """

Pydantic AI Integration

Pydantic AI framework integration for structured AI application development with automatic model and conversation logging (experimental in MLflow 3.0.0).

import mlflow.pydantic_ai

# NOTE(review): documentation stub; same flavor of trace-centric autolog as
# the other GenAI integrations in this document.
def autolog(disable=False, log_traces=True, log_models=False, log_input_examples=False, log_model_signatures=True, silent=False):
    """
    Enable automatic logging for Pydantic AI applications.
    
    Parameters:
    - disable: bool - Disable Pydantic AI autologging
    - log_traces: bool - Log AI application traces
    - log_models: bool - Log AI models
    - log_input_examples: bool - Log input examples
    - log_model_signatures: bool - Log model signatures
    - silent: bool - Suppress autolog warnings
    """

Smolagents Integration

Smolagents AI agents framework integration with conversation and task execution logging (experimental in MLflow 3.0.0).

import mlflow.smolagents

# NOTE(review): documentation stub for Smolagents trace autologging.
def autolog(disable=False, log_traces=True, log_models=False, log_input_examples=False, log_model_signatures=True, silent=False):
    """
    Enable automatic logging for Smolagents AI agents.
    
    Parameters:
    - disable: bool - Disable Smolagents autologging
    - log_traces: bool - Log agent execution traces
    - log_models: bool - Log agent models
    - log_input_examples: bool - Log input examples
    - log_model_signatures: bool - Log model signatures
    - silent: bool - Suppress autolog warnings
    """

Groq Integration

Groq API integration with automatic request/response logging and performance tracking.

import mlflow.groq

# NOTE(review): documentation stub for Groq API-call trace autologging.
def autolog(disable=False, log_traces=True, log_models=False, log_input_examples=False, log_model_signatures=True, silent=False):
    """
    Enable automatic logging for Groq API calls.
    
    Parameters:
    - disable: bool - Disable Groq autologging
    - log_traces: bool - Log API call traces
    - log_models: bool - Log model configurations
    - log_input_examples: bool - Log input examples
    - log_model_signatures: bool - Log model signatures
    - silent: bool - Suppress autolog warnings
    """

Semantic Kernel Integration

Microsoft Semantic Kernel framework integration for orchestrating AI services with automatic logging and observability.

import mlflow.semantic_kernel

# NOTE(review): documentation stub for Semantic Kernel trace autologging.
def autolog(disable=False, log_traces=True, log_models=False, log_input_examples=False, log_model_signatures=True, silent=False):
    """
    Enable automatic logging for Semantic Kernel applications.
    
    Parameters:
    - disable: bool - Disable Semantic Kernel autologging
    - log_traces: bool - Log kernel execution traces
    - log_models: bool - Log AI service configurations
    - log_input_examples: bool - Log input examples
    - log_model_signatures: bool - Log model signatures
    - silent: bool - Suppress autolog warnings
    """

Auto-logging Capabilities

Automatic experiment tracking across supported frameworks with minimal code changes.

import mlflow

# NOTE(review): documentation stub for the universal autolog entry point,
# which enables autologging for every supported framework at once.
def autolog(log_input_examples=False, log_model_signatures=True, log_models=True, log_datasets=True, disable=False, exclusive=False, disable_for_unsupported_versions=False, silent=False, extra_tags=None, registered_model_name=None):
    """
    Enable automatic logging across all supported frameworks.
    
    Parameters:
    - log_input_examples: bool - Log input examples for models
    - log_model_signatures: bool - Log model input/output signatures
    - log_models: bool - Log trained model objects
    - log_datasets: bool - Log training/validation datasets
    - disable: bool - Disable all autologging if True
    - exclusive: bool - Use exclusive autologging mode
    - disable_for_unsupported_versions: bool - Skip unsupported library versions
    - silent: bool - Suppress autolog setup warnings
    - extra_tags: dict, optional - Additional tags for all runs
    - registered_model_name: str, optional - Auto-register models with name
    """

# Framework-specific autolog functions
# NOTE(review): these flat names (sklearn_autolog, ...) look like doc shorthand;
# the actual MLflow API exposes them as mlflow.<framework>.autolog() — verify
# against the upstream mlflow package before relying on these names.
def sklearn_autolog(**kwargs):
    """Enable scikit-learn autologging."""

def pytorch_autolog(**kwargs):
    """Enable PyTorch autologging."""

def tensorflow_autolog(**kwargs):
    """Enable TensorFlow/Keras autologging."""

def xgboost_autolog(**kwargs):
    """Enable XGBoost autologging."""

def lightgbm_autolog(**kwargs):
    """Enable LightGBM autologging."""

def spark_autolog(**kwargs):
    """Enable Spark MLlib autologging."""

Usage Examples

Scikit-learn Model Logging

import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Generate a synthetic binary-classification dataset.
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Bundle preprocessing and the classifier so they are logged as one model.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

mlflow.set_experiment("sklearn_integration")

# Bind the run object: mlflow.active_run() returns None once the `with` block
# exits, so the original post-run lookup would raise AttributeError (bug fix).
with mlflow.start_run() as run:
    # Train model
    pipeline.fit(X_train, y_train)

    # Infer the input/output schema from training data and predictions.
    signature = mlflow.models.infer_signature(X_train, pipeline.predict(X_train))

    mlflow.sklearn.log_model(
        sk_model=pipeline,
        artifact_path="model",
        signature=signature,
        input_example=X_train[:3],
        registered_model_name="rf_pipeline"
    )

    # Log accuracy on both splits for easy comparison in the UI.
    train_score = pipeline.score(X_train, y_train)
    test_score = pipeline.score(X_test, y_test)

    mlflow.log_metric("train_accuracy", train_score)
    mlflow.log_metric("test_accuracy", test_score)

    print(f"Model logged with accuracy: {test_score:.3f}")

# Load and use the model via the captured run id (the run has ended here).
model_uri = f"runs:/{run.info.run_id}/model"
loaded_model = mlflow.sklearn.load_model(model_uri)
predictions = loaded_model.predict(X_test)

PyTorch Model with Custom Architecture

import mlflow
import mlflow.pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Define custom model
class NeuralNet(nn.Module):
    """Two-layer fully-connected classifier with ReLU activation."""

    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Synthetic data: 1000 samples, 20 features, binary labels.
X = torch.randn(1000, 20)
y = torch.randint(0, 2, (1000,))
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

mlflow.set_experiment("pytorch_integration")

# Bind the run object: mlflow.active_run() returns None after the `with` block
# exits, so the original post-run model_uri lookup raised AttributeError (fix).
with mlflow.start_run() as run:
    # Initialize model, loss, and optimizer.
    model = NeuralNet(input_size=20, hidden_size=50, num_classes=2)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Log hyperparameters
    mlflow.log_param("input_size", 20)
    mlflow.log_param("hidden_size", 50)
    mlflow.log_param("learning_rate", 0.01)
    mlflow.log_param("batch_size", 32)

    # Training loop: log mean batch loss once per epoch.
    for epoch in range(10):
        total_loss = 0
        for batch_x, batch_y in dataloader:
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(dataloader)
        mlflow.log_metric("loss", avg_loss, step=epoch)

    # Log the full model object...
    mlflow.pytorch.log_model(
        pytorch_model=model,
        artifact_path="model",
        registered_model_name="neural_net"
    )

    # ...and the raw state dict separately for lightweight reloads.
    mlflow.pytorch.log_state_dict(
        state_dict=model.state_dict(),
        artifact_path="state_dict"
    )

    print("PyTorch model logged successfully")

# Load model using the captured run id (the run has ended here).
model_uri = f"runs:/{run.info.run_id}/model"
loaded_model = mlflow.pytorch.load_model(model_uri)

XGBoost with Autologging

import mlflow
import mlflow.xgboost
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Enable XGBoost autologging: params, metrics, feature importance, and the
# trained model are captured without explicit mlflow.log_* calls.
mlflow.xgboost.autolog(
    importance_type="gain",
    log_input_examples=True,
    log_model_signatures=True,
    registered_model_name="xgb_automodel"
)

# Synthetic binary-classification data.
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

mlflow.set_experiment("xgboost_autolog")

with mlflow.start_run():
    # Train XGBoost model - automatically logged.
    # Fix: eval_metric moved from fit() to the constructor — XGBoost >= 2.0
    # removed the fit-time eval_metric keyword and raises TypeError for it.
    model = xgb.XGBClassifier(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.1,
        random_state=42,
        eval_metric="logloss"
    )

    model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],
        verbose=False
    )

    # Additional manual logging alongside the autologged metrics.
    test_accuracy = model.score(X_test, y_test)
    mlflow.log_metric("test_accuracy", test_accuracy)

    print(f"XGBoost model auto-logged with accuracy: {test_accuracy:.3f}")

    # Feature importance is automatically logged
    # Model is automatically registered with specified name

Transformers with Multiple Components

import mlflow
import mlflow.transformers
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

mlflow.set_experiment("transformers_integration")

# Bind the run object: mlflow.active_run() returns None after the `with` block
# exits, so the original post-run model_uri lookup raised AttributeError (fix).
with mlflow.start_run() as run:
    # Load pre-trained model and tokenizer
    model_name = "distilbert-base-uncased-finetuned-sst-2-english"

    # Load components separately for more control
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)

    # Create pipeline.
    # NOTE(review): return_all_scores is deprecated in recent transformers
    # releases in favor of top_k=None — kept here to preserve behavior; verify
    # against the installed transformers version.
    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model=model,
        tokenizer=tokenizer,
        return_all_scores=True
    )

    # Log model together with its tokenizer and inference-time config.
    mlflow.transformers.log_model(
        transformers_model=sentiment_pipeline,
        artifact_path="sentiment_model",
        task="text-classification",
        tokenizer=tokenizer,
        model_config={
            "max_length": 512,
            "padding": True,
            "truncation": True
        },
        registered_model_name="sentiment_classifier"
    )

    # Smoke-test the pipeline on a few example sentences.
    test_texts = [
        "I love this product!",
        "This is terrible.",
        "It's okay, nothing special."
    ]

    results = sentiment_pipeline(test_texts)

    # Log example predictions as text artifacts.
    for text, result in zip(test_texts, results):
        print(f"'{text}' -> {result}")
        mlflow.log_text(f"Prediction: {result}", f"example_{hash(text)}.txt")

    print("Transformers model logged with tokenizer and config")

# Load and use the model via the captured run id (the run has ended here).
model_uri = f"runs:/{run.info.run_id}/sentiment_model"
loaded_pipeline = mlflow.transformers.load_model(model_uri)
new_predictions = loaded_pipeline(["MLflow is amazing!"])

Spark MLlib Distributed Training

import mlflow
import mlflow.spark
import mlflow.pyspark.ml
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler, StringIndexer
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# Initialize Spark
spark = SparkSession.builder.appName("MLflow Spark Integration").getOrCreate()

# Enable Spark ML autologging.
# Fix: log_models / log_input_examples are parameters of
# mlflow.pyspark.ml.autolog(); mlflow.spark.autolog() only captures datasource
# info and does not accept these keywords.
mlflow.pyspark.ml.autolog(log_models=True, log_input_examples=True)

mlflow.set_experiment("spark_integration")

with mlflow.start_run():
    # Create a small synthetic DataFrame (repeated rows to get 400 samples).
    data = [(0.0, "a", 1.0, 0),
            (1.0, "b", 2.0, 1),
            (2.0, "c", 3.0, 0),
            (3.0, "a", 4.0, 1)] * 100

    columns = ["feature1", "category", "feature2", "label"]
    df = spark.createDataFrame(data, columns)

    # ML Pipeline: index the categorical column, assemble a feature vector,
    # then train a random forest.
    indexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
    assembler = VectorAssembler(
        inputCols=["feature1", "categoryIndex", "feature2"],
        outputCol="features"
    )
    rf = RandomForestClassifier(featuresCol="features", labelCol="label")

    pipeline = Pipeline(stages=[indexer, assembler, rf])

    # Split data
    train_df, test_df = df.randomSplit([0.8, 0.2], seed=42)

    # Train pipeline - automatically logged
    model = pipeline.fit(train_df)

    # Make predictions
    predictions = model.transform(test_df)

    # Evaluate model
    evaluator = MulticlassClassificationEvaluator(
        labelCol="label",
        predictionCol="prediction",
        metricName="accuracy"
    )
    accuracy = evaluator.evaluate(predictions)

    mlflow.log_metric("test_accuracy", accuracy)

    # Log model manually for more control
    mlflow.spark.log_model(
        spark_model=model,
        artifact_path="spark_pipeline",
        registered_model_name="spark_rf_pipeline"
    )

    print(f"Spark pipeline logged with accuracy: {accuracy:.3f}")

spark.stop()

Multi-Framework Comparison

import mlflow
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb

# Generate data
X, y = make_classification(n_samples=10000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

mlflow.set_experiment("framework_comparison")

# Compare multiple frameworks: each entry pairs an estimator with the
# framework-specific MLflow logging function that knows its native format.
frameworks = {
    "sklearn": {
        "model": RandomForestClassifier(n_estimators=100, random_state=42),
        "log_func": mlflow.sklearn.log_model
    },
    "xgboost": {
        "model": xgb.XGBClassifier(n_estimators=100, random_state=42),
        "log_func": mlflow.xgboost.log_model
    },
    "lightgbm": {
        "model": lgb.LGBMClassifier(n_estimators=100, random_state=42),
        "log_func": mlflow.lightgbm.log_model
    }
}

results = {}

# One MLflow run per framework so the UI can compare them side by side.
for framework_name, config in frameworks.items():
    with mlflow.start_run(run_name=f"{framework_name}_model"):
        # Train model
        model = config["model"]
        model.fit(X_train, y_train)
        
        # Evaluate
        train_acc = model.score(X_train, y_train)
        test_acc = model.score(X_test, y_test)
        
        # Log metrics
        mlflow.log_param("framework", framework_name)
        mlflow.log_metric("train_accuracy", train_acc)
        mlflow.log_metric("test_accuracy", test_acc)
        
        # Log model (first positional arg is the model parameter, whose name
        # differs per framework — sk_model / xgb_model / lgb_model).
        config["log_func"](
            model,
            artifact_path="model",
            registered_model_name=f"{framework_name}_classifier"
        )
        
        # active_run() is still inside the `with` block here, so run_id is valid.
        results[framework_name] = {
            "train_acc": train_acc,
            "test_acc": test_acc,
            "run_id": mlflow.active_run().info.run_id
        }
        
        print(f"{framework_name}: Train={train_acc:.3f}, Test={test_acc:.3f}")

# Pick the framework with the best held-out accuracy.
best_framework = max(results.keys(), key=lambda k: results[k]["test_acc"])
print(f"\nBest framework: {best_framework} (Test Acc: {results[best_framework]['test_acc']:.3f})")

Universal Autologging Setup

import mlflow
import warnings

# Turn on framework-wide autologging once; every supported library trained
# afterwards is captured without per-framework setup.
# NOTE(review): confirm that mlflow.autolog() accepts registered_model_name in
# the installed MLflow version — some releases expose it only on the
# framework-specific autolog functions.
_autolog_settings = dict(
    log_input_examples=True,
    log_model_signatures=True,
    log_models=True,
    log_datasets=True,
    extra_tags={"environment": "production", "team": "ml-platform"},
    registered_model_name="auto_registered_model",
)
mlflow.autolog(**_autolog_settings)

# Keep the console output readable.
warnings.filterwarnings("ignore")

mlflow.set_experiment("universal_autolog")

# Any supported ML training performed from here on is logged automatically.
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1000, n_features=10, random_state=42)

# Candidate models — each fit below is captured by autologging.
models = [
    ("sklearn_gb", GradientBoostingClassifier(random_state=42)),
    ("xgboost", xgb.XGBClassifier(random_state=42))
]

for model_name, model in models:
    with mlflow.start_run(run_name=f"auto_{model_name}"):
        # Fitting is the only required step; params/metrics/model artifacts
        # are recorded automatically.
        model.fit(X, y)

        # Supplement the automatic capture with one custom metric.
        mlflow.log_metric("custom_accuracy", model.score(X, y))

        print(f"{model_name} automatically logged")

# Switch autologging back off once training is finished.
mlflow.autolog(disable=True)

Types

from typing import Any, Dict, List, Optional, Union
import torch
import tensorflow as tf
from sklearn.base import BaseEstimator
import xgboost
import lightgbm

# Common model types across frameworks — the object each flavor's
# log_model/load_model functions accept or return.
SklearnModel = BaseEstimator
PyTorchModel = torch.nn.Module
TensorFlowModel = Union[tf.keras.Model, str]  # in-memory Keras model or SavedModel path
XGBoostModel = Union[xgboost.Booster, xgboost.XGBModel]  # native or sklearn-API model
LightGBMModel = Union[lightgbm.Booster, lightgbm.LGBMModel]  # native or sklearn-API model

# Framework-specific logging function signatures (stubs; bodies live in the
# respective mlflow.<framework> modules).
def sklearn_log_model(
    sk_model: SklearnModel,
    artifact_path: str,
    **kwargs
) -> 'ModelInfo': ...

def pytorch_log_model(
    pytorch_model: PyTorchModel,
    artifact_path: str,
    **kwargs
) -> 'ModelInfo': ...

def tensorflow_log_model(
    tf_saved_model_dir: str,
    artifact_path: str,
    **kwargs
) -> 'ModelInfo': ...

def xgboost_log_model(
    xgb_model: XGBoostModel,
    artifact_path: str,
    **kwargs
) -> 'ModelInfo': ...

def lightgbm_log_model(
    lgb_model: LightGBMModel,
    artifact_path: str,
    **kwargs
) -> 'ModelInfo': ...

# Loading function return types: each flavor's load_model yields the
# corresponding native model object.
def sklearn_load_model(model_uri: str) -> SklearnModel: ...
def pytorch_load_model(model_uri: str) -> PyTorchModel: ...
def tensorflow_load_model(model_uri: str) -> TensorFlowModel: ...
def xgboost_load_model(model_uri: str) -> XGBoostModel: ...
def lightgbm_load_model(model_uri: str) -> LightGBMModel: ...

# Autolog configuration types
AutologConfig = Dict[str, Union[bool, str, Dict[str, Any]]]

def autolog_function(
    log_input_examples: bool = False,
    log_model_signatures: bool = True,
    log_models: bool = True,
    disable: bool = False,
    exclusive: bool = False,
    disable_for_unsupported_versions: bool = False,
    silent: bool = False,
    registered_model_name: Optional[str] = None,
    **kwargs
) -> None: ...

# Framework-specific marker types. The docstring alone is a valid class body,
# so the redundant `pass` statements are dropped.
class TorchStateDict:
    """PyTorch model state dictionary type."""

class SparkPipeline:
    """Spark ML Pipeline type."""

class TransformersPipeline:
    """Hugging Face Transformers Pipeline type."""

# Serialization format constants (values accepted by sklearn log_model's
# serialization_format parameter, plus a JSON format).
SERIALIZATION_FORMAT_PICKLE = "pickle"
SERIALIZATION_FORMAT_CLOUDPICKLE = "cloudpickle"
SERIALIZATION_FORMAT_JSON = "json"

Install with Tessl CLI

npx tessl i tessl/pypi-mlflow

docs

client.md

configuration.md

data.md

frameworks.md

genai.md

index.md

models.md

projects.md

tracing.md

tracking.md

tile.json