MLflow is an open source platform for the complete machine learning lifecycle
—
MLflow provides comprehensive integrations with popular machine learning and deep learning frameworks, enabling seamless model logging, loading, and deployment across different ML ecosystems. Each integration offers framework-specific optimizations and native model format support.
Native integration for scikit-learn models with automatic dependency management and preprocessing pipeline support.
import mlflow.sklearn
def log_model(sk_model, artifact_path, conda_env=None, code_paths=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, serialization_format=SERIALIZATION_FORMAT_PICKLE, metadata=None, **kwargs):
"""
Log scikit-learn model as MLflow artifact.
Parameters:
- sk_model: Trained scikit-learn model object
- artifact_path: str - Run-relative artifact path
- conda_env: str or dict, optional - Conda environment specification
- code_paths: list, optional - List of local code paths to include
- registered_model_name: str, optional - Name for model registry
- signature: ModelSignature, optional - Model input/output schema
- input_example: Any, optional - Example input for inference
- await_registration_for: int - Seconds to wait for registration
- pip_requirements: list, optional - List of pip package requirements
- extra_pip_requirements: list, optional - Additional pip requirements
- serialization_format: str - Serialization format (pickle, cloudpickle)
- metadata: dict, optional - Custom model metadata
Returns:
ModelInfo object with logged model details
"""
def load_model(model_uri, dst_path=None):
"""
Load scikit-learn model from MLflow.
Parameters:
- model_uri: str - URI pointing to MLflow model
- dst_path: str, optional - Local destination path
Returns:
Loaded scikit-learn model object
"""
def save_model(sk_model, path, conda_env=None, code_paths=None, mlflow_model=None, signature=None, input_example=None, pip_requirements=None, extra_pip_requirements=None, serialization_format=SERIALIZATION_FORMAT_PICKLE, metadata=None):
"""
Save scikit-learn model to local path.
Parameters:
- sk_model: Trained scikit-learn model object
- path: str - Local path to save model
- conda_env: str or dict, optional - Conda environment
- code_paths: list, optional - Code dependencies to include
- mlflow_model: Model, optional - MLflow model configuration
- signature: ModelSignature, optional - Model signature
- input_example: Any, optional - Example input
- pip_requirements: list, optional - Pip package requirements
- extra_pip_requirements: list, optional - Additional pip requirements
- serialization_format: str - Serialization format
- metadata: dict, optional - Custom metadata
"""Comprehensive PyTorch support including standard models, PyTorch Lightning, and TorchScript compilation.
import mlflow.pytorch
def log_model(pytorch_model, artifact_path, conda_env=None, code_paths=None, pickle_module=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, requirements_file=None, extra_files=None, pip_requirements=None, extra_pip_requirements=None, metadata=None, **kwargs):
"""
Log PyTorch model as MLflow artifact.
Parameters:
- pytorch_model: PyTorch model object or state_dict
- artifact_path: str - Run-relative artifact path
- conda_env: str or dict, optional - Conda environment
- code_paths: list, optional - Local code paths to include
- pickle_module: module, optional - Module for model serialization
- registered_model_name: str, optional - Registry model name
- signature: ModelSignature, optional - Model schema
- input_example: Any, optional - Example model input
- await_registration_for: int - Registration wait time
- requirements_file: str, optional - Path to requirements file
- extra_files: list, optional - Additional files to include
- pip_requirements: list, optional - Pip requirements
- extra_pip_requirements: list, optional - Additional pip requirements
- metadata: dict, optional - Custom metadata
Returns:
ModelInfo object
"""
def load_model(model_uri, map_location=None, dst_path=None):
"""
Load PyTorch model from MLflow.
Parameters:
- model_uri: str - URI pointing to MLflow model
- map_location: str or torch.device, optional - Device mapping for loading
- dst_path: str, optional - Local destination path
Returns:
Loaded PyTorch model object
"""
def log_state_dict(state_dict, artifact_path, **kwargs):
"""
Log PyTorch model state dictionary.
Parameters:
- state_dict: dict - PyTorch model state dictionary
- artifact_path: str - Artifact path for state dict
- kwargs: Additional logging arguments
"""
def load_state_dict(model_uri, map_location=None):
"""
Load PyTorch state dictionary from MLflow.
Parameters:
- model_uri: str - URI pointing to saved state dict
- map_location: str or device, optional - Device for loading
Returns:
PyTorch state dictionary
"""Full TensorFlow support including Keras models, SavedModel format, and TensorFlow Serving compatibility.
import mlflow.tensorflow
def log_model(tf_saved_model_dir=None, tf_meta_graph_tags=None, tf_signature_def_key=None, artifact_path=None, conda_env=None, code_paths=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, metadata=None, **kwargs):
"""
Log TensorFlow model as MLflow artifact.
Parameters:
- tf_saved_model_dir: str - Path to TensorFlow SavedModel directory
- tf_meta_graph_tags: list, optional - MetaGraph tags to load
- tf_signature_def_key: str, optional - SignatureDef key for inference
- artifact_path: str - Run-relative artifact path
- conda_env: str or dict, optional - Conda environment
- code_paths: list, optional - Code dependencies
- registered_model_name: str, optional - Registry model name
- signature: ModelSignature, optional - Model schema
- input_example: Any, optional - Example input
- await_registration_for: int - Registration wait time
- pip_requirements: list, optional - Pip requirements
- extra_pip_requirements: list, optional - Additional pip requirements
- metadata: dict, optional - Custom metadata
Returns:
ModelInfo object
"""
def load_model(model_uri, dst_path=None):
"""
Load TensorFlow model from MLflow.
Parameters:
- model_uri: str - URI pointing to MLflow model
- dst_path: str, optional - Local destination path
Returns:
Loaded TensorFlow model object
"""
import mlflow.keras
def log_model(keras_model, artifact_path, conda_env=None, code_paths=None, custom_objects=None, keras_module=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, metadata=None, **kwargs):
"""
Log Keras model as MLflow artifact.
Parameters:
- keras_model: Compiled Keras model object
- artifact_path: str - Run-relative artifact path
- conda_env: str or dict, optional - Conda environment
- code_paths: list, optional - Code dependencies
- custom_objects: dict, optional - Custom objects for model loading
- keras_module: module, optional - Keras module for compatibility
- registered_model_name: str, optional - Registry model name
- signature: ModelSignature, optional - Model schema
- input_example: Any, optional - Example input
- await_registration_for: int - Registration wait time
- pip_requirements: list, optional - Pip requirements
- extra_pip_requirements: list, optional - Additional pip requirements
- metadata: dict, optional - Custom metadata
Returns:
ModelInfo object
"""Native XGBoost model support with automatic hyperparameter tracking and feature importance logging.
import mlflow.xgboost
def log_model(xgb_model, artifact_path, conda_env=None, code_paths=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, model_format="xgb", metadata=None, **kwargs):
"""
Log XGBoost model as MLflow artifact.
Parameters:
- xgb_model: Trained XGBoost model (Booster, XGBClassifier, XGBRegressor)
- artifact_path: str - Run-relative artifact path
- conda_env: str or dict, optional - Conda environment
- code_paths: list, optional - Code dependencies
- registered_model_name: str, optional - Registry model name
- signature: ModelSignature, optional - Model schema
- input_example: Any, optional - Example input
- await_registration_for: int - Registration wait time
- pip_requirements: list, optional - Pip requirements
- extra_pip_requirements: list, optional - Additional requirements
- model_format: str - Save format ("xgb", "json", "ubj")
- metadata: dict, optional - Custom metadata
Returns:
ModelInfo object
"""
def load_model(model_uri, dst_path=None):
"""
Load XGBoost model from MLflow.
Parameters:
- model_uri: str - URI pointing to MLflow model
- dst_path: str, optional - Local destination path
Returns:
Loaded XGBoost model object
"""
def autolog(importance_type="weight", log_input_examples=False, log_model_signatures=True, log_models=True, disable=False, exclusive=False, disable_for_unsupported_versions=False, silent=False, registered_model_name=None):
"""
Enable automatic logging for XGBoost training.
Parameters:
- importance_type: str - Feature importance type to log
- log_input_examples: bool - Whether to log input examples
- log_model_signatures: bool - Whether to log model signatures
- log_models: bool - Whether to log trained models
- disable: bool - Disable autologging if True
- exclusive: bool - Exclusive autologging mode
- disable_for_unsupported_versions: bool - Skip for unsupported versions
- silent: bool - Suppress autolog warnings
- registered_model_name: str, optional - Auto-register model name
"""Comprehensive LightGBM support with early stopping integration and automatic metric logging.
import mlflow.lightgbm
def log_model(lgb_model, artifact_path, conda_env=None, code_paths=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, metadata=None, **kwargs):
"""
Log LightGBM model as MLflow artifact.
Parameters:
- lgb_model: Trained LightGBM model (Booster, LGBMClassifier, LGBMRegressor)
- artifact_path: str - Run-relative artifact path
- conda_env: str or dict, optional - Conda environment
- code_paths: list, optional - Code dependencies
- registered_model_name: str, optional - Registry model name
- signature: ModelSignature, optional - Model schema
- input_example: Any, optional - Example input
- await_registration_for: int - Registration wait time
- pip_requirements: list, optional - Pip requirements
- extra_pip_requirements: list, optional - Additional requirements
- metadata: dict, optional - Custom metadata
Returns:
ModelInfo object
"""
def autolog(importance_type="split", log_input_examples=False, log_model_signatures=True, log_models=True, disable=False, exclusive=False, disable_for_unsupported_versions=False, silent=False, registered_model_name=None):
"""
Enable automatic logging for LightGBM training.
Parameters:
- importance_type: str - Feature importance type ("split", "gain")
- log_input_examples: bool - Log input examples
- log_model_signatures: bool - Log model signatures
- log_models: bool - Log trained models
- disable: bool - Disable autologging
- exclusive: bool - Exclusive autologging mode
- disable_for_unsupported_versions: bool - Skip unsupported versions
- silent: bool - Suppress warnings
- registered_model_name: str, optional - Auto-register model name
"""Hugging Face Transformers integration with support for various model types and tokenizers.
import mlflow.transformers
def log_model(transformers_model, artifact_path, task=None, conda_env=None, code_paths=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, metadata=None, tokenizer=None, feature_extractor=None, processor=None, model_config=None, **kwargs):
"""
Log Transformers model as MLflow artifact.
Parameters:
- transformers_model: Transformers model or pipeline object
- artifact_path: str - Run-relative artifact path
- task: str, optional - Task type for the model
- conda_env: str or dict, optional - Conda environment
- code_paths: list, optional - Code dependencies
- registered_model_name: str, optional - Registry model name
- signature: ModelSignature, optional - Model schema
- input_example: Any, optional - Example input
- await_registration_for: int - Registration wait time
- pip_requirements: list, optional - Pip requirements
- extra_pip_requirements: list, optional - Additional requirements
- metadata: dict, optional - Custom metadata
- tokenizer: Tokenizer, optional - Associated tokenizer
- feature_extractor: FeatureExtractor, optional - Feature extractor
- processor: Processor, optional - Processor object
- model_config: dict, optional - Model configuration
Returns:
ModelInfo object
"""
def load_model(model_uri, dst_path=None, device=None):
"""
Load Transformers model from MLflow.
Parameters:
- model_uri: str - URI pointing to MLflow model
- dst_path: str, optional - Local destination path
- device: str or int, optional - Device for model loading
Returns:
Loaded Transformers model or pipeline
"""Apache Spark MLlib integration for distributed machine learning model logging and serving.
import mlflow.spark
def log_model(spark_model, artifact_path, conda_env=None, code_paths=None, dfs_tmpdir=None, sample_input=None, registered_model_name=None, signature=None, input_example=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, pip_requirements=None, extra_pip_requirements=None, metadata=None, **kwargs):
"""
Log Spark MLlib model as MLflow artifact.
Parameters:
- spark_model: Fitted Spark MLlib model or pipeline
- artifact_path: str - Run-relative artifact path
- conda_env: str or dict, optional - Conda environment
- code_paths: list, optional - Code dependencies
- dfs_tmpdir: str, optional - Temporary directory for DFS operations
- sample_input: DataFrame, optional - Sample input for schema inference
- registered_model_name: str, optional - Registry model name
- signature: ModelSignature, optional - Model schema
- input_example: Any, optional - Example input
- await_registration_for: int - Registration wait time
- pip_requirements: list, optional - Pip requirements
- extra_pip_requirements: list, optional - Additional requirements
- metadata: dict, optional - Custom metadata
Returns:
ModelInfo object
"""
def load_model(model_uri, dfs_tmpdir=None):
"""
Load Spark MLlib model from MLflow.
Parameters:
- model_uri: str - URI pointing to MLflow model
- dfs_tmpdir: str, optional - Temporary directory for DFS
Returns:
Loaded Spark MLlib model or pipeline
"""
import mlflow.pyspark.ml
def autolog(disable=False, exclusive=False, disable_for_unsupported_versions=False, silent=False, log_models=True, log_input_examples=False, log_model_signatures=True, log_post_training_metrics=True, registered_model_name=None):
"""
Enable automatic logging for PySpark ML training.
Parameters:
- disable: bool - Disable autologging
- exclusive: bool - Exclusive autologging mode
- disable_for_unsupported_versions: bool - Skip unsupported versions
- silent: bool - Suppress warnings
- log_models: bool - Log trained models
- log_input_examples: bool - Log input examples
- log_model_signatures: bool - Log model signatures
- log_post_training_metrics: bool - Log evaluation metrics
- registered_model_name: str, optional - Auto-register model name
"""Multi-agent conversation framework integration with automatic conversation logging and observability (experimental in MLflow 3.0.0).
import mlflow.ag2
def autolog(disable=False, log_traces=True, log_models=False, log_input_examples=False, log_model_signatures=True, silent=False):
"""
Enable automatic logging for AG2 (AutoGen) conversations.
Parameters:
- disable: bool - Disable AG2 autologging
- log_traces: bool - Log conversation traces
- log_models: bool - Log agent models
- log_input_examples: bool - Log conversation examples
- log_model_signatures: bool - Log model signatures
- silent: bool - Suppress autolog warnings
"""Pydantic AI framework integration for structured AI application development with automatic model and conversation logging (experimental in MLflow 3.0.0).
import mlflow.pydantic_ai
def autolog(disable=False, log_traces=True, log_models=False, log_input_examples=False, log_model_signatures=True, silent=False):
"""
Enable automatic logging for Pydantic AI applications.
Parameters:
- disable: bool - Disable Pydantic AI autologging
- log_traces: bool - Log AI application traces
- log_models: bool - Log AI models
- log_input_examples: bool - Log input examples
- log_model_signatures: bool - Log model signatures
- silent: bool - Suppress autolog warnings
"""Smolagents AI agents framework integration with conversation and task execution logging (experimental in MLflow 3.0.0).
import mlflow.smolagents
def autolog(disable=False, log_traces=True, log_models=False, log_input_examples=False, log_model_signatures=True, silent=False):
"""
Enable automatic logging for Smolagents AI agents.
Parameters:
- disable: bool - Disable Smolagents autologging
- log_traces: bool - Log agent execution traces
- log_models: bool - Log agent models
- log_input_examples: bool - Log input examples
- log_model_signatures: bool - Log model signatures
- silent: bool - Suppress autolog warnings
"""Groq API integration with automatic request/response logging and performance tracking.
import mlflow.groq
def autolog(disable=False, log_traces=True, log_models=False, log_input_examples=False, log_model_signatures=True, silent=False):
"""
Enable automatic logging for Groq API calls.
Parameters:
- disable: bool - Disable Groq autologging
- log_traces: bool - Log API call traces
- log_models: bool - Log model configurations
- log_input_examples: bool - Log input examples
- log_model_signatures: bool - Log model signatures
- silent: bool - Suppress autolog warnings
"""Microsoft Semantic Kernel framework integration for orchestrating AI services with automatic logging and observability.
import mlflow.semantic_kernel
def autolog(disable=False, log_traces=True, log_models=False, log_input_examples=False, log_model_signatures=True, silent=False):
"""
Enable automatic logging for Semantic Kernel applications.
Parameters:
- disable: bool - Disable Semantic Kernel autologging
- log_traces: bool - Log kernel execution traces
- log_models: bool - Log AI service configurations
- log_input_examples: bool - Log input examples
- log_model_signatures: bool - Log model signatures
- silent: bool - Suppress autolog warnings
"""Automatic experiment tracking across supported frameworks with minimal code changes.
import mlflow
def autolog(log_input_examples=False, log_model_signatures=True, log_models=True, log_datasets=True, disable=False, exclusive=False, disable_for_unsupported_versions=False, silent=False, extra_tags=None, registered_model_name=None):
"""
Enable automatic logging across all supported frameworks.
Parameters:
- log_input_examples: bool - Log input examples for models
- log_model_signatures: bool - Log model input/output signatures
- log_models: bool - Log trained model objects
- log_datasets: bool - Log training/validation datasets
- disable: bool - Disable all autologging if True
- exclusive: bool - Use exclusive autologging mode
- disable_for_unsupported_versions: bool - Skip unsupported library versions
- silent: bool - Suppress autolog setup warnings
- extra_tags: dict, optional - Additional tags for all runs
- registered_model_name: str, optional - Auto-register models with name
"""
# Framework-specific autolog functions
def sklearn_autolog(**kwargs):
"""Enable scikit-learn autologging."""
def pytorch_autolog(**kwargs):
"""Enable PyTorch autologging."""
def tensorflow_autolog(**kwargs):
"""Enable TensorFlow/Keras autologging."""
def xgboost_autolog(**kwargs):
"""Enable XGBoost autologging."""
def lightgbm_autolog(**kwargs):
"""Enable LightGBM autologging."""
def spark_autolog(**kwargs):
"""Enable Spark MLlib autologging."""import mlflow
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Generate a synthetic binary-classification dataset.
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Preprocessing + model in one sklearn Pipeline so scaling is logged with the
# model and applied consistently at inference time.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

mlflow.set_experiment("sklearn_integration")
with mlflow.start_run() as run:
    pipeline.fit(X_train, y_train)

    # Log the model with an inferred signature and a small input example.
    signature = mlflow.models.infer_signature(X_train, pipeline.predict(X_train))
    mlflow.sklearn.log_model(
        sk_model=pipeline,
        artifact_path="model",
        signature=signature,
        input_example=X_train[:3],
        registered_model_name="rf_pipeline"
    )

    train_score = pipeline.score(X_train, y_train)
    test_score = pipeline.score(X_test, y_test)
    mlflow.log_metric("train_accuracy", train_score)
    mlflow.log_metric("test_accuracy", test_score)
    print(f"Model logged with accuracy: {test_score:.3f}")

# BUG FIX: mlflow.active_run() returns None once the `with` block exits, so
# build the model URI from the run handle captured above instead.
model_uri = f"runs:/{run.info.run_id}/model"
loaded_model = mlflow.sklearn.load_model(model_uri)
predictions = loaded_model.predict(X_test)
import mlflow
import mlflow.pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Define custom model
class NeuralNet(nn.Module):
    """Simple two-layer feed-forward classifier: fc1 -> ReLU -> fc2.

    Returns raw logits; pair with nn.CrossEntropyLoss, which applies
    log-softmax internally.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)
# Prepare a synthetic binary-classification dataset.
X = torch.randn(1000, 20)
y = torch.randint(0, 2, (1000,))
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

mlflow.set_experiment("pytorch_integration")
with mlflow.start_run() as run:
    model = NeuralNet(input_size=20, hidden_size=50, num_classes=2)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Log hyperparameters up front so the run is self-describing.
    mlflow.log_param("input_size", 20)
    mlflow.log_param("hidden_size", 50)
    mlflow.log_param("learning_rate", 0.01)
    mlflow.log_param("batch_size", 32)

    # Standard training loop; log the mean loss per epoch as a step metric.
    for epoch in range(10):
        total_loss = 0
        for batch_x, batch_y in dataloader:
            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(dataloader)
        mlflow.log_metric("loss", avg_loss, step=epoch)

    # Log the full model, then the raw state dict as a separate artifact.
    mlflow.pytorch.log_model(
        pytorch_model=model,
        artifact_path="model",
        registered_model_name="neural_net"
    )
    mlflow.pytorch.log_state_dict(
        state_dict=model.state_dict(),
        artifact_path="state_dict"
    )
    print("PyTorch model logged successfully")

# BUG FIX: mlflow.active_run() is None after the run context closes; use the
# run handle captured from start_run() to build the model URI.
model_uri = f"runs:/{run.info.run_id}/model"
loaded_model = mlflow.pytorch.load_model(model_uri)
import mlflow
import mlflow
import mlflow.xgboost
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Enable XGBoost autologging: params, metrics, feature importance and the
# trained model are captured automatically on fit().
mlflow.xgboost.autolog(
    importance_type="gain",
    log_input_examples=True,
    log_model_signatures=True,
    registered_model_name="xgb_automodel"
)

# Prepare data
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

mlflow.set_experiment("xgboost_autolog")
with mlflow.start_run():
    # FIX: eval_metric belongs on the estimator constructor — passing it to
    # fit() was deprecated in XGBoost 1.6 and removed in 2.x.
    model = xgb.XGBClassifier(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.1,
        random_state=42,
        eval_metric="logloss"
    )
    # Training is logged automatically by autolog.
    model.fit(
        X_train, y_train,
        eval_set=[(X_test, y_test)],
        verbose=False
    )

    # Additional manual logging on top of autolog.
    test_accuracy = model.score(X_test, y_test)
    mlflow.log_metric("test_accuracy", test_accuracy)
    print(f"XGBoost model auto-logged with accuracy: {test_accuracy:.3f}")
    # Feature importance is logged automatically by autolog.
# Model is automatically registered with specified name
import mlflow
import mlflow
import mlflow.transformers
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

mlflow.set_experiment("transformers_integration")
with mlflow.start_run() as run:
    model_name = "distilbert-base-uncased-finetuned-sst-2-english"

    # Load the tokenizer and model separately for more control.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)

    # top_k=None returns scores for every label — replaces the deprecated
    # return_all_scores=True flag.
    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model=model,
        tokenizer=tokenizer,
        top_k=None
    )

    # Log the pipeline together with its tokenizer and inference config.
    mlflow.transformers.log_model(
        transformers_model=sentiment_pipeline,
        artifact_path="sentiment_model",
        task="text-classification",
        tokenizer=tokenizer,
        model_config={
            "max_length": 512,
            "padding": True,
            "truncation": True
        },
        registered_model_name="sentiment_classifier"
    )

    test_texts = [
        "I love this product!",
        "This is terrible.",
        "It's okay, nothing special."
    ]
    results = sentiment_pipeline(test_texts)

    # BUG FIX: hash() is salted per interpreter session, so hash(text) made
    # artifact names unstable across runs; use the example index instead.
    for idx, (text, result) in enumerate(zip(test_texts, results)):
        print(f"'{text}' -> {result}")
        mlflow.log_text(f"Prediction: {result}", f"example_{idx}.txt")
    print("Transformers model logged with tokenizer and config")

# BUG FIX: mlflow.active_run() is None outside the `with` block; build the
# URI from the captured run handle.
model_uri = f"runs:/{run.info.run_id}/sentiment_model"
loaded_pipeline = mlflow.transformers.load_model(model_uri)
new_predictions = loaded_pipeline(["MLflow is amazing!"])
import mlflow
import mlflow
import mlflow.spark
import mlflow.pyspark.ml
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler, StringIndexer
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# Initialize Spark
spark = SparkSession.builder.appName("MLflow Spark Integration").getOrCreate()

# BUG FIX: estimator/model autologging (log_models, log_input_examples) lives
# in mlflow.pyspark.ml.autolog; mlflow.spark.autolog only tracks datasource
# reads and does not accept these parameters.
mlflow.pyspark.ml.autolog(log_models=True, log_input_examples=True)

mlflow.set_experiment("spark_integration")
with mlflow.start_run():
    # Small synthetic DataFrame: two numeric features, one categorical, a label.
    data = [(0.0, "a", 1.0, 0),
            (1.0, "b", 2.0, 1),
            (2.0, "c", 3.0, 0),
            (3.0, "a", 4.0, 1)] * 100
    columns = ["feature1", "category", "feature2", "label"]
    df = spark.createDataFrame(data, columns)

    # Pipeline: index the categorical column, assemble features, fit a forest.
    indexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
    assembler = VectorAssembler(
        inputCols=["feature1", "categoryIndex", "feature2"],
        outputCol="features"
    )
    rf = RandomForestClassifier(featuresCol="features", labelCol="label")
    pipeline = Pipeline(stages=[indexer, assembler, rf])

    train_df, test_df = df.randomSplit([0.8, 0.2], seed=42)

    # Training is captured by autolog.
    model = pipeline.fit(train_df)
    predictions = model.transform(test_df)

    evaluator = MulticlassClassificationEvaluator(
        labelCol="label",
        predictionCol="prediction",
        metricName="accuracy"
    )
    accuracy = evaluator.evaluate(predictions)
    mlflow.log_metric("test_accuracy", accuracy)

    # Also log the fitted pipeline explicitly to control the artifact path
    # and register it by name.
    mlflow.spark.log_model(
        spark_model=model,
        artifact_path="spark_pipeline",
        registered_model_name="spark_rf_pipeline"
    )
    print(f"Spark pipeline logged with accuracy: {accuracy:.3f}")
spark.stop()
import mlflow
import mlflow
import mlflow.sklearn
import mlflow.xgboost
import mlflow.lightgbm
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb

# Generate data
X, y = make_classification(n_samples=10000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

mlflow.set_experiment("framework_comparison")

# Train the same task across frameworks; each entry pairs an estimator with
# its framework-specific MLflow logging function.
frameworks = {
    "sklearn": {
        "model": RandomForestClassifier(n_estimators=100, random_state=42),
        "log_func": mlflow.sklearn.log_model
    },
    "xgboost": {
        "model": xgb.XGBClassifier(n_estimators=100, random_state=42),
        "log_func": mlflow.xgboost.log_model
    },
    "lightgbm": {
        "model": lgb.LGBMClassifier(n_estimators=100, random_state=42),
        "log_func": mlflow.lightgbm.log_model
    }
}

results = {}
for framework_name, config in frameworks.items():
    with mlflow.start_run(run_name=f"{framework_name}_model"):
        model = config["model"]
        model.fit(X_train, y_train)

        train_acc = model.score(X_train, y_train)
        test_acc = model.score(X_test, y_test)

        mlflow.log_param("framework", framework_name)
        mlflow.log_metric("train_accuracy", train_acc)
        mlflow.log_metric("test_accuracy", test_acc)

        # The model is passed positionally because the first parameter name
        # differs per framework (sk_model / xgb_model / lgb_model).
        config["log_func"](
            model,
            artifact_path="model",
            registered_model_name=f"{framework_name}_classifier"
        )

        # active_run() is valid here — we are still inside the run context.
        results[framework_name] = {
            "train_acc": train_acc,
            "test_acc": test_acc,
            "run_id": mlflow.active_run().info.run_id
        }
        print(f"{framework_name}: Train={train_acc:.3f}, Test={test_acc:.3f}")

# Pick the framework with the best held-out accuracy.
best_framework = max(results, key=lambda k: results[k]["test_acc"])
print(f"\nBest framework: {best_framework} (Test Acc: {results[best_framework]['test_acc']:.3f})")import mlflow
import mlflow
import warnings

# Enable universal autologging: any supported framework trained after this
# call is logged automatically (params, metrics, models, datasets).
mlflow.autolog(
    log_input_examples=True,
    log_model_signatures=True,
    log_models=True,
    log_datasets=True,
    extra_tags={"environment": "production", "team": "ml-platform"},
    registered_model_name="auto_registered_model"
)

# Suppress warnings for cleaner example output.
warnings.filterwarnings("ignore")

mlflow.set_experiment("universal_autolog")

from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1000, n_features=10, random_state=42)

# Train multiple models — each fit() is automatically logged.
models = [
    ("sklearn_gb", GradientBoostingClassifier(random_state=42)),
    ("xgboost", xgb.XGBClassifier(random_state=42))
]

for model_name, model in models:
    with mlflow.start_run(run_name=f"auto_{model_name}"):
        # Just train — autolog captures params, metrics and the model.
        model.fit(X, y)
        # Only custom metrics need manual logging.
        custom_score = model.score(X, y)
        mlflow.log_metric("custom_accuracy", custom_score)
        print(f"{model_name} automatically logged")

# Disable autologging when done
mlflow.autolog(disable=True)
from typing import Any, Dict, List, Optional, Union
import torch
import tensorflow as tf
from sklearn.base import BaseEstimator
import xgboost
import lightgbm

# Common model types across frameworks.
SklearnModel = BaseEstimator
PyTorchModel = torch.nn.Module
TensorFlowModel = Union[tf.keras.Model, str]  # in-memory model or SavedModel path
XGBoostModel = Union[xgboost.Booster, xgboost.XGBModel]
LightGBMModel = Union[lightgbm.Booster, lightgbm.LGBMModel]

# Framework-specific logging function signatures.
# NOTE(review): 'ModelInfo' is a forward reference (declared by mlflow.models
# at runtime); it is deliberately not imported in these illustrative stubs.
def sklearn_log_model(
    sk_model: SklearnModel,
    artifact_path: str,
    **kwargs
) -> 'ModelInfo': ...

def pytorch_log_model(
    pytorch_model: PyTorchModel,
    artifact_path: str,
    **kwargs
) -> 'ModelInfo': ...

def tensorflow_log_model(
    tf_saved_model_dir: str,
    artifact_path: str,
    **kwargs
) -> 'ModelInfo': ...

def xgboost_log_model(
    xgb_model: XGBoostModel,
    artifact_path: str,
    **kwargs
) -> 'ModelInfo': ...

def lightgbm_log_model(
    lgb_model: LightGBMModel,
    artifact_path: str,
    **kwargs
) -> 'ModelInfo': ...

# Loading function return types.
def sklearn_load_model(model_uri: str) -> SklearnModel: ...
def pytorch_load_model(model_uri: str) -> PyTorchModel: ...
def tensorflow_load_model(model_uri: str) -> TensorFlowModel: ...
def xgboost_load_model(model_uri: str) -> XGBoostModel: ...
def lightgbm_load_model(model_uri: str) -> LightGBMModel: ...

# Autolog configuration types.
AutologConfig = Dict[str, Union[bool, str, Dict[str, Any]]]

def autolog_function(
    log_input_examples: bool = False,
    log_model_signatures: bool = True,
    log_models: bool = True,
    disable: bool = False,
    exclusive: bool = False,
    disable_for_unsupported_versions: bool = False,
    silent: bool = False,
    registered_model_name: Optional[str] = None,
    **kwargs
) -> None: ...

# Framework-specific marker types.
class TorchStateDict:
    """PyTorch model state dictionary type."""

class SparkPipeline:
    """Spark ML Pipeline type."""

class TransformersPipeline:
    """Hugging Face Transformers Pipeline type."""

# Serialization format constants.
SERIALIZATION_FORMAT_PICKLE = "pickle"
SERIALIZATION_FORMAT_CLOUDPICKLE = "cloudpickle"
SERIALIZATION_FORMAT_JSON = "json"Install with Tessl CLI
npx tessl i tessl/pypi-mlflow