MLflow is an open source platform for the complete machine learning lifecycle.

MLflow's model management capabilities provide comprehensive model lifecycle support, including logging, loading, evaluation, deployment, and registry operations. The system supports multiple ML frameworks with a universal model format and deployment across various platforms.
Core functions for saving and loading models with support for multiple ML frameworks and custom model formats.
def log_model(
    model,
    artifact_path,
    registered_model_name=None,
    signature=None,
    input_example=None,
    await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
    pip_requirements=None,
    extra_pip_requirements=None,
    conda_env=None,
    extra_conda_requirements=None,
    metadata=None,
    **kwargs,
):
    """
    Log machine learning model as MLflow artifact.

    Parameters:
    - model: Model object - The model to be logged
    - artifact_path: str - Relative artifact path within run
    - registered_model_name: str, optional - Name for model registry
    - signature: ModelSignature, optional - Model input/output signature
    - input_example: Any, optional - Example model input for inference
    - await_registration_for: int - Seconds to wait for registry completion
    - pip_requirements: list, optional - List of pip package requirements
    - extra_pip_requirements: list, optional - Additional pip requirements
    - conda_env: str or dict, optional - Conda environment specification
    - extra_conda_requirements: list, optional - Additional conda requirements
    - metadata: dict, optional - Custom model metadata

    Returns:
        ModelInfo object with logged model details
    """
def load_model(model_uri, dst_path=None, **kwargs):
    """
    Load MLflow model from URI.

    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - dst_path: str, optional - Local destination for model artifacts
    - kwargs: Additional framework-specific arguments

    Returns:
        Loaded model object ready for inference
    """


def predict(model_uri, input_data, content_type=None, json_format=None, **kwargs):
    """
    Generate predictions using MLflow model.

    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - input_data: DataFrame, array, or dict - Input data for predictions
    - content_type: str, optional - Input data content type
    - json_format: str, optional - JSON serialization format
    - kwargs: Additional prediction arguments

    Returns:
        Predictions in framework-specific format
    """


def get_model_info(model_uri):
    """
    Get comprehensive model information.

    Parameters:
    - model_uri: str - URI pointing to MLflow model

    Returns:
        ModelInfo object with model metadata and signature
    """


def set_model(model):
    """
    Set active model in current context.

    Parameters:
    - model: Model object - Model to set as active
    """


def update_model_requirements(model_uri, requirements_file_path):
    """
    Update model requirements from file.

    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - requirements_file_path: str - Path to requirements file
    """


# Comprehensive model evaluation framework with built-in metrics, custom
# evaluators, and automated assessment capabilities.
def evaluate(
    model=None,
    data=None,
    targets=None,
    model_type=None,
    evaluators=None,
    evaluator_config=None,
    custom_metrics=None,
    extra_metrics=None,
    custom_artifacts=None,
    baseline_model=None,
    env_manager=None,
    model_config=None,
    baseline_config=None,
    inference_params=None,
    baseline_inference_params=None,
):
    """
    Evaluate model performance with comprehensive metrics.

    Parameters:
    - model: Model, callable, or URI - Model to evaluate
    - data: DataFrame, array, or URI - Evaluation dataset
    - targets: str or array, optional - Target column name or values
    - model_type: str, optional - Type of model (classifier, regressor, etc.)
    - evaluators: list, optional - List of evaluator names or objects
    - evaluator_config: dict, optional - Configuration for evaluators
    - custom_metrics: list, optional - Custom metric functions
    - extra_metrics: list, optional - Additional built-in metrics
    - custom_artifacts: list, optional - Custom artifact generators
    - baseline_model: Model or URI, optional - Baseline for comparison
    - env_manager: str, optional - Environment management method
    - model_config: dict, optional - Model configuration parameters
    - baseline_config: dict, optional - Baseline model configuration
    - inference_params: dict, optional - Model inference parameters
    - baseline_inference_params: dict, optional - Baseline inference parameters

    Returns:
        EvaluationResult object with metrics and artifacts
    """


def list_evaluators():
    """
    List available built-in evaluators.

    Returns:
        List of evaluator names and descriptions
    """


def make_metric(
    eval_fn,
    greater_is_better=True,
    name=None,
    long_name=None,
    version=None,
    metric_details=None,
    metric_metadata=None,
    genai_metric_args=None,
):
    """
    Create custom evaluation metric.

    Parameters:
    - eval_fn: callable - Function that computes metric
    - greater_is_better: bool - Whether higher values are better
    - name: str, optional - Metric name (inferred if not provided)
    - long_name: str, optional - Human-readable metric name
    - version: str, optional - Metric version
    - metric_details: str, optional - Metric description
    - metric_metadata: dict, optional - Additional metadata
    - genai_metric_args: dict, optional - GenAI-specific arguments

    Returns:
        EvaluationMetric object
    """


def validate_evaluation_results(results):
    """
    Validate evaluation results structure.

    Parameters:
    - results: EvaluationResult - Results to validate

    Returns:
        bool - Whether results are valid
    """


# Functions for defining and validating model input/output schemas and
# signatures for robust model interfaces.
def infer_signature(model_input, model_output=None, params=None):
    """
    Infer model signature from input/output examples.

    Parameters:
    - model_input: DataFrame, array, dict - Example model input
    - model_output: array, dict, optional - Example model output
    - params: dict, optional - Model parameters schema

    Returns:
        ModelSignature object describing input/output schema
    """


def set_signature(model_uri, signature):
    """
    Set signature for existing model.

    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - signature: ModelSignature - Signature to set
    """


def validate_schema(input_data, expected_schema):
    """
    Validate data against expected schema.

    Parameters:
    - input_data: DataFrame, array, dict - Data to validate
    - expected_schema: Schema - Expected data schema

    Returns:
        bool - Whether data matches schema
    """


def validate_serving_input(input_data, expected_signature):
    """
    Validate serving input against model signature.

    Parameters:
    - input_data: dict - Serving input data
    - expected_signature: ModelSignature - Expected signature

    Returns:
        bool - Whether input is valid for serving
    """


def convert_input_example_to_serving_input(input_example, signature=None):
    """
    Convert input example to serving format.

    Parameters:
    - input_example: Any - Model input example
    - signature: ModelSignature, optional - Model signature

    Returns:
        dict - Input in serving API format
    """


# Functions for registering models and managing model versions in the
# MLflow Model Registry.
def register_model(model_uri, name, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS, tags=None, **kwargs):
    """
    Register model in MLflow Model Registry.

    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - name: str - Name of registered model
    - await_registration_for: int - Seconds to wait for completion
    - tags: dict, optional - Tags for model version
    - kwargs: Additional registration arguments

    Returns:
        ModelVersion object representing registered version
    """


def add_libraries_to_model(model_uri, run_id=None, registered_model_name=None, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS):
    """
    Add current environment libraries to model.

    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - run_id: str, optional - Run ID for model artifacts
    - registered_model_name: str, optional - Name for registration
    - await_registration_for: int - Seconds to wait for completion

    Returns:
        Updated model with library dependencies
    """


# Classes and functions for managing model configuration, resources, and
# deployment requirements.
class ModelConfig:
    def __init__(self, development_config=None, **kwargs):
        """
        Model configuration for serving and deployment.

        Parameters:
        - development_config: dict, optional - Development-specific config
        - kwargs: Additional configuration parameters
        """


class Resource:
    def __init__(self, name, resource_type, config=None):
        """
        Model resource specification.

        Parameters:
        - name: str - Resource name
        - resource_type: ResourceType - Type of resource
        - config: dict, optional - Resource configuration
        """


class ResourceType:
    """Enumeration of supported resource types."""

    DATABRICKS_SERVING_ENDPOINT = "databricks_serving_endpoint"
    DATABRICKS_VECTOR_SEARCH_INDEX = "databricks_vector_search_index"


# Functions for building and deploying models to various serving platforms
# and environments.
def build_docker(
    model_uri,
    name=None,
    env_manager=None,
    mlflow_home=None,
    install_java=False,
    install_mlflow=False,
    enable_mlserver=False,
    base_image=None,
):
    """
    Build Docker image for model serving.

    Parameters:
    - model_uri: str - URI pointing to MLflow model
    - name: str, optional - Docker image name
    - env_manager: str, optional - Environment manager (conda, virtualenv)
    - mlflow_home: str, optional - MLflow installation path
    - install_java: bool - Whether to install Java runtime
    - install_mlflow: bool - Whether to install MLflow in image
    - enable_mlserver: bool - Whether to use MLServer for serving
    - base_image: str, optional - Base Docker image

    Returns:
        str - Built Docker image name
    """


# Utilities for managing and validating model input examples for testing
# and documentation.
class ModelInputExample:
    def __init__(self, input_example):
        """
        Container for model input example.

        Parameters:
        - input_example: Any - Example input data
        """


import mlflow
# Example: train a model, then log it with a signature and input example.
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Generate sample data
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Log model with signature and input example
with mlflow.start_run():
    # Infer model signature
    signature = mlflow.models.infer_signature(X_train, model.predict(X_train))

    # Log model with metadata
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="model",
        signature=signature,
        input_example=X_train[:3],
        registered_model_name="random-forest-classifier",
        metadata={"algorithm": "RandomForest", "framework": "scikit-learn"},
    )

    # Get model info (artifact URI is resolved within the active run)
    model_uri = mlflow.get_artifact_uri("model")
    model_info = mlflow.models.get_model_info(model_uri)
    print(f"Model signature: {model_info.signature}")

# Load model for inference
loaded_model = mlflow.sklearn.load_model(model_uri)
predictions = loaded_model.predict(X_test)

import mlflow
# Example: evaluate a logged model with the built-in "default" evaluator.
import mlflow
import pandas as pd
from mlflow.models import evaluate

# Prepare evaluation data
eval_data = pd.DataFrame(X_test)
eval_data['target'] = y_test

# Evaluate model with built-in metrics
with mlflow.start_run():
    results = evaluate(
        model=model_uri,
        data=eval_data,
        targets="target",
        model_type="classifier",
        evaluators=["default"],
        evaluator_config={
            "pos_label": 1,
            "average": "weighted",
        },
    )

    # Print evaluation results
    print("Evaluation metrics:")
    for metric_name, metric_value in results.metrics.items():
        print(f"{metric_name}: {metric_value}")

    # Log evaluation results
    mlflow.log_metrics(results.metrics)

import mlflow
# Example: define and use a custom evaluation metric.
import mlflow
from mlflow.models import make_metric, evaluate
import numpy as np


# Define custom metric
def balanced_accuracy(eval_df, builtin_metrics):
    """Custom balanced accuracy metric."""
    y_true = eval_df["target"]
    y_pred = eval_df["prediction"]
    # Calculate balanced accuracy
    from sklearn.metrics import balanced_accuracy_score
    return balanced_accuracy_score(y_true, y_pred)


# Create metric object
balanced_acc_metric = make_metric(
    eval_fn=balanced_accuracy,
    greater_is_better=True,
    name="balanced_accuracy",
    long_name="Balanced Accuracy Score",
)

# Evaluate with custom metric
results = evaluate(
    model=model_uri,
    data=eval_data,
    targets="target",
    model_type="classifier",
    extra_metrics=[balanced_acc_metric],
)
print(f"Balanced accuracy: {results.metrics['balanced_accuracy']}")

import mlflow
# Example: compare a candidate model against a baseline model.
import mlflow
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Train baseline model
baseline_model = GradientBoostingClassifier(random_state=42)
baseline_model.fit(X_train, y_train)

# Log baseline model
with mlflow.start_run(run_name="baseline_model"):
    mlflow.sklearn.log_model(baseline_model, "model")
    baseline_uri = mlflow.get_artifact_uri("model")

# Train candidate model
candidate_model = RandomForestClassifier(n_estimators=200, random_state=42)
candidate_model.fit(X_train, y_train)

# Log candidate model
with mlflow.start_run(run_name="candidate_model"):
    mlflow.sklearn.log_model(candidate_model, "model")
    candidate_uri = mlflow.get_artifact_uri("model")

# Compare models
with mlflow.start_run(run_name="model_comparison"):
    results = evaluate(
        model=candidate_uri,
        data=eval_data,
        targets="target",
        model_type="classifier",
        baseline_model=baseline_uri,
        evaluators=["default"],
    )

    # Log comparison results
    for metric_name, metric_value in results.metrics.items():
        mlflow.log_metric(metric_name, metric_value)

import mlflow.models
# Example: build a Docker image for model serving.
import mlflow.models

# Build Docker image for model serving
model_uri = "runs:/abc123/model"
docker_image = mlflow.models.build_docker(
    model_uri=model_uri,
    name="my-model-serving",
    env_manager="conda",
    enable_mlserver=True,
)
print(f"Built Docker image: {docker_image}")
# The image can now be deployed to container platforms
# docker run -p 5000:8080 my-model-serving

import mlflow
# Example: log a model with configuration and resource declarations.
import mlflow
from mlflow.models import ModelConfig, Resource, ResourceType

# Define model configuration
model_config = ModelConfig(
    development_config={
        "batch_size": 32,
        "max_sequence_length": 512,
    }
)

# Define model resources
vector_search_resource = Resource(
    name="product_vectors",
    resource_type=ResourceType.DATABRICKS_VECTOR_SEARCH_INDEX,
    config={
        "endpoint_name": "vector_search_endpoint",
        "index_name": "product_embeddings",
    },
)

# Log model with configuration and resources
with mlflow.start_run():
    mlflow.transformers.log_model(
        transformers_model=model,
        artifact_path="model",
        model_config=model_config,
        resources=[vector_search_resource],
    )

from mlflow.models.model import Model, ModelInfo
from typing import Any, Dict, List

from mlflow.entities.model_registry import ModelVersion, RegisteredModel
from mlflow.models.evaluation import EvaluationResult, EvaluationMetric, EvaluationArtifact, MetricThreshold
from mlflow.models.model_config import ModelConfig
from mlflow.models.resources import Resource, ResourceType
from mlflow.models.signature import ModelSignature
from mlflow.models.utils import ModelInputExample
# Type declarations documenting the public attribute surface of the
# mlflow.models objects above. These are stub listings, not runnable
# definitions: `Schema`, `ColSpec`, `DataType`, and `ParamsSchema` are
# presumably provided by mlflow.types — TODO confirm against the package.
class ModelInfo:
    # Metadata describing a logged model artifact.
    artifact_path: str
    flavors: Dict[str, Any]
    model_size_bytes: int
    model_uuid: str
    run_id: str
    saved_input_example_info: Dict[str, Any]
    signature: ModelSignature
    utc_time_created: str
    mlflow_version: str
    metadata: Dict[str, Any]


class ModelSignature:
    # Input/output/params schema of a model.
    inputs: Schema
    outputs: Schema
    params: ParamsSchema


class Schema:
    # Ordered collection of column specifications.
    inputs: List[ColSpec]


class ColSpec:
    # A single column's type, name, and requiredness.
    type: DataType
    name: str
    required: bool


class EvaluationResult:
    # Output of mlflow.models.evaluate.
    metrics: Dict[str, float]
    artifacts: Dict[str, str]
    run_id: str
    baseline_model_metrics: Dict[str, float]


class EvaluationMetric:
    # A metric definition created by make_metric.
    name: str
    long_name: str
    version: str
    metric_details: str
    greater_is_better: bool
    eval_fn: callable


class EvaluationArtifact:
    # An artifact produced during evaluation.
    uri: str
    content: Any


class MetricThreshold:
    # Threshold used for model validation against a baseline.
    threshold: float
    greater_is_better: bool


class ModelInputExample:
    # Container for an example model input.
    input_example: Any


class ModelConfig:
    # Serving/deployment configuration.
    development_config: Dict[str, Any]


class Resource:
    # A declared external resource dependency of a model.
    name: str
    resource_type: ResourceType
    config: Dict[str, Any]


class ResourceType:
    # Supported resource type identifiers.
    DATABRICKS_SERVING_ENDPOINT: str
    DATABRICKS_VECTOR_SEARCH_INDEX: str


class Model:
    # The core MLflow model descriptor (MLmodel file contents).
    artifact_path: str
    flavors: Dict[str, Any]
    model_uuid: str
    mlflow_version: str
    saved_input_example_info: Dict[str, Any]
    signature: ModelSignature
    utc_time_created: str


# Install with Tessl CLI
npx tessl i tessl/pypi-mlflow