MLflow is an open source platform for the complete machine learning lifecycle.

MLflow's configuration system provides comprehensive control over tracking URIs, authentication, storage backends, and system behavior. The configuration API enables programmatic setup of MLflow environments with support for various storage systems, authentication methods, and deployment configurations.

Functions for configuring MLflow tracking and registry URIs with support for various backend storage systems.
def set_tracking_uri(uri):
    """Set the tracking server URI for MLflow operations.

    Args:
        uri: str - Tracking server URI (file://, http://, https://,
            databricks://, etc.)

    Supported URI formats:
        - file:///absolute/path/to/directory (local filesystem)
        - http://hostname:port (MLflow tracking server)
        - https://hostname:port (secure MLflow tracking server)
        - databricks:// (Databricks workspace)
        - sqlite:///path/to/database.db (SQLite database)
        - mysql://user:password@host:port/database (MySQL)
        - postgresql://user:password@host:port/database (PostgreSQL)
    """


def get_tracking_uri():
    """Get current MLflow tracking URI.

    Returns:
        str - Current tracking URI or default if not set
    """


def set_registry_uri(uri):
    """Set the model registry URI for MLflow model registry operations.

    Args:
        uri: str - Model registry URI (same formats as tracking URI)
    """


def get_registry_uri():
    """Get current MLflow model registry URI.

    Returns:
        str - Current registry URI or tracking URI if not set separately
    """


def get_artifact_uri(run_id=None):
    """Get artifact storage URI for run or current run.

    Args:
        run_id: str, optional - Run ID (defaults to current active run)

    Returns:
        str - Artifact storage URI for the specified run
    """
def set_system_metrics_node_id(node_id):
    """Set node identifier for system metrics collection.

    Args:
        node_id: str - Unique identifier for the current node
    """


def set_system_metrics_sampling_interval(interval):
    """Set sampling interval for system metrics collection.

    Args:
        interval: float - Sampling interval in seconds
    """


def set_system_metrics_samples_before_logging(samples):
    """Set number of samples to collect before logging system metrics.

    Args:
        samples: int - Number of samples to buffer before logging
    """


# ---------------------------------------------------------------------------
# Functions for managing authentication credentials and environment
# configuration.
# ---------------------------------------------------------------------------
def set_experiment_id(experiment_id):
    """Set active experiment ID for current session.

    Args:
        experiment_id: str - Experiment ID to set as active
    """


def get_experiment_id():
    """Get current active experiment ID.

    Returns:
        str - Active experiment ID or None if not set
    """


def set_run_id(run_id):
    """Set active run ID for current session.

    Args:
        run_id: str - Run ID to set as active
    """


def get_run_id():
    """Get current active run ID.

    Returns:
        str - Active run ID or None if not set
    """


# ---------------------------------------------------------------------------
# Configuration options for artifact storage backends including S3, Azure,
# GCS, and local storage.
# ---------------------------------------------------------------------------
def set_default_artifact_root(artifact_root):
    """Set default artifact root for experiments.

    Args:
        artifact_root: str - Default artifact storage location

    Supported formats:
        - s3://bucket/path (Amazon S3)
        - gs://bucket/path (Google Cloud Storage)
        - wasbs://container@account.blob.core.windows.net/path (Azure Blob)
        - hdfs://namenode:port/path (HDFS)
        - file:///absolute/path (local filesystem)
    """


def get_default_artifact_root():
    """Get default artifact root configuration.

    Returns:
        str - Default artifact root path
    """


# ---------------------------------------------------------------------------
# Configuration for automatic system metrics collection including CPU,
# memory, and GPU usage.
# ---------------------------------------------------------------------------
def enable_system_metrics_logging():
    """Enable automatic system metrics logging for MLflow runs.

    Collects CPU, memory, disk, and network metrics during run execution.
    """


def disable_system_metrics_logging():
    """Disable automatic system metrics logging."""


def is_system_metrics_logging_enabled():
    """Check if system metrics logging is enabled.

    Returns:
        bool - True if system metrics logging is enabled
    """


# ---------------------------------------------------------------------------
# Configuration utilities for different backend store types and database
# connections.
# ---------------------------------------------------------------------------
def is_tracking_uri_set():
    """Check if tracking URI has been explicitly set.

    Returns:
        bool - True if tracking URI is explicitly configured
    """


def get_db_profile_from_uri(db_uri):
    """Extract database connection profile from URI.

    Args:
        db_uri: str - Database URI

    Returns:
        dict - Database connection profile with host, port, database, etc.
    """


def construct_db_uri_from_profile(profile):
    """Construct database URI from connection profile.

    Args:
        profile: dict - Database connection profile

    Returns:
        str - Constructed database URI
    """


# ---------------------------------------------------------------------------
# Functions for validating configuration settings and checking system
# requirements.
# ---------------------------------------------------------------------------
def validate_tracking_uri(uri):
    """Validate tracking URI format and accessibility.

    Args:
        uri: str - Tracking URI to validate

    Returns:
        bool - True if URI is valid and accessible

    Raises:
        MlflowException - If URI is invalid or inaccessible
    """


def validate_artifact_root(artifact_root):
    """Validate artifact storage root configuration.

    Args:
        artifact_root: str - Artifact root path to validate

    Returns:
        bool - True if artifact root is valid and accessible
    """


def check_server_connection(tracking_uri=None):
    """Check connection to MLflow tracking server.

    Args:
        tracking_uri: str, optional - Server URI to check (defaults to
            current)

    Returns:
        dict - Server status and version information
    """


# ---------------------------------------------------------------------------
# Configuration through environment variables and programmatic settings
# management.
# ---------------------------------------------------------------------------
class MLflowEnvironment:
    """MLflow environment variable constants and utilities.

    Constant values are the exact environment variable names MLflow reads;
    the static helpers are thin wrappers over ``os.environ``.
    """

    # Tracking and Registry URIs
    MLFLOW_TRACKING_URI = "MLFLOW_TRACKING_URI"
    MLFLOW_REGISTRY_URI = "MLFLOW_REGISTRY_URI"

    # Authentication
    MLFLOW_TRACKING_USERNAME = "MLFLOW_TRACKING_USERNAME"
    MLFLOW_TRACKING_PASSWORD = "MLFLOW_TRACKING_PASSWORD"
    MLFLOW_TRACKING_TOKEN = "MLFLOW_TRACKING_TOKEN"
    MLFLOW_TRACKING_INSECURE_TLS = "MLFLOW_TRACKING_INSECURE_TLS"

    # Artifact Storage
    MLFLOW_DEFAULT_ARTIFACT_ROOT = "MLFLOW_DEFAULT_ARTIFACT_ROOT"
    MLFLOW_ARTIFACT_UPLOAD_DOWNLOAD_TIMEOUT = "MLFLOW_ARTIFACT_UPLOAD_DOWNLOAD_TIMEOUT"

    # System Configuration
    MLFLOW_EXPERIMENT_ID = "MLFLOW_EXPERIMENT_ID"
    MLFLOW_RUN_ID = "MLFLOW_RUN_ID"
    MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING = "MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING"

    # S3 Configuration
    AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID"
    AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY"
    AWS_SESSION_TOKEN = "AWS_SESSION_TOKEN"
    MLFLOW_S3_ENDPOINT_URL = "MLFLOW_S3_ENDPOINT_URL"
    MLFLOW_S3_IGNORE_TLS = "MLFLOW_S3_IGNORE_TLS"

    # Azure Configuration
    AZURE_STORAGE_CONNECTION_STRING = "AZURE_STORAGE_CONNECTION_STRING"
    AZURE_STORAGE_ACCESS_KEY = "AZURE_STORAGE_ACCESS_KEY"

    # Google Cloud Configuration
    GOOGLE_APPLICATION_CREDENTIALS = "GOOGLE_APPLICATION_CREDENTIALS"

    @staticmethod
    def get_env_var(key, default=None):
        """Get environment variable value, or *default* when unset."""
        import os

        return os.getenv(key, default)

    @staticmethod
    def set_env_var(key, value):
        """Set environment variable *key* to ``str(value)``."""
        import os

        os.environ[key] = str(value)
def get_env_vars_dict():
    """Get dictionary of all MLflow-related environment variables.

    Returns:
        dict - Environment variables and their values
    """


def set_env_vars_from_dict(env_dict):
    """Set environment variables from dictionary.

    Args:
        env_dict: dict - Environment variables to set
    """


# ---------------------------------------------------------------------------
# Context managers for temporary configuration changes and isolated
# environments.
# ---------------------------------------------------------------------------
@contextmanager
def temporary_tracking_uri(uri):
"""
Temporarily set tracking URI for block execution.
Parameters:
- uri: str - Temporary tracking URI
Usage:
with temporary_tracking_uri("sqlite:///temp.db"):
# Operations use temporary URI
mlflow.create_experiment("temp_exp")
# Original URI restored
"""
@contextmanager
def temporary_experiment_id(experiment_id):
"""
Temporarily set experiment ID for block execution.
Parameters:
- experiment_id: str - Temporary experiment ID
"""
@contextmanager
def isolated_mlflow_config():
"""
Create isolated MLflow configuration context.
All configuration changes are isolated and restored on exit.
"""import mlflow
import os

# Set tracking URI
mlflow.set_tracking_uri("http://localhost:5000")

# Verify connection
print(f"Tracking URI: {mlflow.get_tracking_uri()}")

# Set registry URI (if different from tracking)
mlflow.set_registry_uri("sqlite:///model_registry.db")

# Set default artifact location
mlflow.set_default_artifact_root("s3://my-mlflow-bucket/artifacts")

# Enable system metrics logging
mlflow.enable_system_metrics_logging()

# Check current configuration
print(f"Registry URI: {mlflow.get_registry_uri()}")
print(f"Artifact root: {mlflow.get_default_artifact_root()}")
print(f"System metrics enabled: {mlflow.is_system_metrics_logging_enabled()}")

# --- Next example: configuration via environment variables ---
import os
import mlflow

# Set configuration via environment variables
os.environ["MLFLOW_TRACKING_URI"] = "https://mlflow.company.com"
os.environ["MLFLOW_TRACKING_USERNAME"] = "user@company.com"
os.environ["MLFLOW_TRACKING_PASSWORD"] = "secure_password"
os.environ["MLFLOW_DEFAULT_ARTIFACT_ROOT"] = "s3://company-mlflow/artifacts"
os.environ["MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING"] = "true"

# MLflow automatically uses environment variables
print(f"Tracking URI: {mlflow.get_tracking_uri()}")

# Alternative: Use MLflowEnvironment helper
from mlflow.config import MLflowEnvironment

MLflowEnvironment.set_env_var("MLFLOW_EXPERIMENT_ID", "123")
experiment_id = MLflowEnvironment.get_env_var("MLFLOW_EXPERIMENT_ID")
print(f"Experiment ID: {experiment_id}")

# Get all MLflow environment variables
env_vars = mlflow.get_env_vars_dict()
for key, value in env_vars.items():
    if value:  # Only show set variables
        print(f"{key}: {value}")

# --- Next example: database backend configuration ---
import mlflow
# PostgreSQL backend
postgres_uri = "postgresql://mlflow:password@localhost:5432/mlflow_db"
mlflow.set_tracking_uri(postgres_uri)

# MySQL backend
mysql_uri = "mysql://mlflow:password@localhost:3306/mlflow_db"
mlflow.set_tracking_uri(mysql_uri)

# SQLite backend (for development)
sqlite_uri = "sqlite:///mlflow.db"
mlflow.set_tracking_uri(sqlite_uri)

# Validate configuration
try:
    mlflow.validate_tracking_uri(mlflow.get_tracking_uri())
    print("Tracking URI is valid")

    # Check server connection
    server_status = mlflow.check_server_connection()
    print(f"Server version: {server_status.get('version')}")
    print(f"Server status: {server_status.get('status')}")
except Exception as e:
    print(f"Configuration error: {e}")

# --- Next example: artifact storage backend configuration ---
import mlflow
import os

# AWS S3 Configuration
os.environ["AWS_ACCESS_KEY_ID"] = "your_access_key"
os.environ["AWS_SECRET_ACCESS_KEY"] = "your_secret_key"
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "https://s3.amazonaws.com"
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_default_artifact_root("s3://my-mlflow-bucket/artifacts")

# Azure Blob Storage Configuration
os.environ["AZURE_STORAGE_CONNECTION_STRING"] = "DefaultEndpointsProtocol=https;..."
mlflow.set_default_artifact_root("wasbs://container@account.blob.core.windows.net/artifacts")

# Google Cloud Storage Configuration
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account.json"
mlflow.set_default_artifact_root("gs://my-mlflow-bucket/artifacts")

# Validate artifact storage
try:
    mlflow.validate_artifact_root(mlflow.get_default_artifact_root())
    print("Artifact root is accessible")
except Exception as e:
    print(f"Artifact storage error: {e}")

# --- Next example: system metrics configuration ---
import mlflow
# Configure system metrics collection
mlflow.set_system_metrics_node_id("worker-node-1")
mlflow.set_system_metrics_sampling_interval(1.0)  # 1 second intervals
mlflow.set_system_metrics_samples_before_logging(60)  # Log every 60 samples

# Enable system metrics
mlflow.enable_system_metrics_logging()

# Start experiment with system metrics
mlflow.set_experiment("system-metrics-experiment")

with mlflow.start_run():
    # Run your ML code - system metrics collected automatically
    import time
    import numpy as np

    # Simulate ML training workload
    for epoch in range(10):
        # Simulate CPU/memory intensive work
        data = np.random.randn(10000, 100)
        result = np.dot(data, data.T)
        mlflow.log_metric("epoch", epoch)
        time.sleep(2)  # System metrics collected during sleep

    print("System metrics logged automatically")

# Check system metrics in MLflow UI
import mlflow
from mlflow.config import temporary_tracking_uri, temporary_experiment_id

# Set default configuration
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("default-experiment")

# Use temporary configuration
with temporary_tracking_uri("sqlite:///temp_experiments.db"):
    # This block uses temporary SQLite backend
    temp_exp_id = mlflow.create_experiment("temporary-experiment")
    with mlflow.start_run():
        mlflow.log_metric("temp_metric", 42)
        print("Logged to temporary backend")

# Back to original configuration
print(f"Original tracking URI: {mlflow.get_tracking_uri()}")

# Temporary experiment ID
with temporary_experiment_id("123"):
    with mlflow.start_run():
        mlflow.log_metric("experiment_123_metric", 100)
        print("Logged to experiment 123")

# Original experiment restored
print(f"Current experiment: {mlflow.get_experiment_id()}")

# --- Next example: configuration diagnostics ---
import mlflow
from mlflow.config import MLflowEnvironment


def diagnose_mlflow_config():
    """Comprehensive MLflow configuration diagnostic."""
    print("=== MLflow Configuration Diagnostic ===")

    # Check basic configuration
    print(f"Tracking URI: {mlflow.get_tracking_uri()}")
    print(f"Registry URI: {mlflow.get_registry_uri()}")
    print(f"Artifact Root: {mlflow.get_default_artifact_root()}")

    # Check environment variables
    print("\n=== Environment Variables ===")
    env_vars = mlflow.get_env_vars_dict()
    for key, value in sorted(env_vars.items()):
        if value:
            # Mask sensitive values
            if "password" in key.lower() or "token" in key.lower() or "key" in key.lower():
                value = "***masked***"
            print(f"{key}: {value}")

    # Validate configuration
    print("\n=== Validation ===")
    try:
        mlflow.validate_tracking_uri(mlflow.get_tracking_uri())
        print("✓ Tracking URI is valid")
    except Exception as e:
        print(f"✗ Tracking URI error: {e}")

    try:
        mlflow.validate_artifact_root(mlflow.get_default_artifact_root())
        print("✓ Artifact root is accessible")
    except Exception as e:
        print(f"✗ Artifact root error: {e}")

    # Test server connection
    try:
        server_info = mlflow.check_server_connection()
        print(f"✓ Server connection successful")
        print(f"  Version: {server_info.get('version', 'unknown')}")
    except Exception as e:
        print(f"✗ Server connection failed: {e}")

    # System metrics status
    print(f"\nSystem metrics enabled: {mlflow.is_system_metrics_logging_enabled()}")


# Run diagnostic
diagnose_mlflow_config()

# --- Next example: configuration profile management ---
import mlflow
from mlflow.config import MLflowEnvironment, isolated_mlflow_config
import json


class MLflowConfigManager:
    """Advanced MLflow configuration management.

    Holds named configuration profiles (plain dicts) and applies them to the
    global MLflow configuration on demand.
    """

    def __init__(self):
        # Mapping of profile name -> configuration dict.
        self.configs = {}

    def save_config(self, name, config_dict):
        """Save configuration profile."""
        self.configs[name] = config_dict

    def load_config(self, name):
        """Load and apply configuration profile.

        Raises:
            ValueError - If no profile named *name* exists.
        """
        if name not in self.configs:
            raise ValueError(f"Configuration '{name}' not found")
        config = self.configs[name]
        # Apply only the keys present in the profile; absent keys leave the
        # corresponding global setting untouched.
        if "tracking_uri" in config:
            mlflow.set_tracking_uri(config["tracking_uri"])
        if "registry_uri" in config:
            mlflow.set_registry_uri(config["registry_uri"])
        if "artifact_root" in config:
            mlflow.set_default_artifact_root(config["artifact_root"])
        if "system_metrics" in config:
            if config["system_metrics"]:
                mlflow.enable_system_metrics_logging()
            else:
                mlflow.disable_system_metrics_logging()

    def export_config(self, name, filepath):
        """Export configuration to a JSON file (no-op for unknown names)."""
        if name in self.configs:
            with open(filepath, 'w') as f:
                json.dump(self.configs[name], f, indent=2)

    def import_config(self, name, filepath):
        """Import configuration from a JSON file."""
        with open(filepath, 'r') as f:
            self.configs[name] = json.load(f)


# Usage example
config_manager = MLflowConfigManager()

# Define configurations for different environments
config_manager.save_config("development", {
    "tracking_uri": "sqlite:///dev_mlflow.db",
    "artifact_root": "./dev_artifacts",
    "system_metrics": True,
})
config_manager.save_config("staging", {
    "tracking_uri": "http://staging-mlflow:5000",
    "registry_uri": "postgresql://user:pass@staging-db:5432/mlflow",
    "artifact_root": "s3://staging-mlflow-bucket/artifacts",
    "system_metrics": False,
})
config_manager.save_config("production", {
    "tracking_uri": "https://prod-mlflow.company.com",
    "registry_uri": "postgresql://user:pass@prod-db:5432/mlflow",
    "artifact_root": "s3://prod-mlflow-bucket/artifacts",
    "system_metrics": True,
})

# Switch between configurations
config_manager.load_config("development")
print(f"Development config loaded: {mlflow.get_tracking_uri()}")

config_manager.load_config("production")
print(f"Production config loaded: {mlflow.get_tracking_uri()}")

# Export configuration for sharing
config_manager.export_config("production", "prod_config.json")

from typing import Dict, Any, Optional, ContextManager
import os


class MLflowEnvironment:
    # Environment variable constants
    MLFLOW_TRACKING_URI: str
    MLFLOW_REGISTRY_URI: str
    MLFLOW_TRACKING_USERNAME: str
    MLFLOW_TRACKING_PASSWORD: str
    MLFLOW_TRACKING_TOKEN: str
    MLFLOW_DEFAULT_ARTIFACT_ROOT: str
    MLFLOW_EXPERIMENT_ID: str
    MLFLOW_RUN_ID: str
    MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING: str

    @staticmethod
    def get_env_var(key: str, default: Optional[str] = None) -> Optional[str]: ...

    @staticmethod
    def set_env_var(key: str, value: str) -> None: ...


# Configuration functions return types
def get_tracking_uri() -> str: ...
def get_registry_uri() -> str: ...
def get_artifact_uri(run_id: Optional[str] = None) -> str: ...
def get_default_artifact_root() -> str: ...
def get_experiment_id() -> Optional[str]: ...
def get_run_id() -> Optional[str]: ...
def is_tracking_uri_set() -> bool: ...
def is_system_metrics_logging_enabled() -> bool: ...
def validate_tracking_uri(uri: str) -> bool: ...
def validate_artifact_root(artifact_root: str) -> bool: ...
def check_server_connection(tracking_uri: Optional[str] = None) -> Dict[str, Any]: ...
def get_env_vars_dict() -> Dict[str, str]: ...


# Context managers
def temporary_tracking_uri(uri: str) -> ContextManager[None]: ...
def temporary_experiment_id(experiment_id: str) -> ContextManager[None]: ...
def isolated_mlflow_config() -> ContextManager[None]: ...


# Configuration profile type
ConfigProfile = Dict[str, Any]

# Database profile type
DatabaseProfile = Dict[str, str]  # host, port, database, username, etc.

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-mlflow