MLflow is an open source platform for the complete machine learning lifecycle
—
MLflow Projects provide a standard format for packaging data science code in a reusable and reproducible way. Projects enable running ML workflows locally or on remote compute platforms with automatic environment management, parameter validation, and dependency tracking. Each project defines entry points, parameters, and environment specifications that can be executed across different backends.
Execute MLflow projects from local directories or remote Git repositories with comprehensive parameter validation and environment management.
def run(uri, entry_point="main", version=None, parameters=None, docker_args=None, experiment_name=None, experiment_id=None, backend="local", backend_config=None, storage_dir=None, synchronous=True, run_id=None, run_name=None, env_manager=None, build_image=False, docker_auth=None):
    """
    Run an MLflow project from a local or remote URI.

    Parameters:
    - uri: str - Project URI (local path or Git repository)
    - entry_point: str - Entry point to run (default: "main")
    - version: str, optional - Git commit hash or branch name
    - parameters: dict, optional - Parameters for entry point command
    - docker_args: dict, optional - Docker execution arguments
    - experiment_name: str, optional - MLflow experiment name
    - experiment_id: str, optional - MLflow experiment ID
    - backend: str - Execution backend ("local", "databricks", "kubernetes")
    - backend_config: dict or str, optional - Backend configuration
    - storage_dir: str, optional - Directory for remote URI downloads
    - synchronous: bool - Wait for run completion (default: True)
    - run_id: str, optional - Specific MLflow run ID to use
    - run_name: str, optional - Name for the MLflow run
    - env_manager: str, optional - Environment manager ("local", "virtualenv", "uv", "conda")
    - build_image: bool - Build new docker image (default: False)
    - docker_auth: dict, optional - Docker registry authentication

    Returns:
        SubmittedRun object with run information and control methods
    """

# Control and monitor submitted project runs with status tracking and cancellation capabilities.
class SubmittedRun:
    """
    Represents a submitted MLflow project run.
    """

    @property
    def run_id(self) -> str:
        """MLflow run ID of the submitted project run."""

    def wait(self) -> bool:
        """
        Wait for run completion.

        Returns:
            bool - True if run completed successfully, False otherwise
        """

    def get_status(self) -> str:
        """
        Get current run status.

        Returns:
            str - Current status ("RUNNING", "FINISHED", "FAILED", "KILLED")
        """

    def cancel(self):
        """
        Cancel the running project and wait for termination.
        """

# MLflow projects are defined using an MLproject file in YAML format that specifies entry points, parameters, and environment requirements.
name: my_project
entry_points:
main:
parameters:
data_path:
type: path
default: data/input.csv
learning_rate:
type: float
default: 0.01
max_epochs:
type: int
default: 100
model_name:
type: string
default: my_model
command: "python train.py --data {data_path} --lr {learning_rate} --epochs {max_epochs} --name {model_name}"
evaluate:
parameters:
model_uri:
type: uri
test_data:
type: path
command: "python evaluate.py --model {model_uri} --data {test_data}"
# Environment specification (choose one)
conda_env: conda.yaml

Conda Environment:

conda_env: conda.yaml  # Points to conda environment file

Python Environment:

python_env: python_env.yaml  # Points to python environment file

Docker Environment:
docker_env:
image: "tensorflow/tensorflow:2.8.0"
volumes: ["/host/data:/container/data"]
environment:
- ["CUDA_VISIBLE_DEVICES", "0"]
- "PATH" # Copy from hostSupported parameter types with validation:
string - Basic string parameterfloat - Floating point numeric parameterint - Integer numeric parameterpath - File or directory path (downloads remote URIs)uri - URI parameter with validationParameter Definition:
parameters:
param_name:
type: string|float|int|path|uri
default: default_value  # Optional

Execute projects on the local machine with environment isolation and dependency management.
import mlflow.projects
# Run with local backend (default)
run = mlflow.projects.run(
uri=".",
entry_point="train",
parameters={"learning_rate": 0.001},
backend="local",
env_manager="conda" # or "virtualenv", "uv", "local"
)

Execute projects on Databricks clusters with automatic cluster management and artifact storage.
# Databricks backend configuration
backend_config = {
"cluster_spec": {
"spark_version": "7.3.x-scala2.12",
"node_type_id": "i3.xlarge",
"num_workers": 2
}
}
run = mlflow.projects.run(
uri="git+https://github.com/user/ml-project.git",
backend="databricks",
backend_config=backend_config
)

Execute projects as Kubernetes jobs with container orchestration and resource management.
# Kubernetes backend with job template
backend_config = {
"kube-job-template-path": "k8s-job-template.yaml",
"kube-context": "my-k8s-context"
}
run = mlflow.projects.run(
uri=".",
backend="kubernetes",
backend_config=backend_config,
docker_args={"image": "my-project:latest"}
)

import mlflow.projects
# Simple local execution
run = mlflow.projects.run(
uri=".",
entry_point="main",
parameters={"alpha": 0.5, "l1_ratio": 0.1}
)
# Wait for completion and check status
success = run.wait()
print(f"Run {'succeeded' if success else 'failed'}")

# Run from Git repository with specific version
run = mlflow.projects.run(
uri="https://github.com/mlflow/mlflow-example.git",
version="main",
entry_point="main",
parameters={"alpha": 0.3},
experiment_name="remote-experiment"
)

# Run with Docker environment
run = mlflow.projects.run(
uri=".",
entry_point="train",
backend="local",
docker_args={
"image": "tensorflow/tensorflow:2.8.0-gpu",
"volumes": {"/data": "/workspace/data"}
}
)

# Non-blocking execution with status monitoring
run = mlflow.projects.run(
uri=".",
synchronous=False
)
# Monitor status
while run.get_status() == "RUNNING":
time.sleep(10)
print("Still running...")
if run.get_status() == "FINISHED":
print(f"Completed successfully. Run ID: {run.run_id}")
else:
print("Run failed or was cancelled")MLflow Projects support multiple environment managers for dependency isolation:
conda.yaml file specifying dependenciespython_env.yaml with pip requirementsmy-ml-project/
├── MLproject
├── conda.yaml
├── train.py
├── evaluate.py
└── data/
    └── train.csv

ml-pipeline/
├── MLproject
├── conda.yaml
├── steps/
│ ├── data_prep.py
│ ├── train.py
│ └── evaluate.py
└── configs/
    └── model_config.yaml

from mlflow.projects import SubmittedRun
from mlflow.projects.submitted_run import LocalSubmittedRun
from mlflow.exceptions import ExecutionException
class SubmittedRun:
    """Abstract base class for submitted project runs."""

    # MLflow run ID associated with this submitted run.
    run_id: str

    def wait(self) -> bool: ...

    def get_status(self) -> str: ...

    def cancel(self) -> None: ...
class LocalSubmittedRun(SubmittedRun):
    """Local backend implementation of SubmittedRun."""
    pass
class ExecutionException(Exception):
    """Exception raised when project execution fails."""
    pass
# Type aliases used throughout the mlflow.projects API surface.
# `List` is used by the DockerArgs alias that follows this snippet in the document.
from typing import Any, Dict, List, Literal, Union

# Backend types
Backend = Literal["local", "databricks", "kubernetes"]
EnvironmentManager = Literal["local", "virtualenv", "uv", "conda"]
RunStatus = Literal["RUNNING", "FINISHED", "FAILED", "KILLED"]

# Parameter types
ProjectParameter = Dict[str, Union[str, float, int]]
BackendConfig = Union[Dict[str, Any], str]  # Dict or path to JSON file
DockerArgs = Dict[str, Union[str, Dict[str, str], List[str]]]

Install with Tessl CLI
npx tessl i tessl/pypi-mlflow