ZenML is a unified MLOps framework that extends battle-tested machine learning operations principles to support the entire AI stack, from classical machine learning models to advanced AI agents.
Configuration classes for controlling step output artifacts: they provide fine-grained control over artifact naming, versioning, tagging, and materialization.
class ArtifactConfig:
    """Configuration for artifacts produced by steps.

    Controls how step outputs are saved, named, versioned, and tracked.

    Import from: ``from zenml import ArtifactConfig``

    Attributes:
        name: Artifact name (overrides the default derived name).
        version: Artifact version identifier or strategy.
        tags: List of tags to attach.
        run_metadata: Metadata dict to attach.
        artifact_type: Optional type of the artifact.
    """

    def __init__(
        self,
        name: str = None,
        version: str = None,
        tags: list = None,
        run_metadata: dict = None,
        artifact_type=None,
    ):
        """Initialize artifact configuration.

        Parameters:
            name: Custom artifact name (default: derived from step/output name).
            version: Version identifier or strategy.
            tags: List of tag names to attach.
            run_metadata: Metadata dict to log with the artifact.
            artifact_type: Optional type of the artifact
                (e.g. ``ArtifactType.MODEL``).

        Example:
            ```python
            from zenml import step, ArtifactConfig
            from zenml.enums import ArtifactType

            @step
            def create_model() -> tuple[dict, ArtifactConfig]:
                model = {"weights": [0.1, 0.2]}
                config = ArtifactConfig(
                    name="production_model",
                    version="v1.0",
                    tags=["production", "validated"],
                    run_metadata={"accuracy": 0.95},
                    artifact_type=ArtifactType.MODEL,
                )
                return model, config
            ```
        """
        # Store the configuration; the original stub discarded its arguments.
        self.name = name
        self.version = version
        # Normalize None to fresh containers so instances never share
        # mutable state and attribute access is always well-defined.
        self.tags = [] if tags is None else tags
        self.run_metadata = {} if run_metadata is None else run_metadata
        self.artifact_type = artifact_type
# Import from: from zenml import ArtifactConfig


class ExternalArtifact:
    """Provide a raw value as input to a ZenML step.

    ZenML steps accept either artifacts (= outputs of other steps), parameters
    (raw, JSON serializable values) or external artifacts. External artifacts
    can be used to provide any value as input to a step without needing to
    write an additional step that returns this value.

    The external artifact needs to have a value associated with it that will
    be uploaded to the artifact store.

    Import from: ``from zenml import ExternalArtifact``

    Attributes:
        value: The artifact value (any Python object).
        materializer: Materializer to use for saving the artifact value.
        store_artifact_metadata: Whether metadata for the artifact should be
            stored.
        store_artifact_visualizations: Whether visualizations for the artifact
            should be stored.
    """

    def __init__(
        self,
        value=None,
        materializer: type = None,
        store_artifact_metadata: bool = True,
        store_artifact_visualizations: bool = True,
    ):
        """Initialize external artifact with a value to upload.

        Parameters:
            value: The artifact value (any Python object).
            materializer: Custom materializer for saving the value (optional).
            store_artifact_metadata: Extract and store metadata (default: True).
            store_artifact_visualizations: Generate and store visualizations
                (default: True).

        Example:
            ```python
            from zenml import step, pipeline
            from zenml import ExternalArtifact
            import numpy as np

            @step
            def train_model(data: np.ndarray) -> dict:
                # Use external data
                return {"model": "trained", "samples": len(data)}

            @pipeline
            def training_pipeline():
                # Provide external data value
                my_array = np.array([1, 2, 3, 4, 5])
                external_data = ExternalArtifact(value=my_array)
                model = train_model(data=external_data)
            ```
        """
        # Store the configuration; the original stub discarded its arguments.
        self.value = value
        self.materializer = materializer
        self.store_artifact_metadata = store_artifact_metadata
        self.store_artifact_visualizations = store_artifact_visualizations
# Import from: from zenml import ExternalArtifact
from zenml import step, ArtifactConfig


@step
def train_model(data: list) -> tuple[dict, ArtifactConfig]:
    """Train model with custom artifact configuration."""
    model = {
        "weights": [0.1, 0.2, 0.3],
        "accuracy": 0.95,
    }
    # Configure how the output artifact is named, tagged and annotated.
    config = ArtifactConfig(
        name="production_model",
        tags=["production", "trained"],
        run_metadata={
            "training_samples": len(data),
            "accuracy": 0.95,
        },
    )
    return model, config


# Imports for the next example (originally fused onto the line above).
from zenml import step, ArtifactConfig
from zenml import step, ArtifactConfig
from typing import Annotated


@step
def train_and_evaluate(data: list) -> tuple[
    Annotated[dict, ArtifactConfig(
        name="trained_model",
        version="v1.0",
        tags=["model", "production"],
    )],
    Annotated[dict, ArtifactConfig(
        name="evaluation_metrics",
        tags=["metrics", "validation"],
    )],
]:
    """Step with multiple configured outputs.

    Each output's ArtifactConfig lives in the Annotated metadata of the
    return annotation, so the function body returns exactly the two values
    declared there. (The original returned a flat 4-tuple mixing values and
    configs, which contradicted the declared 2-tuple return type.)
    """
    model = {"weights": [0.1, 0.2]}
    metrics = {"accuracy": 0.95, "loss": 0.05}
    return model, metrics


# Imports for the next example (originally fused onto the line above).
from zenml import step, pipeline, ExternalArtifact
from zenml import step, pipeline, ExternalArtifact
import numpy as np


@step
def train_model(data: np.ndarray) -> dict:
    """Train model using external data."""
    return {"model": "trained", "samples": len(data), "accuracy": 0.95}


@pipeline
def training_pipeline():
    """Pipeline using external artifact."""
    # Provide external data value
    training_data = np.array([[1, 2], [3, 4], [5, 6]])
    external_data = ExternalArtifact(value=training_data)
    # Use external artifact as input
    model = train_model(data=external_data)


# Imports for the next example (originally fused onto the line above).
from zenml import step, pipeline, ExternalArtifact
from zenml import step, pipeline, ExternalArtifact
from zenml.materializers import CloudpickleMaterializer


class CustomModel:
    """Minimal custom model holding a list of weights."""

    def __init__(self, weights):
        self.weights = weights


@step
def evaluate_model(model: CustomModel) -> dict:
    """Evaluate custom model."""
    return {"evaluation": "complete", "weights": len(model.weights)}


@pipeline
def evaluation_pipeline():
    """Pipeline using external custom object."""
    # Create custom object
    my_model = CustomModel(weights=[0.1, 0.2, 0.3])
    # Provide as external artifact with custom materializer
    model_artifact = ExternalArtifact(
        value=my_model,
        materializer=CloudpickleMaterializer,
    )
    # Use in step
    evaluation = evaluate_model(model=model_artifact)


# Imports for the next example (originally fused onto the line above).
from zenml import step, ArtifactConfig
from zenml import step, ArtifactConfig
from datetime import datetime


@step
def daily_snapshot(data: list) -> tuple[dict, ArtifactConfig]:
    """Create daily data snapshot with date-based versioning."""
    snapshot = {"data": data, "timestamp": datetime.now().isoformat()}
    config = ArtifactConfig(
        name="daily_snapshot",
        # Date-based version, e.g. "v20240131" — one version per day.
        version=f"v{datetime.now().strftime('%Y%m%d')}",
        tags=["snapshot", "daily"],
    )
    return snapshot, config


# Imports for the next example (originally fused onto the line above).
from zenml import step, ArtifactConfig
from zenml import step, ArtifactConfig
import json


@step
def train_with_tracking(data: list) -> tuple[dict, ArtifactConfig]:
    """Train model with detailed tracking metadata."""
    model = {"weights": [0.1, 0.2, 0.3]}
    # Comprehensive metadata describing config, data, environment and results;
    # it is logged with the artifact via run_metadata.
    metadata = {
        "training_config": {
            "learning_rate": 0.001,
            "batch_size": 32,
            "epochs": 10,
        },
        "data_info": {
            "samples": len(data),
            "features": 10,
            "split": "80/20",
        },
        "environment": {
            "framework": "pytorch",
            "version": "2.0.0",
            "cuda": "11.8",
        },
        "metrics": {
            "final_loss": 0.05,
            "final_accuracy": 0.95,
        },
    }
    config = ArtifactConfig(
        name="tracked_model",
        version="v1.0",
        tags=["production", "tracked"],
        run_metadata=metadata,
    )
    return model, config


# Imports for the next example (originally fused onto the line above).
from zenml import step, pipeline, ExternalArtifact, ArtifactConfig
from zenml import step, pipeline, ExternalArtifact, ArtifactConfig


@step
def merge_models(
    model_a: dict,
    model_b: dict,
) -> tuple[dict, ArtifactConfig]:
    """Merge two models."""
    merged = {
        "weights_a": model_a.get("weights", []),
        "weights_b": model_b.get("weights", []),
        "merged": True,
    }
    config = ArtifactConfig(
        name="ensemble_model",
        tags=["ensemble", "merged"],
        run_metadata={
            "component_models": 2,
            "merge_strategy": "average",
        },
    )
    return merged, config


@pipeline
def ensemble_pipeline():
    """Create ensemble from external model values."""
    # Provide external model values
    model_a = ExternalArtifact(value={"weights": [0.1, 0.2]})
    model_b = ExternalArtifact(value={"weights": [0.3, 0.4]})
    # Create ensemble with custom output config
    ensemble = merge_models(model_a=model_a, model_b=model_b)


# Imports for the next example (originally fused onto the line above).
from zenml import step, ArtifactConfig
from zenml import step, ArtifactConfig
import os


@step
def train_with_env_aware_config(data: list) -> tuple[dict, ArtifactConfig]:
    """Configure artifact based on environment."""
    model = {"weights": [0.1, 0.2]}
    # Different config for different environments; ENV defaults to
    # "development" when unset.
    environment = os.getenv("ENV", "development")
    if environment == "production":
        config = ArtifactConfig(
            name="production_model",
            version="stable",
            tags=["production", "validated", "monitored"],
        )
    else:
        config = ArtifactConfig(
            name="dev_model",
            version="latest",
            tags=["development", "experimental"],
        )
    return model, config


# Imports for the next example (originally fused onto the line above).
from zenml import step, pipeline, ExternalArtifact
from zenml import step, pipeline, ExternalArtifact


@step
def process_data(data: list) -> dict:
    """Process external data."""
    return {"processed": len(data)}


@pipeline
def lightweight_pipeline():
    """Pipeline with external artifact without metadata extraction."""
    # Provide data without storing metadata/visualizations
    raw_data = [1, 2, 3, 4, 5]
    data_artifact = ExternalArtifact(
        value=raw_data,
        store_artifact_metadata=False,
        store_artifact_visualizations=False,
    )
    result = process_data(data=data_artifact)


# Imports for the next example (originally fused onto the line above).
from zenml import step, pipeline, ExternalArtifact
from zenml import step, pipeline, ExternalArtifact
import numpy as np


@step
def train_with_multiple_inputs(
    train_data: np.ndarray,
    validation_data: np.ndarray,
    config: dict,
) -> dict:
    """Train using multiple external artifacts."""
    return {"model": "trained", "train_samples": len(train_data)}


@pipeline
def multi_input_pipeline():
    """Pipeline with multiple external artifacts."""
    # Provide multiple external values
    train = ExternalArtifact(value=np.array([[1, 2], [3, 4]]))
    val = ExternalArtifact(value=np.array([[5, 6]]))
    cfg = ExternalArtifact(value={"learning_rate": 0.001, "epochs": 10})
    model = train_with_multiple_inputs(
        train_data=train,
        validation_data=val,
        config=cfg,
    )


# Install with Tessl CLI
npx tessl i tessl/pypi-zenml