ZenML is a unified MLOps framework that extends battle-tested machine learning operations principles to support the entire AI stack, from classical machine learning models to advanced AI agents.
Configuration classes for controlling step output artifacts: they provide fine-grained control over artifact naming, versioning, tagging, and materialization.
class ArtifactConfig:
    """Configuration for artifacts produced by steps.

    Controls how step outputs are saved, named, versioned, and tracked.

    Import from: ``from zenml import ArtifactConfig``

    Attributes:
        name: Artifact name (overrides the default derived name).
        version: Artifact version identifier or strategy.
        tags: List of tags to attach.
        run_metadata: Metadata dict to attach.
        artifact_type: Optional type of the artifact.
    """

    def __init__(
        self,
        name: str = None,
        version: str = None,
        tags: list = None,
        run_metadata: dict = None,
        artifact_type=None,
    ):
        """Initialize artifact configuration.

        Parameters:
            name: Custom artifact name (default: derived from step/output name).
            version: Version identifier or strategy.
            tags: List of tag names to attach.
            run_metadata: Metadata dict to log with the artifact.
            artifact_type: Optional type of the artifact
                (e.g. ``ArtifactType.MODEL``).

        Example:
            ```python
            from zenml import step, ArtifactConfig
            from zenml.enums import ArtifactType

            @step
            def create_model() -> tuple[dict, ArtifactConfig]:
                model = {"weights": [0.1, 0.2]}
                config = ArtifactConfig(
                    name="production_model",
                    version="v1.0",
                    tags=["production", "validated"],
                    run_metadata={"accuracy": 0.95},
                    artifact_type=ArtifactType.MODEL,
                )
                return model, config
            ```
        """
        # Store the configuration; the original stub discarded its arguments.
        self.name = name
        self.version = version
        # Normalize None to fresh containers so instances never share
        # mutable state and attribute access is always well-defined.
        self.tags = [] if tags is None else tags
        self.run_metadata = {} if run_metadata is None else run_metadata
        self.artifact_type = artifact_type
# Import from: from zenml import ArtifactConfig


class ExternalArtifact:
    """Provide a raw value as input to a ZenML step.

    ZenML steps accept either artifacts (= outputs of other steps), parameters
    (raw, JSON serializable values) or external artifacts. External artifacts
    can be used to provide any value as input to a step without needing to
    write an additional step that returns this value.

    The external artifact needs to have a value associated with it that will
    be uploaded to the artifact store.

    Import from: ``from zenml import ExternalArtifact``

    Attributes:
        value: The artifact value (any Python object).
        materializer: Materializer to use for saving the artifact value.
        store_artifact_metadata: Whether metadata for the artifact should be
            stored.
        store_artifact_visualizations: Whether visualizations for the artifact
            should be stored.
    """

    def __init__(
        self,
        value=None,
        materializer: type = None,
        store_artifact_metadata: bool = True,
        store_artifact_visualizations: bool = True,
    ):
        """Initialize external artifact with a value to upload.

        Parameters:
            value: The artifact value (any Python object).
            materializer: Custom materializer for saving the value (optional).
            store_artifact_metadata: Extract and store metadata (default: True).
            store_artifact_visualizations: Generate and store visualizations
                (default: True).

        Example:
            ```python
            from zenml import step, pipeline
            from zenml import ExternalArtifact
            import numpy as np

            @step
            def train_model(data: np.ndarray) -> dict:
                # Use external data
                return {"model": "trained", "samples": len(data)}

            @pipeline
            def training_pipeline():
                # Provide external data value
                my_array = np.array([1, 2, 3, 4, 5])
                external_data = ExternalArtifact(value=my_array)
                model = train_model(data=external_data)
            ```
        """
        # Store the configuration; the original stub discarded its arguments.
        self.value = value
        self.materializer = materializer
        self.store_artifact_metadata = store_artifact_metadata
        self.store_artifact_visualizations = store_artifact_visualizations
# Import from: from zenml import ExternalArtifact
from zenml import step, ArtifactConfig


@step
def train_model(data: list) -> tuple[dict, ArtifactConfig]:
    """Train model with custom artifact configuration."""
    model = {
        "weights": [0.1, 0.2, 0.3],
        "accuracy": 0.95,
    }
    # Configure how the output artifact is named, tagged and annotated.
    config = ArtifactConfig(
        name="production_model",
        tags=["production", "trained"],
        run_metadata={
            "training_samples": len(data),
            "accuracy": 0.95,
        },
    )
    return model, config


# Imports for the next example (originally fused onto the line above).
from zenml import step, ArtifactConfig
from zenml import step, ArtifactConfig
from typing import Annotated


@step
def train_and_evaluate(data: list) -> tuple[
    Annotated[dict, ArtifactConfig(
        name="trained_model",
        version="v1.0",
        tags=["model", "production"],
    )],
    Annotated[dict, ArtifactConfig(
        name="evaluation_metrics",
        tags=["metrics", "validation"],
    )],
]:
    """Step with multiple configured outputs.

    Each output's ArtifactConfig lives in the Annotated metadata of the
    return annotation, so the function body returns exactly the two values
    declared there. (The original returned a flat 4-tuple mixing values and
    configs, which contradicted the declared 2-tuple return type.)
    """
    model = {"weights": [0.1, 0.2]}
    metrics = {"accuracy": 0.95, "loss": 0.05}
    return model, metrics


# Imports for the next example (originally fused onto the line above).
from zenml import step, pipeline, ExternalArtifact
from zenml import step, pipeline, ExternalArtifact
import numpy as np


@step
def train_model(data: np.ndarray) -> dict:
    """Train model using external data."""
    return {"model": "trained", "samples": len(data), "accuracy": 0.95}


@pipeline
def training_pipeline():
    """Pipeline using external artifact."""
    # Provide external data value
    training_data = np.array([[1, 2], [3, 4], [5, 6]])
    external_data = ExternalArtifact(value=training_data)
    # Use external artifact as input
    model = train_model(data=external_data)


# Imports for the next example (originally fused onto the line above).
from zenml import step, pipeline, ExternalArtifact
from zenml import step, pipeline, ExternalArtifact
from zenml.materializers import CloudpickleMaterializer


class CustomModel:
    """Minimal custom model holding a list of weights."""

    def __init__(self, weights):
        self.weights = weights


@step
def evaluate_model(model: CustomModel) -> dict:
    """Evaluate custom model."""
    return {"evaluation": "complete", "weights": len(model.weights)}


@pipeline
def evaluation_pipeline():
    """Pipeline using external custom object."""
    # Create custom object
    my_model = CustomModel(weights=[0.1, 0.2, 0.3])
    # Provide as external artifact with custom materializer
    model_artifact = ExternalArtifact(
        value=my_model,
        materializer=CloudpickleMaterializer,
    )
    # Use in step
    evaluation = evaluate_model(model=model_artifact)


# Imports for the next example (originally fused onto the line above).
from zenml import step, ArtifactConfig
from zenml import step, ArtifactConfig
from datetime import datetime


@step
def daily_snapshot(data: list) -> tuple[dict, ArtifactConfig]:
    """Create daily data snapshot with date-based versioning."""
    snapshot = {"data": data, "timestamp": datetime.now().isoformat()}
    config = ArtifactConfig(
        name="daily_snapshot",
        # Date-based version, e.g. "v20240131" — one version per day.
        version=f"v{datetime.now().strftime('%Y%m%d')}",
        tags=["snapshot", "daily"],
    )
    return snapshot, config


# Imports for the next example (originally fused onto the line above).
from zenml import step, ArtifactConfig
from zenml import step, ArtifactConfig
import json


@step
def train_with_tracking(data: list) -> tuple[dict, ArtifactConfig]:
    """Train model with detailed tracking metadata."""
    model = {"weights": [0.1, 0.2, 0.3]}
    # Comprehensive metadata describing config, data, environment and results;
    # it is logged with the artifact via run_metadata.
    metadata = {
        "training_config": {
            "learning_rate": 0.001,
            "batch_size": 32,
            "epochs": 10,
        },
        "data_info": {
            "samples": len(data),
            "features": 10,
            "split": "80/20",
        },
        "environment": {
            "framework": "pytorch",
            "version": "2.0.0",
            "cuda": "11.8",
        },
        "metrics": {
            "final_loss": 0.05,
            "final_accuracy": 0.95,
        },
    }
    config = ArtifactConfig(
        name="tracked_model",
        version="v1.0",
        tags=["production", "tracked"],
        run_metadata=metadata,
    )
    return model, config


# Imports for the next example (originally fused onto the line above).
from zenml import step, pipeline, ExternalArtifact, ArtifactConfig
from zenml import step, pipeline, ExternalArtifact, ArtifactConfig


@step
def merge_models(
    model_a: dict,
    model_b: dict,
) -> tuple[dict, ArtifactConfig]:
    """Merge two models."""
    merged = {
        "weights_a": model_a.get("weights", []),
        "weights_b": model_b.get("weights", []),
        "merged": True,
    }
    config = ArtifactConfig(
        name="ensemble_model",
        tags=["ensemble", "merged"],
        run_metadata={
            "component_models": 2,
            "merge_strategy": "average",
        },
    )
    return merged, config


@pipeline
def ensemble_pipeline():
    """Create ensemble from external model values."""
    # Provide external model values
    model_a = ExternalArtifact(value={"weights": [0.1, 0.2]})
    model_b = ExternalArtifact(value={"weights": [0.3, 0.4]})
    # Create ensemble with custom output config
    ensemble = merge_models(model_a=model_a, model_b=model_b)


# Imports for the next example (originally fused onto the line above).
from zenml import step, ArtifactConfig
from zenml import step, ArtifactConfig
import os


@step
def train_with_env_aware_config(data: list) -> tuple[dict, ArtifactConfig]:
    """Configure artifact based on environment."""
    model = {"weights": [0.1, 0.2]}
    # Different config for different environments; ENV defaults to
    # "development" when unset.
    environment = os.getenv("ENV", "development")
    if environment == "production":
        config = ArtifactConfig(
            name="production_model",
            version="stable",
            tags=["production", "validated", "monitored"],
        )
    else:
        config = ArtifactConfig(
            name="dev_model",
            version="latest",
            tags=["development", "experimental"],
        )
    return model, config


# Imports for the next example (originally fused onto the line above).
from zenml import step, pipeline, ExternalArtifact
from zenml import step, pipeline, ExternalArtifact


@step
def process_data(data: list) -> dict:
    """Process external data."""
    return {"processed": len(data)}


@pipeline
def lightweight_pipeline():
    """Pipeline with external artifact without metadata extraction."""
    # Provide data without storing metadata/visualizations
    raw_data = [1, 2, 3, 4, 5]
    data_artifact = ExternalArtifact(
        value=raw_data,
        store_artifact_metadata=False,
        store_artifact_visualizations=False,
    )
    result = process_data(data=data_artifact)


# Imports for the next example (originally fused onto the line above).
from zenml import step, pipeline, ExternalArtifact
from zenml import step, pipeline, ExternalArtifact
import numpy as np


@step
def train_with_multiple_inputs(
    train_data: np.ndarray,
    validation_data: np.ndarray,
    config: dict,
) -> dict:
    """Train using multiple external artifacts."""
    return {"model": "trained", "train_samples": len(train_data)}


@pipeline
def multi_input_pipeline():
    """Pipeline with multiple external artifacts."""
    # Provide multiple external values
    train = ExternalArtifact(value=np.array([[1, 2], [3, 4]]))
    val = ExternalArtifact(value=np.array([[5, 6]]))
    cfg = ExternalArtifact(value={"learning_rate": 0.001, "epochs": 10})
    model = train_with_multiple_inputs(
        train_data=train,
        validation_data=val,
        config=cfg,
    )


# Install with Tessl CLI
npx tessl i tessl/pypi-zenml