CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-zenml

ZenML is a unified MLOps framework that extends battle-tested machine learning operations principles to support the entire AI stack, from classical machine learning models to advanced AI agents.

Overview
Eval results
Files

docs/materializers.md

Materializers

Built-in materializers for serializing and deserializing Python objects. Materializers handle the conversion of artifacts between Python objects and storage formats, enabling automatic artifact persistence and lineage tracking.

Capabilities

Built-In Materializer

class BuiltInMaterializer:
    """
    Materializer for built-in Python scalar types.

    Handles: int, float, str, bool, bytes, None

    Selected automatically for these types; no explicit materializer
    configuration is required on the step.
    """

Import from:

from zenml.materializers import BuiltInMaterializer

Built-In Container Materializer

class BuiltInContainerMaterializer:
    """
    Materializer for built-in container types.

    Handles: list, dict, tuple, set

    Uses JSON serialization for storage, and is selected automatically
    for these types. NOTE(review): plain JSON has no tuple/set
    representation — confirm against the ZenML docs how those types are
    round-tripped.
    """

Import from:

from zenml.materializers import BuiltInContainerMaterializer

Bytes Materializer

class BytesMaterializer:
    """
    Materializer for raw ``bytes`` objects.

    Stores the bytes payload directly, without any additional encoding
    or transformation.
    """

Import from:

from zenml.materializers import BytesMaterializer

Cloudpickle Materializer

class CloudpickleMaterializer:
    """
    Materializer using cloudpickle for serialization.

    Handles most Python objects, including functions, lambdas, and
    classes; cloudpickle is more flexible than the standard library's
    pickle for such objects.

    Useful as a fallback for complex objects that have no specialized
    materializer. NOTE: as with pickle, loading executes arbitrary
    code — only load artifacts from trusted sources.
    """

Import from:

from zenml.materializers import CloudpickleMaterializer

In-Memory Materializer

class InMemoryMaterializer:
    """
    Materializer that keeps artifacts in process memory only.

    Does not persist anything to disk, so such artifacts are
    presumably unavailable after the run ends — confirm before
    depending on them downstream. Useful for temporary data that
    should not be saved.
    """

Import from:

from zenml.materializers import InMemoryMaterializer

Path Materializer

class PathMaterializer:
    """
    Materializer for ``pathlib.Path`` objects.

    Stores the path as a string and reconstructs the ``Path`` object on
    load. NOTE(review): this description implies the path itself is
    persisted, not the file contents it points to — confirm before
    relying on it to move files between environments.
    """

Import from:

from zenml.materializers import PathMaterializer

Pydantic Materializer

class PydanticMaterializer:
    """
    Materializer for Pydantic models.

    Serializes Pydantic models to JSON on save and deserializes them
    back on load, preserving model validation and structure.

    Selected automatically for Pydantic ``BaseModel`` subclasses.
    """

Import from:

from zenml.materializers import PydanticMaterializer

Service Materializer

class ServiceMaterializer:
    """
    Materializer for ZenML services.

    Persists service configurations and state. Used for model
    deployment services and other long-running processes.
    """

Import from:

from zenml.materializers import ServiceMaterializer

Structured String Materializer

class StructuredStringMaterializer:
    """
    Materializer for ZenML's structured string types.

    Handles: HTMLString, MarkdownString, CSVString, JSONString

    Preserves both the string content and the type information, so a
    loaded artifact keeps its specific structured-string class.
    """

Import from:

from zenml.materializers import StructuredStringMaterializer

UUID Materializer

class UUIDMaterializer:
    """
    Materializer for ``uuid.UUID`` objects.

    Stores the UUID in its string form and reconstructs the ``UUID``
    object on load.
    """

Import from:

from zenml.materializers import UUIDMaterializer

Integration Materializers

ZenML integrations provide additional materializers for framework-specific types:

  • NumPy: NumPy array materializers
  • Pandas: DataFrame, Series materializers
  • PyTorch: Tensor, Module, DataLoader materializers
  • TensorFlow: Tensor, Model materializers
  • Scikit-learn: Model materializers
  • XGBoost: Booster, DMatrix materializers
  • LightGBM: Booster, Dataset materializers
  • HuggingFace: Tokenizer, Model, Dataset materializers
  • Pillow: Image materializers
  • PyArrow: Table materializers

Usage Examples

Automatic Materialization

from zenml import step

@step
def process_data(data: list) -> dict:
    """Wrap the input list together with its length (built-in types use automatic materializers)."""
    size = len(data)
    return {"processed": data, "count": size}

# BuiltInContainerMaterializer automatically handles list and dict

Custom Materializer for Step Output

from zenml import step
from zenml.materializers import CloudpickleMaterializer

class CustomModel:
    """Simple container type holding a ``weights`` attribute."""

    def __init__(self, weights):
        # Keep the provided weights unchanged.
        self.weights = weights

@step(output_materializers=CloudpickleMaterializer)
def train_custom_model(data: list) -> CustomModel:
    """Build a CustomModel; the custom class is persisted via cloudpickle."""
    weights = [0.1, 0.2, 0.3]
    return CustomModel(weights=weights)

Pydantic Model Materialization

from zenml import step
from pydantic import BaseModel

class ModelMetrics(BaseModel):
    """Evaluation metrics for a trained model."""

    accuracy: float
    precision: float
    recall: float
    f1_score: float

@step
def evaluate_model(data: list) -> ModelMetrics:
    """Return fixed metrics; Pydantic models automatically use PydanticMaterializer."""
    scores = {
        "accuracy": 0.95,
        "precision": 0.93,
        "recall": 0.97,
        "f1_score": 0.95,
    }
    return ModelMetrics(**scores)

@step
def report_metrics(metrics: ModelMetrics):
    """Print key metrics; the Pydantic model is deserialized automatically."""
    accuracy = metrics.accuracy
    f1 = metrics.f1_score
    print(f"Accuracy: {accuracy}")
    print(f"F1: {f1}")

Different Materializers for Multiple Outputs

from zenml import step
from zenml.materializers import CloudpickleMaterializer, PydanticMaterializer
from typing import Tuple
from pydantic import BaseModel

class Config(BaseModel):
    """Training configuration with a single learning-rate field."""

    learning_rate: float

class CustomModel:
    """Placeholder model class used to demonstrate per-output materializers."""

@step(
    output_materializers={
        "model": CloudpickleMaterializer,
        "config": PydanticMaterializer
    }
)
def train_with_config(data: list) -> Tuple[CustomModel, Config]:
    """Return (model, config); each output is persisted by its own materializer."""
    config = Config(learning_rate=0.001)
    model = CustomModel()
    return model, config

In-Memory Artifacts

from zenml import step
from zenml.materializers import InMemoryMaterializer

@step(output_materializers=InMemoryMaterializer)
def generate_temp_data() -> dict:
    """Produce throwaway data kept in memory only, never written to storage."""
    payload = {"temp": "data", "should_not_save": True}
    return payload

Structured String Types

from zenml import step
from zenml.types import HTMLString, MarkdownString, CSVString

@step
def generate_report() -> HTMLString:
    """Build a small HTML report artifact."""
    markup = "<html><body><h1>Report</h1></body></html>"
    return HTMLString(markup)

@step
def generate_markdown() -> MarkdownString:
    """Build a small Markdown documentation artifact."""
    content = "# Title\n\nThis is content."
    return MarkdownString(content)

@step
def export_csv() -> CSVString:
    """Build a small CSV artifact."""
    rows = "name,value\nitem1,100\nitem2,200"
    return CSVString(rows)

Cloudpickle for Complex Objects

from zenml import step
from zenml.materializers import CloudpickleMaterializer

@step(output_materializers=CloudpickleMaterializer)
def create_pipeline_config() -> dict:
    """Build a config dict that embeds a function, so cloudpickle is required to persist it."""
    def preprocess(x):
        # Trivial example transform carried inside the artifact.
        return x * 2

    config = {
        "preprocessor": preprocess,  # plain function — unserializable by JSON
        "params": {"learning_rate": 0.001},
        "nested": {"deep": {"value": 42}},
    }
    return config

@step
def use_config(config: dict):
    """Call the stored preprocessor with a sample value and print the result."""
    result = config["preprocessor"](5)
    print(f"Result: {result}")

Integration Materializers

# NumPy arrays (requires zenml[numpy] or automatic with numpy installed)
from zenml import step
import numpy as np

@step
def process_array(data: list) -> np.ndarray:
    """Convert a list to a NumPy array; handled by the NumPy integration materializer."""
    array = np.array(data)
    return array

# Pandas DataFrames (requires zenml[pandas])
import pandas as pd

@step
def process_dataframe(data: dict) -> pd.DataFrame:
    """Build a DataFrame from a dict; handled by the pandas integration materializer."""
    frame = pd.DataFrame(data)
    return frame

# PyTorch models (requires zenml[pytorch])
import torch

@step
def train_pytorch_model(data: list) -> torch.nn.Module:
    """Return a small linear layer; handled by the PyTorch integration materializer."""
    linear = torch.nn.Linear(10, 1)
    return linear

Custom Materializer Example

from zenml.materializers import BaseMaterializer
from typing import Type
import json

class MyCustomClass:
    """Example user-defined type wrapping a plain dict payload."""

    def __init__(self, data: dict):
        # Keep a reference to the caller's dict (no copy is made).
        self.data = data

class MyCustomMaterializer(BaseMaterializer):
    """Custom materializer that round-trips MyCustomClass through a JSON file."""

    # Types this materializer is registered for.
    ASSOCIATED_TYPES = (MyCustomClass,)
    ASSOCIATED_ARTIFACT_TYPE = "custom_data"

    def load(self, data_type: Type[MyCustomClass]) -> MyCustomClass:
        """Read data.json from the artifact store and rebuild the object."""
        path = self.uri + "/data.json"
        with self.artifact_store.open(path, "r") as f:
            payload = json.load(f)
        return MyCustomClass(payload)

    def save(self, obj: MyCustomClass):
        """Write the object's dict as data.json in the artifact store."""
        path = self.uri + "/data.json"
        with self.artifact_store.open(path, "w") as f:
            json.dump(obj.data, f)

# Use custom materializer
from zenml import step

@step(output_materializers=MyCustomMaterializer)
def create_custom_object() -> MyCustomClass:
    """Build a MyCustomClass instance persisted by the custom materializer."""
    payload = {"key": "value"}
    return MyCustomClass(payload)

Install with Tessl CLI

npx tessl i tessl/pypi-zenml

docs

artifact-config.md

artifacts.md

client.md

config.md

enums.md

exceptions.md

hooks.md

index.md

integrations.md

materializers.md

metadata-tags.md

models.md

pipelines-and-steps.md

pydantic-models.md

services.md

stack-components.md

stacks.md

types.md

utilities.md

tile.json