# Artifact Configuration

Configuration classes for controlling step output artifacts. These classes provide fine-grained control over artifact naming, versioning, tagging, and materialization.

## Capabilities

### Artifact Config

```python { .api }
class ArtifactConfig:
    """
    Configuration for artifacts produced by steps.

    Controls how step outputs are saved, named, versioned, and tracked.

    Attributes:
    - name: Artifact name (overrides default)
    - version: Artifact version strategy
    - tags: List of tags to attach
    - run_metadata: Metadata dict to attach
    - artifact_type: Optional type of the artifact
    """

    def __init__(
        self,
        name: str = None,
        version: str = None,
        tags: list = None,
        run_metadata: dict = None,
        artifact_type = None
    ):
        """
        Initialize artifact configuration.

        Parameters:
        - name: Custom artifact name (default: derived from step/output name)
        - version: Version identifier or strategy
        - tags: List of tag names to attach
        - run_metadata: Metadata dict to log with artifact
        - artifact_type: Optional type of the artifact (e.g., ArtifactType.MODEL)

        Example:
        ```python
        from zenml import step, ArtifactConfig
        from zenml.enums import ArtifactType

        @step
        def create_model() -> tuple[dict, ArtifactConfig]:
            model = {"weights": [0.1, 0.2]}
            config = ArtifactConfig(
                name="production_model",
                version="v1.0",
                tags=["production", "validated"],
                run_metadata={"accuracy": 0.95},
                artifact_type=ArtifactType.MODEL
            )
            return model, config
        ```
        """
```

Import from:

```python
from zenml import ArtifactConfig
```

### External Artifact

```python { .api }
class ExternalArtifact:
    """
    External artifacts can be used to provide values as input to ZenML steps.

    ZenML steps accept either artifacts (=outputs of other steps), parameters
    (raw, JSON serializable values) or external artifacts. External artifacts
    can be used to provide any value as input to a step without needing to
    write an additional step that returns this value.

    The external artifact needs to have a value associated with it that will
    be uploaded to the artifact store.

    Attributes:
    - value: The artifact value (any Python object)
    - materializer: Materializer to use for saving the artifact value
    - store_artifact_metadata: Whether metadata for the artifact should be stored
    - store_artifact_visualizations: Whether visualizations for the artifact should be stored
    """

    def __init__(
        self,
        value = None,
        materializer: type = None,
        store_artifact_metadata: bool = True,
        store_artifact_visualizations: bool = True
    ):
        """
        Initialize external artifact with a value to upload.

        Parameters:
        - value: The artifact value (any Python object)
        - materializer: Custom materializer for saving the value (optional)
        - store_artifact_metadata: Extract and store metadata (default: True)
        - store_artifact_visualizations: Generate and store visualizations (default: True)

        Example:
        ```python
        from zenml import step, pipeline
        from zenml import ExternalArtifact
        import numpy as np

        @step
        def train_model(data: np.ndarray) -> dict:
            # Use external data
            return {"model": "trained", "samples": len(data)}

        @pipeline
        def training_pipeline():
            # Provide external data value
            my_array = np.array([1, 2, 3, 4, 5])
            external_data = ExternalArtifact(value=my_array)
            model = train_model(data=external_data)
        ```
        """
```

Import from:

```python
from zenml import ExternalArtifact
```

## Usage Examples

### Basic Artifact Config

```python
from zenml import step, ArtifactConfig

@step
def train_model(data: list) -> tuple[dict, ArtifactConfig]:
    """Train model with custom artifact configuration."""
    model = {
        "weights": [0.1, 0.2, 0.3],
        "accuracy": 0.95
    }

    # Configure artifact
    config = ArtifactConfig(
        name="production_model",
        tags=["production", "trained"],
        run_metadata={
            "training_samples": len(data),
            "accuracy": 0.95
        }
    )

    return model, config
```

### Multiple Outputs with Configs

```python
from zenml import step, ArtifactConfig
from typing import Annotated

@step
def train_and_evaluate(data: list) -> tuple[
    Annotated[dict, "model"],
    Annotated[dict, "metrics"]
]:
    """Step with multiple configured outputs."""
    model = {"weights": [0.1, 0.2]}
    metrics = {"accuracy": 0.95, "loss": 0.05}

    return (
        model,
        ArtifactConfig(
            name="trained_model",
            version="v1.0",
            tags=["model", "production"]
        ),
        metrics,
        ArtifactConfig(
            name="evaluation_metrics",
            tags=["metrics", "validation"]
        )
    )
```

### External Artifact with Value

```python
from zenml import step, pipeline, ExternalArtifact
import numpy as np

@step
def train_model(data: np.ndarray) -> dict:
    """Train model using external data."""
    return {"model": "trained", "samples": len(data), "accuracy": 0.95}

@pipeline
def training_pipeline():
    """Pipeline using external artifact."""
    # Provide external data value
    training_data = np.array([[1, 2], [3, 4], [5, 6]])
    external_data = ExternalArtifact(value=training_data)

    # Use external artifact as input
    model = train_model(data=external_data)
```

### External Artifact with Custom Materializer

```python
from zenml import step, pipeline, ExternalArtifact
from zenml.materializers import CloudpickleMaterializer

class CustomModel:
    def __init__(self, weights):
        self.weights = weights

@step
def evaluate_model(model: CustomModel) -> dict:
    """Evaluate custom model."""
    return {"evaluation": "complete", "weights": len(model.weights)}

@pipeline
def evaluation_pipeline():
    """Pipeline using external custom object."""
    # Create custom object
    my_model = CustomModel(weights=[0.1, 0.2, 0.3])

    # Provide as external artifact with custom materializer
    model_artifact = ExternalArtifact(
        value=my_model,
        materializer=CloudpickleMaterializer
    )

    # Use in step
    evaluation = evaluate_model(model=model_artifact)
```

### Versioning Strategy

```python
from zenml import step, ArtifactConfig
from datetime import datetime

@step
def daily_snapshot(data: list) -> tuple[dict, ArtifactConfig]:
    """Create daily data snapshot with date-based versioning."""
    snapshot = {"data": data, "timestamp": datetime.now().isoformat()}

    config = ArtifactConfig(
        name="daily_snapshot",
        version=f"v{datetime.now().strftime('%Y%m%d')}",
        tags=["snapshot", "daily"]
    )

    return snapshot, config
```

### Rich Metadata in Config

```python
from zenml import step, ArtifactConfig
import json

@step
def train_with_tracking(data: list) -> tuple[dict, ArtifactConfig]:
    """Train model with detailed tracking metadata."""
    model = {"weights": [0.1, 0.2, 0.3]}

    # Comprehensive metadata
    metadata = {
        "training_config": {
            "learning_rate": 0.001,
            "batch_size": 32,
            "epochs": 10
        },
        "data_info": {
            "samples": len(data),
            "features": 10,
            "split": "80/20"
        },
        "environment": {
            "framework": "pytorch",
            "version": "2.0.0",
            "cuda": "11.8"
        },
        "metrics": {
            "final_loss": 0.05,
            "final_accuracy": 0.95
        }
    }

    config = ArtifactConfig(
        name="tracked_model",
        version="v1.0",
        tags=["production", "tracked"],
        run_metadata=metadata
    )

    return model, config
```

### Combining External Artifacts and Configs

```python
from zenml import step, pipeline, ExternalArtifact, ArtifactConfig

@step
def merge_models(
    model_a: dict,
    model_b: dict
) -> tuple[dict, ArtifactConfig]:
    """Merge two models."""
    merged = {
        "weights_a": model_a.get("weights", []),
        "weights_b": model_b.get("weights", []),
        "merged": True
    }

    config = ArtifactConfig(
        name="ensemble_model",
        tags=["ensemble", "merged"],
        run_metadata={
            "component_models": 2,
            "merge_strategy": "average"
        }
    )

    return merged, config

@pipeline
def ensemble_pipeline():
    """Create ensemble from external model values."""
    # Provide external model values
    model_a = ExternalArtifact(value={"weights": [0.1, 0.2]})
    model_b = ExternalArtifact(value={"weights": [0.3, 0.4]})

    # Create ensemble with custom output config
    ensemble = merge_models(model_a=model_a, model_b=model_b)
```

### Conditional Artifact Configuration

```python
from zenml import step, ArtifactConfig
import os

@step
def train_with_env_aware_config(data: list) -> tuple[dict, ArtifactConfig]:
    """Configure artifact based on environment."""
    model = {"weights": [0.1, 0.2]}

    # Different config for different environments
    environment = os.getenv("ENV", "development")

    if environment == "production":
        config = ArtifactConfig(
            name="production_model",
            version="stable",
            tags=["production", "validated", "monitored"]
        )
    else:
        config = ArtifactConfig(
            name="dev_model",
            version="latest",
            tags=["development", "experimental"]
        )

    return model, config
```

### External Artifact without Metadata

```python
from zenml import step, pipeline, ExternalArtifact

@step
def process_data(data: list) -> dict:
    """Process external data."""
    return {"processed": len(data)}

@pipeline
def lightweight_pipeline():
    """Pipeline with external artifact without metadata extraction."""
    # Provide data without storing metadata/visualizations
    raw_data = [1, 2, 3, 4, 5]
    data_artifact = ExternalArtifact(
        value=raw_data,
        store_artifact_metadata=False,
        store_artifact_visualizations=False
    )

    result = process_data(data=data_artifact)
```

### Multiple External Artifacts

```python
from zenml import step, pipeline, ExternalArtifact
import numpy as np

@step
def train_with_multiple_inputs(
    train_data: np.ndarray,
    validation_data: np.ndarray,
    config: dict
) -> dict:
    """Train using multiple external artifacts."""
    return {"model": "trained", "train_samples": len(train_data)}

@pipeline
def multi_input_pipeline():
    """Pipeline with multiple external artifacts."""
    # Provide multiple external values
    train = ExternalArtifact(value=np.array([[1, 2], [3, 4]]))
    val = ExternalArtifact(value=np.array([[5, 6]]))
    cfg = ExternalArtifact(value={"learning_rate": 0.001, "epochs": 10})

    model = train_with_multiple_inputs(
        train_data=train,
        validation_data=val,
        config=cfg
    )
```