0
# AutoML
1
2
AutoML provides automated machine learning for tabular data, image classification, text classification, text generation, and time series forecasting, with minimal configuration required. It automatically handles feature engineering, algorithm selection, and hyperparameter tuning.
3
4
## Capabilities
5
6
### AutoML v1
7
8
Original AutoML implementation for tabular data with automatic model selection and optimization.
9
10
```python { .api }
11
class AutoML:
12
"""
13
AutoML estimator for automated machine learning on tabular data.
14
15
Parameters:
16
- role (str): IAM role ARN
17
- target_attribute_name (str): Name of target column
18
- output_path (str, optional): S3 path for output
19
- compression_type (str, optional): Input compression type
20
- sagemaker_session (Session, optional): SageMaker session
21
- max_candidates (int, optional): Maximum number of candidates to evaluate
22
- max_runtime_per_training_job_in_seconds (int, optional): Max runtime per job
23
- total_job_runtime_in_seconds (int, optional): Total job runtime limit
24
- problem_type (str, optional): Problem type ("BinaryClassification", "MulticlassClassification", "Regression")
25
- objective (dict, optional): Optimization objective configuration
26
- generate_candidate_definitions_only (bool, optional): If True, only generate candidate definitions without training the candidate models
27
- tags (list, optional): Resource tags
28
"""
29
def __init__(self, role: str, target_attribute_name: str, **kwargs): ...
30
31
def fit(self, inputs: 'AutoMLInput', wait: bool = True, logs: bool = True,
32
job_name: str = None) -> 'AutoMLJob': ...
33
34
def deploy(self, initial_instance_count: int, instance_type: str,
35
candidate: dict = None, **kwargs) -> 'Predictor': ...
36
37
def describe_auto_ml_job(self, job_name: str = None) -> dict: ...
38
39
def list_candidates(self, job_name: str = None, status_equals: str = None) -> list: ...
40
41
def best_candidate(self, job_name: str = None) -> dict: ...
42
43
class AutoMLJob:
44
"""
45
Represents an AutoML training job.
46
"""
47
def __init__(self, sagemaker_session: 'Session', job_name: str, inputs: 'AutoMLInput'): ...
48
49
def wait(self, logs: bool = True): ...
50
51
def describe(self) -> dict: ...
52
53
def stop(self): ...
54
55
class AutoMLInput:
56
"""
57
Input configuration for AutoML jobs.
58
59
Parameters:
60
- inputs (str or list): S3 paths to training data
61
- target_attribute_name (str): Target column name
62
- compression (str, optional): Data compression type
63
- s3_data_type (str, optional): S3 data type ("S3Prefix", "ManifestFile")
64
"""
65
def __init__(self, inputs, target_attribute_name: str, **kwargs): ...
66
67
class CandidateEstimator:
68
"""
69
Estimator for AutoML candidate models.
70
71
Parameters:
72
- candidate (dict): Candidate definition from AutoML job
73
- sagemaker_session (Session, optional): SageMaker session
74
"""
75
def __init__(self, candidate: dict, **kwargs): ...
76
77
def fit(self, inputs, **kwargs): ...
78
79
def deploy(self, initial_instance_count: int, instance_type: str, **kwargs) -> 'Predictor': ...
80
81
class CandidateStep:
82
"""
83
Individual step in AutoML candidate pipeline.
84
"""
85
def __init__(self, name: str, step_type: str, **kwargs): ...
86
```
87
88
### AutoML v2
89
90
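Enhanced AutoML with support for multiple problem types: tabular data, image classification, text classification, text generation, and time series forecasting. The problem type is selected through the configuration classes described below.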
91
92
```python { .api }
93
class AutoMLV2:
94
"""
95
AutoML v2 estimator with enhanced capabilities.
96
97
Parameters:
98
- role (str): IAM role ARN
99
- output_path (str, optional): S3 path for output
100
- sagemaker_session (Session, optional): SageMaker session
101
- max_candidates (int, optional): Maximum number of candidates
102
- max_runtime_per_training_job_in_seconds (int, optional): Max runtime per job
103
- total_job_runtime_in_seconds (int, optional): Total job runtime limit
104
- tags (list, optional): Resource tags
105
- security_config (dict, optional): Security configuration
106
- data_split_config (dict, optional): Data splitting configuration
107
"""
108
def __init__(self, role: str, **kwargs): ...
109
110
def fit(self, inputs: 'AutoMLDataChannel', auto_ml_job_config: dict = None,
111
wait: bool = True, logs: bool = True, job_name: str = None) -> 'AutoMLJobV2': ...
112
113
def deploy(self, initial_instance_count: int, instance_type: str,
114
candidate: dict = None, **kwargs) -> 'Predictor': ...
115
116
class AutoMLJobV2:
117
"""
118
Represents an AutoML v2 training job.
119
"""
120
def __init__(self, sagemaker_session: 'Session', job_name: str): ...
121
122
def wait(self, logs: bool = True): ...
123
124
def describe(self) -> dict: ...
125
126
def stop(self): ...
127
128
class AutoMLDataChannel:
129
"""
130
Data channel configuration for AutoML v2.
131
132
Parameters:
133
- s3_data_source (dict): S3 data source configuration
134
- channel_type (str, optional): Channel type ("training" or "validation"); defaults to "training"
135
- compression_type (str, optional): Data compression
136
- content_type (str, optional): Content type
137
- sample_weight_attribute_name (str, optional): Sample weight column
138
"""
139
def __init__(self, s3_data_source: dict, channel_type: str = "training", **kwargs): ...
140
141
class LocalAutoMLDataChannel:
142
"""
143
Local data channel for AutoML development.
144
"""
145
def __init__(self, data, channel_type: str = "training", **kwargs): ...
146
```
147
148
### AutoML Configuration Classes
149
150
Problem-specific configuration classes for different AutoML use cases.
151
152
```python { .api }
153
class AutoMLTabularConfig:
154
"""
155
Configuration for tabular data AutoML.
156
157
Parameters:
158
- target_attribute_name (str): Target column name
159
- problem_type (str, optional): Problem type ("BinaryClassification", "MulticlassClassification", "Regression")
160
- objective (dict, optional): Optimization objective
161
- sample_weight_attribute_name (str, optional): Sample weight column
162
- feature_specification_s3_uri (str, optional): S3 path to feature specification
163
- mode (str, optional): AutoML mode ("AUTO", "ENSEMBLING", "HYPERPARAMETER_TUNING")
164
- generate_candidate_definitions_only (bool, optional): If True, only generate candidate definitions without training the candidate models
165
- candidate_generation_config (dict, optional): Candidate generation configuration
166
"""
167
def __init__(self, target_attribute_name: str, **kwargs): ...
168
169
class AutoMLTimeSeriesForecastingConfig:
170
"""
171
Configuration for time series forecasting AutoML.
172
173
Parameters:
174
- forecast_frequency (str): Forecasting frequency ("Y", "M", "W", "D", "H", "30min", "15min", "10min", "5min", "1min")
175
- forecast_horizon (int): Number of time steps to forecast
176
- forecast_quantiles (list, optional): Quantiles for probabilistic forecasting
177
- transformations (dict, optional): Data transformations configuration
178
- time_series_config (dict, optional): Time series specific configuration
179
- holiday_config (list, optional): Holiday calendar configuration
180
"""
181
def __init__(self, forecast_frequency: str, forecast_horizon: int, **kwargs): ...
182
183
class AutoMLImageClassificationConfig:
184
"""
185
Configuration for image classification AutoML.
186
187
Parameters:
188
- mode (str, optional): Training mode ("AUTO" or "ENSEMBLING"); defaults to "AUTO"
189
"""
190
def __init__(self, mode: str = "AUTO"): ...
191
192
class AutoMLTextClassificationConfig:
193
"""
194
Configuration for text classification AutoML.
195
196
Parameters:
197
- content_column (str): Name of text content column
198
- target_label_column (str): Name of target label column
199
- mode (str, optional): Training mode ("AUTO", "ENSEMBLING")
200
"""
201
def __init__(self, content_column: str, target_label_column: str, **kwargs): ...
202
203
class AutoMLTextGenerationConfig:
204
"""
205
Configuration for text generation AutoML.
206
207
Parameters:
208
- base_model_name (str): Base model for fine-tuning
209
- text_generation_hyper_parameters (dict, optional): Hyperparameters
210
"""
211
def __init__(self, base_model_name: str, **kwargs): ...
212
```
213
214
## Usage Examples
215
216
### AutoML v1 for Tabular Classification
217
218
```python
219
from sagemaker.automl.automl import AutoML, AutoMLInput
220
221
# Create AutoML estimator
222
automl = AutoML(
223
role=role,
224
target_attribute_name="target",
225
max_candidates=20,
226
max_runtime_per_training_job_in_seconds=3600,
227
total_job_runtime_in_seconds=36000
228
)
229
230
# Create input configuration
231
automl_input = AutoMLInput(
232
inputs="s3://my-bucket/training-data.csv",
233
target_attribute_name="target",
234
compression="None"
235
)
236
237
# Start AutoML job
238
automl.fit(automl_input, wait=True)
239
240
# Get best candidate
241
best_candidate = automl.best_candidate()
242
print(f"Best candidate: {best_candidate['CandidateName']}")
243
244
# Deploy best model
245
predictor = automl.deploy(
246
initial_instance_count=1,
247
instance_type="ml.m5.large"
248
)
249
250
# Make predictions
251
predictions = predictor.predict(test_data)
252
```
253
254
### AutoML v2 for Time Series Forecasting
255
256
```python
257
from sagemaker.automl.automlv2 import AutoMLV2, AutoMLDataChannel, AutoMLTimeSeriesForecastingConfig
258
259
# Create time series forecasting configuration
260
ts_config = AutoMLTimeSeriesForecastingConfig(
261
forecast_frequency="D", # Daily forecasting
262
forecast_horizon=30, # 30 days ahead
263
forecast_quantiles=[0.1, 0.5, 0.9]
264
)
265
266
# Create AutoML v2 estimator
267
automl_v2 = AutoMLV2(
268
role=role,
269
max_candidates=10,
270
total_job_runtime_in_seconds=43200 # 12 hours
271
)
272
273
# Create data channel
274
data_channel = AutoMLDataChannel(
275
s3_data_source={
276
"S3DataType": "S3Prefix",
277
"S3Uri": "s3://my-bucket/timeseries-data/"
278
},
279
channel_type="training"
280
)
281
282
# Create job configuration
283
job_config = {
284
"ProblemConfig": {
285
"TimeSeriesForecastingConfig": ts_config
286
}
287
}
288
289
# Start AutoML job
290
automl_v2.fit(
291
inputs=data_channel,
292
auto_ml_job_config=job_config,
293
wait=True
294
)
295
296
# Deploy model
297
predictor = automl_v2.deploy(
298
initial_instance_count=1,
299
instance_type="ml.m5.large"
300
)
301
302
# Make forecasts
303
forecasts = predictor.predict(inference_data)
304
```
305
306
### AutoML v2 for Text Classification
307
308
```python
309
from sagemaker.automl.automlv2 import AutoMLV2, AutoMLTextClassificationConfig
310
311
# Create text classification configuration
312
text_config = AutoMLTextClassificationConfig(
313
content_column="text",
314
target_label_column="label"
315
)
316
317
# Create AutoML v2 estimator
318
automl_v2 = AutoMLV2(role=role)
319
320
# Create job configuration
321
job_config = {
322
"ProblemConfig": {
323
"TextClassificationConfig": text_config
324
}
325
}
326
327
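# Start AutoML job (`data_channel` must be an AutoMLDataChannel pointing at the text dataset, built the same way as in the time series example)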
328
automl_v2.fit(
329
inputs=data_channel,
330
auto_ml_job_config=job_config
331
)
332
```