or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

amazon-algorithms.md automl.md core-training.md data-processing.md debugging-profiling.md experiments.md framework-training.md hyperparameter-tuning.md index.md model-monitoring.md model-serving.md remote-functions.md

docs/automl.md

0

# AutoML

1

2

Automated machine learning capabilities for tabular data, image classification, text classification, and time series forecasting with minimal configuration required. AutoML automatically handles feature engineering, algorithm selection, and hyperparameter tuning.

3

4

## Capabilities

5

6

### AutoML v1

7

8

Original AutoML implementation for tabular data with automatic model selection and optimization.

9

10

```python { .api }

11

class AutoML:

12

"""

13

AutoML estimator for automated machine learning on tabular data.

14

15

Parameters:

16

- role (str): IAM role ARN

17

- target_attribute_name (str): Name of target column

18

- output_path (str, optional): S3 path for output

19

- compression_type (str, optional): Input compression type

20

- sagemaker_session (Session, optional): SageMaker session

21

- max_candidates (int, optional): Maximum number of candidates to evaluate

22

- max_runtime_per_training_job_in_seconds (int, optional): Max runtime per job

23

- total_job_runtime_in_seconds (int, optional): Total job runtime limit

24

- problem_type (str, optional): Problem type ("BinaryClassification", "MulticlassClassification", "Regression")

25

- objective (dict, optional): Optimization objective configuration

26

- generate_candidate_definitions_only (bool, optional): Only generate definitions

27

- tags (list, optional): Resource tags

28

"""

29

def __init__(self, role: str, target_attribute_name: str, **kwargs): ...

30

31

def fit(self, inputs: 'AutoMLInput', wait: bool = True, logs: bool = True,

32

job_name: str = None) -> 'AutoMLJob': ...

33

34

def deploy(self, initial_instance_count: int, instance_type: str,

35

candidate: dict = None, **kwargs) -> 'Predictor': ...

36

37

def describe_auto_ml_job(self, job_name: str = None) -> dict: ...

38

39

def list_candidates(self, job_name: str = None, status_equals: str = None) -> list: ...

40

41

def best_candidate(self, job_name: str = None) -> dict: ...

42

43

class AutoMLJob:

44

"""

45

Represents an AutoML training job.

46

"""

47

def __init__(self, sagemaker_session: 'Session', job_name: str, inputs: 'AutoMLInput'): ...

48

49

def wait(self, logs: bool = True): ...

50

51

def describe(self) -> dict: ...

52

53

def stop(self): ...

54

55

class AutoMLInput:

56

"""

57

Input configuration for AutoML jobs.

58

59

Parameters:

60

- inputs (str or list): S3 paths to training data

61

- target_attribute_name (str): Target column name

62

- compression (str, optional): Data compression type

63

- s3_data_type (str, optional): S3 data type ("S3Prefix", "ManifestFile")

64

"""

65

def __init__(self, inputs, target_attribute_name: str, **kwargs): ...

66

67

class CandidateEstimator:

68

"""

69

Estimator for AutoML candidate models.

70

71

Parameters:

72

- candidate (dict): Candidate definition from AutoML job

73

- sagemaker_session (Session, optional): SageMaker session

74

"""

75

def __init__(self, candidate: dict, **kwargs): ...

76

77

def fit(self, inputs, **kwargs): ...

78

79

def deploy(self, initial_instance_count: int, instance_type: str, **kwargs) -> 'Predictor': ...

80

81

class CandidateStep:

82

"""

83

Individual step in AutoML candidate pipeline.

84

"""

85

def __init__(self, name: str, step_type: str, **kwargs): ...

86

```

87

88

### AutoML v2

89

90

Enhanced AutoML with support for multiple problem types including time series forecasting and text classification.

91

92

```python { .api }

93

class AutoMLV2:

94

"""

95

AutoML v2 estimator with enhanced capabilities.

96

97

Parameters:

98

- role (str): IAM role ARN

99

- output_path (str, optional): S3 path for output

100

- sagemaker_session (Session, optional): SageMaker session

101

- max_candidates (int, optional): Maximum number of candidates

102

- max_runtime_per_training_job_in_seconds (int, optional): Max runtime per job

103

- total_job_runtime_in_seconds (int, optional): Total job runtime limit

104

- tags (list, optional): Resource tags

105

- security_config (dict, optional): Security configuration

106

- data_split_config (dict, optional): Data splitting configuration

107

"""

108

def __init__(self, role: str, **kwargs): ...

109

110

def fit(self, inputs: 'AutoMLDataChannel', auto_ml_job_config: dict = None,

111

wait: bool = True, logs: bool = True, job_name: str = None) -> 'AutoMLJobV2': ...

112

113

def deploy(self, initial_instance_count: int, instance_type: str,

114

candidate: dict = None, **kwargs) -> 'Predictor': ...

115

116

class AutoMLJobV2:

117

"""

118

Represents an AutoML v2 training job.

119

"""

120

def __init__(self, sagemaker_session: 'Session', job_name: str): ...

121

122

def wait(self, logs: bool = True): ...

123

124

def describe(self) -> dict: ...

125

126

def stop(self): ...

127

128

class AutoMLDataChannel:

129

"""

130

Data channel configuration for AutoML v2.

131

132

Parameters:

133

- s3_data_source (dict): S3 data source configuration

134

- channel_type (str): Channel type ("training", "validation")

135

- compression_type (str, optional): Data compression

136

- content_type (str, optional): Content type

137

- sample_weight_attribute_name (str, optional): Sample weight column

138

"""

139

def __init__(self, s3_data_source: dict, channel_type: str = "training", **kwargs): ...

140

141

class LocalAutoMLDataChannel:

142

"""

143

Local data channel for AutoML development.

144

"""

145

def __init__(self, data, channel_type: str = "training", **kwargs): ...

146

```

147

148

### AutoML Configuration Classes

149

150

Problem-specific configuration classes for different AutoML use cases.

151

152

```python { .api }

153

class AutoMLTabularConfig:

154

"""

155

Configuration for tabular data AutoML.

156

157

Parameters:

158

- target_attribute_name (str): Target column name

159

- problem_type (str, optional): Problem type ("BinaryClassification", "MulticlassClassification", "Regression")

160

- objective (dict, optional): Optimization objective

161

- sample_weight_attribute_name (str, optional): Sample weight column

162

- feature_specification_s3_uri (str, optional): S3 path to feature specification

163

- mode (str, optional): AutoML mode ("AUTO", "ENSEMBLING", "HYPERPARAMETER_TUNING")

164

- generate_candidate_definitions_only (bool, optional): Only generate definitions

165

- candidate_generation_config (dict, optional): Candidate generation configuration

166

"""

167

def __init__(self, target_attribute_name: str, **kwargs): ...

168

169

class AutoMLTimeSeriesForecastingConfig:

170

"""

171

Configuration for time series forecasting AutoML.

172

173

Parameters:

174

- forecast_frequency (str): Forecasting frequency ("Y", "M", "W", "D", "H", "30min", "15min", "10min", "5min", "1min")

175

- forecast_horizon (int): Number of time steps to forecast

176

- forecast_quantiles (list, optional): Quantiles for probabilistic forecasting

177

- transformations (dict, optional): Data transformations configuration

178

- time_series_config (dict, optional): Time series specific configuration

179

- holiday_config (list, optional): Holiday calendar configuration

180

"""

181

def __init__(self, forecast_frequency: str, forecast_horizon: int, **kwargs): ...

182

183

class AutoMLImageClassificationConfig:

184

"""

185

Configuration for image classification AutoML.

186

187

Parameters:

188

- mode (str, optional): Training mode ("AUTO", "ENSEMBLING")

189

"""

190

def __init__(self, mode: str = "AUTO"): ...

191

192

class AutoMLTextClassificationConfig:

193

"""

194

Configuration for text classification AutoML.

195

196

Parameters:

197

- content_column (str): Name of text content column

198

- target_label_column (str): Name of target label column

199

- mode (str, optional): Training mode ("AUTO", "ENSEMBLING")

200

"""

201

def __init__(self, content_column: str, target_label_column: str, **kwargs): ...

202

203

class AutoMLTextGenerationConfig:

204

"""

205

Configuration for text generation AutoML.

206

207

Parameters:

208

- base_model_name (str): Base model for fine-tuning

209

- text_generation_hyper_parameters (dict, optional): Hyperparameters

210

"""

211

def __init__(self, base_model_name: str, **kwargs): ...

212

```

213

214

## Usage Examples

215

216

### AutoML v1 for Tabular Classification

217

218

```python

219

from sagemaker.automl.automl import AutoML, AutoMLInput

220

221

# Create AutoML estimator

222

automl = AutoML(

223

role=role,

224

target_attribute_name="target",

225

max_candidates=20,

226

max_runtime_per_training_job_in_seconds=3600,

227

total_job_runtime_in_seconds=36000

228

)

229

230

# Create input configuration

231

automl_input = AutoMLInput(

232

inputs="s3://my-bucket/training-data.csv",

233

target_attribute_name="target",

234

compression="None"

235

)

236

237

# Start AutoML job

238

automl.fit(automl_input, wait=True)

239

240

# Get best candidate

241

best_candidate = automl.best_candidate()

242

print(f"Best candidate: {best_candidate['CandidateName']}")

243

244

# Deploy best model

245

predictor = automl.deploy(

246

initial_instance_count=1,

247

instance_type="ml.m5.large"

248

)

249

250

# Make predictions

251

predictions = predictor.predict(test_data)

252

```

253

254

### AutoML v2 for Time Series Forecasting

255

256

```python

257

from sagemaker.automl.automlv2 import AutoMLV2, AutoMLDataChannel, AutoMLTimeSeriesForecastingConfig

258

259

# Create time series forecasting configuration

260

ts_config = AutoMLTimeSeriesForecastingConfig(

261

forecast_frequency="D", # Daily forecasting

262

forecast_horizon=30, # 30 days ahead

263

forecast_quantiles=[0.1, 0.5, 0.9]

264

)

265

266

# Create AutoML v2 estimator

267

automl_v2 = AutoMLV2(

268

role=role,

269

max_candidates=10,

270

total_job_runtime_in_seconds=43200 # 12 hours

271

)

272

273

# Create data channel

274

data_channel = AutoMLDataChannel(

275

s3_data_source={

276

"S3DataType": "S3Prefix",

277

"S3Uri": "s3://my-bucket/timeseries-data/"

278

},

279

channel_type="training"

280

)

281

282

# Create job configuration

283

job_config = {

284

"ProblemConfig": {

285

"TimeSeriesForecastingConfig": ts_config

286

}

287

}

288

289

# Start AutoML job

290

automl_v2.fit(

291

inputs=data_channel,

292

auto_ml_job_config=job_config,

293

wait=True

294

)

295

296

# Deploy model

297

predictor = automl_v2.deploy(

298

initial_instance_count=1,

299

instance_type="ml.m5.large"

300

)

301

302

# Make forecasts

303

forecasts = predictor.predict(inference_data)

304

```

305

306

### AutoML v2 for Text Classification

307

308

```python

309

from sagemaker.automl.automlv2 import AutoMLTextClassificationConfig

310

311

# Create text classification configuration

312

text_config = AutoMLTextClassificationConfig(

313

content_column="text",

314

target_label_column="label"

315

)

316

317

# Create AutoML v2 estimator

318

automl_v2 = AutoMLV2(role=role)

319

320

# Create job configuration

321

job_config = {

322

"ProblemConfig": {

323

"TextClassificationConfig": text_config

324

}

325

}

326

327

# Start AutoML job

328

automl_v2.fit(

329

inputs=data_channel,

330

auto_ml_job_config=job_config

331

)

332

```