# Core Utilities

Shared utilities for metrics, constants, and data structures used across all AutoGluon predictors. These components provide the foundational infrastructure for consistent evaluation, problem type handling, and data management throughout the AutoGluon ecosystem.

## Capabilities

### Problem Type Constants

Standard constants for different machine learning problem types and configurations.

```python { .api }
# Core problem type constants
BINARY = "binary"
MULTICLASS = "multiclass"
REGRESSION = "regression"
SOFTCLASS = "softclass"  # Classification with soft targets (probabilities)
QUANTILE = "quantile"  # Quantile regression

# Problem type groupings
PROBLEM_TYPES_CLASSIFICATION = ["binary", "multiclass"]
PROBLEM_TYPES_REGRESSION = ["regression"]
PROBLEM_TYPES = ["binary", "multiclass", "regression", "softclass", "quantile"]

# Model configuration constants
AG_ARGS = "ag_args"  # AutoGluon model arguments
AG_ARGS_FIT = "ag_args_fit"  # Training-specific arguments
AG_ARGS_ENSEMBLE = "ag_args_ensemble"  # Ensemble-specific arguments

# Weight handling constants
AUTO_WEIGHT = "auto_weight"
BALANCE_WEIGHT = "balance_weight"

# Problem type inference thresholds
MULTICLASS_UPPER_LIMIT = 1000  # Max unique values for classification
LARGE_DATA_THRESHOLD = 1000
REGRESS_THRESHOLD_LARGE_DATA = 0.05
REGRESS_THRESHOLD_SMALL_DATA = 0.1
```
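
The grouping lists are handy for membership checks that would otherwise chain comparisons against individual constants. A minimal sketch (the import path and the helper function are illustrative assumptions, not part of the API above):

```python
from autogluon.core.constants import PROBLEM_TYPES_CLASSIFICATION, MULTICLASS_UPPER_LIMIT

def is_plausible_classification(problem_type: str, n_unique_labels: int) -> bool:
    # Hypothetical helper: mirrors the inference threshold documented above,
    # where targets with too many unique values are treated as regression.
    return (
        problem_type in PROBLEM_TYPES_CLASSIFICATION
        and n_unique_labels <= MULTICLASS_UPPER_LIMIT
    )

print(is_plausible_classification("multiclass", 12))   # True
print(is_plausible_classification("regression", 0))    # False
```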

### Data Structures

Core data structures for handling tabular data and feature metadata.

```python { .api }
class TabularDataset:
    def __init__(self, df: pd.DataFrame):
        """
        Enhanced DataFrame wrapper with AutoGluon-specific utilities.

        Parameters:
        - df: Input pandas DataFrame
        """

    @classmethod
    def load(cls, file_path: str, **kwargs):
        """
        Load tabular data from file.

        Parameters:
        - file_path: Path to data file (CSV, TSV, Parquet, etc.)
        - **kwargs: Additional pandas read parameters

        Returns:
        TabularDataset: Loaded dataset
        """

    def save(self, file_path: str, **kwargs):
        """
        Save tabular data to file.

        Parameters:
        - file_path: Output file path
        - **kwargs: Additional pandas save parameters
        """

class FeatureMetadata:
    def __init__(
        self,
        type_map_raw: dict = None,
        type_group_map_special: dict = None,
        **kwargs
    ):
        """
        Metadata container for feature type information and processing hints.

        Parameters:
        - type_map_raw: Mapping of feature names to raw data types
        - type_group_map_special: Mapping of features to special type groups
        """

    def get_features(self, valid_raw_types: list = None, invalid_raw_types: list = None):
        """
        Get features filtered by data types.

        Parameters:
        - valid_raw_types: Include only these raw types
        - invalid_raw_types: Exclude these raw types

        Returns:
        list: Filtered feature names
        """
```
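
Because `TabularDataset` wraps a pandas DataFrame, ordinary pandas operations apply to it directly. The sketch below also assumes a `from_df` convenience classmethod for inferring `FeatureMetadata` from a DataFrame, which is not part of the API listed above:

```python
import pandas as pd
from autogluon.common import FeatureMetadata, TabularDataset

df = pd.DataFrame({"age": [25, 32, 47], "city": ["NYC", "LA", "SF"]})
dataset = TabularDataset(df)

# Behaves like a DataFrame: pandas methods work unchanged
print(dataset.describe())

# Assumption: from_df infers type_map_raw from the DataFrame's dtypes
metadata = FeatureMetadata.from_df(dataset)
print(metadata.get_features(valid_raw_types=["int"]))
```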

### Evaluation Metrics

Comprehensive metric system for model evaluation across different problem types.

```python { .api }
class Scorer:
    def __init__(
        self,
        name: str,
        score_func: callable,
        optimum: float = None,
        sign: int = None,
        greater_is_better: bool = None,
        **kwargs
    ):
        """
        Scorer wrapper for evaluation metrics.

        Parameters:
        - name: Metric name
        - score_func: Function to compute metric
        - optimum: Optimal score value
        - sign: Sign for score interpretation (-1 or 1)
        - greater_is_better: Whether higher scores are better
        """

    def __call__(self, y_true, y_pred, **kwargs):
        """
        Compute metric score.

        Parameters:
        - y_true: True target values
        - y_pred: Predicted values

        Returns:
        float: Computed metric score
        """

# Classification metrics
def accuracy_score(y_true, y_pred, **kwargs) -> float:
    """Compute classification accuracy."""

def balanced_accuracy_score(y_true, y_pred, **kwargs) -> float:
    """Compute balanced accuracy for imbalanced datasets."""

def f1_score(y_true, y_pred, **kwargs) -> float:
    """Compute F1 score (harmonic mean of precision and recall)."""

def precision_score(y_true, y_pred, **kwargs) -> float:
    """Compute precision score."""

def recall_score(y_true, y_pred, **kwargs) -> float:
    """Compute recall score."""

def roc_auc_score(y_true, y_pred_proba, **kwargs) -> float:
    """Compute Area Under the ROC Curve."""

def log_loss(y_true, y_pred_proba, **kwargs) -> float:
    """Compute logistic loss."""

# Regression metrics
def mean_squared_error(y_true, y_pred, **kwargs) -> float:
    """Compute mean squared error."""

def root_mean_squared_error(y_true, y_pred, **kwargs) -> float:
    """Compute root mean squared error."""

def mean_absolute_error(y_true, y_pred, **kwargs) -> float:
    """Compute mean absolute error."""

def mean_absolute_percentage_error(y_true, y_pred, **kwargs) -> float:
    """Compute mean absolute percentage error."""

def r2_score(y_true, y_pred, **kwargs) -> float:
    """Compute R² coefficient of determination."""

# Metric computation utilities
def compute_metric(metric_name: str, y_true, y_pred, **kwargs) -> float:
    """
    Compute metric by name with automatic type handling.

    Parameters:
    - metric_name: Name of metric to compute
    - y_true: True target values
    - y_pred: Predictions or prediction probabilities

    Returns:
    float: Computed metric value
    """
```
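
The `sign`/`greater_is_better` pair is what lets error metrics take part in uniform "higher is better" comparisons. A sketch following the `Scorer` constructor signature above (how `sign` is applied internally is an assumption):

```python
from autogluon.core.metrics import Scorer
from sklearn.metrics import mean_squared_error

# For an error metric, lower raw values are better, so greater_is_better=False
# and sign=-1; internal scores can then be compared as "higher is better".
mse_scorer = Scorer(
    name="mean_squared_error",
    score_func=mean_squared_error,
    optimum=0.0,
    sign=-1,
    greater_is_better=False,
)

y_true = [3.0, 5.0, 2.5]
y_pred = [2.8, 5.3, 2.4]
print(mse_scorer(y_true, y_pred))
```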

### Exploratory Data Analysis

State management and utilities for exploratory data analysis workflows.

```python { .api }
class AnalysisState:
    def __init__(self, *args, **kwargs):
        """
        Dictionary-like state container with dot notation access.

        Enables dynamic attribute access for analysis results:
        state.model_performance instead of state['model_performance']
        """

    def __getattr__(self, item):
        """Enable dot notation access to stored values."""

    def __setattr__(self, name: str, value):
        """Enable dot notation assignment with nested dict conversion."""

class StateCheckMixin:
    def at_least_one_key_must_be_present(self, state: AnalysisState, *keys) -> bool:
        """
        Check if at least one required key is present in analysis state.

        Parameters:
        - state: Analysis state to check
        - *keys: Required keys to check for

        Returns:
        bool: True if at least one key is present
        """

def is_key_present_in_state(state: dict, key: str) -> bool:
    """
    Check if a key exists in nested state dictionary.

    Parameters:
    - state: State dictionary to search
    - key: Key to search for

    Returns:
    bool: True if key is present
    """
```
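
A short sketch of guarding an analysis step with the key-presence helper (the import path for `is_key_present_in_state` is an assumption; the state contents are illustrative):

```python
from autogluon.eda import AnalysisState
from autogluon.eda.state import is_key_present_in_state  # assumed location

state = AnalysisState()
state.dataset = {"train_rows": 1000}

# Skip downstream analysis unless its inputs were registered in the state
if is_key_present_in_state(state, "dataset"):
    print("dataset registered, proceeding with analysis")
```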

### Utility Functions

Helper functions for common operations across AutoGluon components.

```python { .api }
def infer_problem_type(y: pd.Series, silent: bool = False) -> str:
    """
    Automatically infer problem type from target variable.

    Parameters:
    - y: Target variable values
    - silent: Suppress logging output

    Returns:
    str: Inferred problem type ('binary', 'multiclass', 'regression')
    """

def get_pred_from_proba(y_pred_proba, problem_type: str) -> np.ndarray:
    """
    Convert prediction probabilities to class predictions.

    Parameters:
    - y_pred_proba: Prediction probabilities
    - problem_type: Type of ML problem

    Returns:
    numpy.ndarray: Class predictions
    """

def normalize_pred_probas(y_pred_proba, problem_type: str) -> np.ndarray:
    """
    Normalize prediction probabilities to valid probability distributions.

    Parameters:
    - y_pred_proba: Raw prediction probabilities
    - problem_type: Type of ML problem

    Returns:
    numpy.ndarray: Normalized probabilities
    """

def setup_outputdir(path: str, warn_if_exist: bool = True) -> str:
    """
    Set up output directory for saving models and artifacts.

    Parameters:
    - path: Desired output directory path
    - warn_if_exist: Whether to warn if directory exists

    Returns:
    str: Validated output directory path
    """
```
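
A brief sketch of converting probabilities to hard class predictions (assuming these helpers are importable from `autogluon.core.utils`):

```python
import numpy as np
from autogluon.core.utils import get_pred_from_proba

# Multiclass probabilities: one row per sample, one column per class
y_pred_proba = np.array([
    [0.8, 0.1, 0.1],
    [0.2, 0.5, 0.3],
])
y_pred = get_pred_from_proba(y_pred_proba, problem_type="multiclass")
print(y_pred)  # expected: [0, 1] (argmax per row)
```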

## Usage Examples

### Problem Type Inference and Metrics

```python
from autogluon.core import constants, metrics
from autogluon.core.utils import infer_problem_type
import pandas as pd
import numpy as np

# Infer problem type from target variable
target_binary = pd.Series([0, 1, 1, 0, 1])
target_multiclass = pd.Series(['A', 'B', 'C', 'A', 'B'])
target_regression = pd.Series([1.5, 2.3, 3.7, 4.1, 5.2])

print(f"Binary: {infer_problem_type(target_binary)}")
print(f"Multiclass: {infer_problem_type(target_multiclass)}")
print(f"Regression: {infer_problem_type(target_regression)}")

# Use problem type constants to pick an evaluation metric
problem_type = infer_problem_type(target_binary)
if problem_type == constants.BINARY:
    eval_metric = 'roc_auc'
elif problem_type == constants.MULTICLASS:
    eval_metric = 'accuracy'
elif problem_type == constants.REGRESSION:
    eval_metric = 'rmse'

# Compute metrics
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
y_pred_proba = [[0.8, 0.2], [0.3, 0.7], [0.6, 0.4], [0.9, 0.1], [0.2, 0.8]]

accuracy = metrics.compute_metric('accuracy', y_true, y_pred)
auc = metrics.compute_metric('roc_auc', y_true, y_pred_proba)
print(f"Accuracy: {accuracy:.3f}, AUC: {auc:.3f}")
```

### Working with TabularDataset

```python
from autogluon.common import TabularDataset
import pandas as pd

# Create TabularDataset from DataFrame
df = pd.DataFrame({
    'feature1': [1, 2, 3, 4, 5],
    'feature2': ['A', 'B', 'A', 'C', 'B'],
    'target': [0, 1, 0, 1, 1]
})

dataset = TabularDataset(df)

# Load from file
dataset = TabularDataset.load('data.csv')

# Access underlying DataFrame attributes
print(f"Shape: {dataset.shape}")
print(f"Columns: {list(dataset.columns)}")

# Save to file
dataset.save('processed_data.csv')
```

### Feature Metadata Management

```python
from autogluon.common import FeatureMetadata

# Create feature metadata
feature_metadata = FeatureMetadata(
    type_map_raw={
        'numerical_col': 'float',
        'categorical_col': 'object',
        'text_col': 'object',
        'datetime_col': 'datetime'
    },
    type_group_map_special={
        'text_col': 'text',
        'datetime_col': 'datetime'
    }
)

# Get features by type
numerical_features = feature_metadata.get_features(valid_raw_types=['float', 'int'])
categorical_features = feature_metadata.get_features(valid_raw_types=['object'])
# Excluding numeric raw types returns every non-numeric column
# (here: both the categorical and text columns, plus the datetime column)
non_numeric_features = feature_metadata.get_features(invalid_raw_types=['float', 'int'])

print(f"Numerical features: {numerical_features}")
print(f"Categorical features: {categorical_features}")
print(f"Non-numeric features: {non_numeric_features}")
```

### Custom Metric Creation

```python
from autogluon.core.metrics import Scorer
import numpy as np

# Define custom metric function
def custom_weighted_accuracy(y_true, y_pred, sample_weight=None):
    """Custom weighted accuracy metric."""
    # Convert to arrays so elementwise comparison works on plain lists too
    correct = (np.asarray(y_true) == np.asarray(y_pred)).astype(float)
    if sample_weight is not None:
        return np.average(correct, weights=sample_weight)
    return np.mean(correct)

# Create custom scorer
custom_scorer = Scorer(
    name='weighted_accuracy',
    score_func=custom_weighted_accuracy,
    greater_is_better=True,
    optimum=1.0
)

# Use custom scorer
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
weights = [1, 2, 1, 1, 2]

score = custom_scorer(y_true, y_pred, sample_weight=weights)
print(f"Custom weighted accuracy: {score:.3f}")
```

### Analysis State Management

```python
from autogluon.eda import AnalysisState

# Create analysis state
state = AnalysisState()

# Use dot notation for assignment
state.model_performance = {'accuracy': 0.85, 'f1': 0.82}
state.feature_importance = ['feature1', 'feature2', 'feature3']
state.training_time = 120.5

# Access with dot notation
print(f"Best accuracy: {state.model_performance['accuracy']}")
print(f"Training time: {state.training_time}s")
print(f"Top feature: {state.feature_importance[0]}")

# Nested state management
state.validation = AnalysisState()
state.validation.scores = {'val_acc': 0.83, 'val_f1': 0.80}
state.validation.fold_results = [0.82, 0.84, 0.83, 0.85, 0.81]

print(f"Validation accuracy: {state.validation.scores['val_acc']}")
```