# Core Utilities

Shared utilities for metrics, constants, and data structures used across all AutoGluon predictors. These components provide the foundational infrastructure for consistent evaluation, problem type handling, and data management throughout the AutoGluon ecosystem.

## Capabilities

### Problem Type Constants

Standard constants for different machine learning problem types and configurations.

```python { .api }
# Core problem type constants
BINARY = "binary"
MULTICLASS = "multiclass"
REGRESSION = "regression"
SOFTCLASS = "softclass"  # Classification with soft targets (probabilities)
QUANTILE = "quantile"    # Quantile regression

# Problem type groupings
PROBLEM_TYPES_CLASSIFICATION = ["binary", "multiclass"]
PROBLEM_TYPES_REGRESSION = ["regression"]
PROBLEM_TYPES = ["binary", "multiclass", "regression", "softclass", "quantile"]

# Model configuration constants
AG_ARGS = "ag_args"                    # AutoGluon model arguments
AG_ARGS_FIT = "ag_args_fit"            # Training-specific arguments
AG_ARGS_ENSEMBLE = "ag_args_ensemble"  # Ensemble-specific arguments

# Weight handling constants
AUTO_WEIGHT = "auto_weight"
BALANCE_WEIGHT = "balance_weight"

# Problem type inference thresholds
MULTICLASS_UPPER_LIMIT = 1000        # Max unique values for classification
LARGE_DATA_THRESHOLD = 1000          # Row count above which a dataset is treated as large
REGRESS_THRESHOLD_LARGE_DATA = 0.05  # Unique-value ratio cutoff for regression (large data)
REGRESS_THRESHOLD_SMALL_DATA = 0.1   # Unique-value ratio cutoff for regression (small data)
```
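
These thresholds drive automatic problem-type inference (see `infer_problem_type` under Utility Functions). The following is a minimal sketch of how they might combine, not the library's exact logic; it assumes the constants are importable from `autogluon.core.constants`:

```python
import pandas as pd
from autogluon.core.constants import (
    BINARY, MULTICLASS, REGRESSION,
    MULTICLASS_UPPER_LIMIT, LARGE_DATA_THRESHOLD,
    REGRESS_THRESHOLD_LARGE_DATA, REGRESS_THRESHOLD_SMALL_DATA,
)

def sketch_infer_problem_type(y: pd.Series) -> str:
    """Illustrative only -- a rough approximation of threshold-based inference."""
    n_unique = y.nunique()
    if n_unique == 2:
        return BINARY
    if pd.api.types.is_float_dtype(y):
        # Large datasets use the stricter unique-ratio cutoff
        threshold = (REGRESS_THRESHOLD_LARGE_DATA if len(y) > LARGE_DATA_THRESHOLD
                     else REGRESS_THRESHOLD_SMALL_DATA)
        if n_unique / len(y) > threshold:
            return REGRESSION
    if n_unique <= MULTICLASS_UPPER_LIMIT:
        return MULTICLASS
    return REGRESSION
```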

### Data Structures

Core data structures for handling tabular data and feature metadata.

```python { .api }
class TabularDataset:
    def __init__(self, df: pd.DataFrame):
        """
        Enhanced DataFrame wrapper with AutoGluon-specific utilities.

        Parameters:
        - df: Input pandas DataFrame
        """

    @classmethod
    def load(cls, file_path: str, **kwargs):
        """
        Load tabular data from file.

        Parameters:
        - file_path: Path to data file (CSV, TSV, Parquet, etc.)
        - **kwargs: Additional pandas read parameters

        Returns:
        TabularDataset: Loaded dataset
        """

    def save(self, file_path: str, **kwargs):
        """
        Save tabular data to file.

        Parameters:
        - file_path: Output file path
        - **kwargs: Additional pandas save parameters
        """

class FeatureMetadata:
    def __init__(
        self,
        type_map_raw: dict = None,
        type_group_map_special: dict = None,
        **kwargs
    ):
        """
        Metadata container for feature type information and processing hints.

        Parameters:
        - type_map_raw: Mapping of feature names to raw data types
        - type_group_map_special: Mapping of special type groups to lists of feature names
        """

    def get_features(self, valid_raw_types: list = None, invalid_raw_types: list = None):
        """
        Get features filtered by data types.

        Parameters:
        - valid_raw_types: Include only these raw types
        - invalid_raw_types: Exclude these raw types

        Returns:
        list: Filtered feature names
        """
```
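
In practice, feature metadata is usually inferred from data rather than written by hand. A short sketch, assuming the `FeatureMetadata.from_df` convenience constructor (treat the exact import path as an assumption):

```python
import pandas as pd
from autogluon.common.features.feature_metadata import FeatureMetadata  # assumed path

df = pd.DataFrame({
    "age": [25, 32, 47],
    "city": ["NYC", "LA", "SF"],
    "joined": pd.to_datetime(["2020-01-01", "2021-06-15", "2022-03-20"]),
})

# Infer raw types (int / object / datetime) directly from the DataFrame
metadata = FeatureMetadata.from_df(df)
print(metadata.get_features(valid_raw_types=["int"]))  # ['age']
```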

### Evaluation Metrics

Comprehensive metric system for model evaluation across different problem types.

```python { .api }
class Scorer:
    def __init__(
        self,
        name: str,
        score_func: callable,
        optimum: float = None,
        sign: int = None,
        greater_is_better: bool = None,
        **kwargs
    ):
        """
        Scorer wrapper for evaluation metrics.

        Parameters:
        - name: Metric name
        - score_func: Function to compute metric
        - optimum: Optimal score value
        - sign: Sign for score interpretation (-1 or 1)
        - greater_is_better: Whether higher scores are better
        """

    def __call__(self, y_true, y_pred, **kwargs):
        """
        Compute metric score.

        Parameters:
        - y_true: True target values
        - y_pred: Predicted values

        Returns:
        float: Computed metric score
        """

# Classification metrics
def accuracy_score(y_true, y_pred, **kwargs) -> float:
    """Compute classification accuracy."""

def balanced_accuracy_score(y_true, y_pred, **kwargs) -> float:
    """Compute balanced accuracy for imbalanced datasets."""

def f1_score(y_true, y_pred, **kwargs) -> float:
    """Compute F1 score (harmonic mean of precision and recall)."""

def precision_score(y_true, y_pred, **kwargs) -> float:
    """Compute precision score."""

def recall_score(y_true, y_pred, **kwargs) -> float:
    """Compute recall score."""

def roc_auc_score(y_true, y_pred_proba, **kwargs) -> float:
    """Compute Area Under the ROC Curve."""

def log_loss(y_true, y_pred_proba, **kwargs) -> float:
    """Compute logistic loss."""

# Regression metrics
def mean_squared_error(y_true, y_pred, **kwargs) -> float:
    """Compute mean squared error."""

def root_mean_squared_error(y_true, y_pred, **kwargs) -> float:
    """Compute root mean squared error."""

def mean_absolute_error(y_true, y_pred, **kwargs) -> float:
    """Compute mean absolute error."""

def mean_absolute_percentage_error(y_true, y_pred, **kwargs) -> float:
    """Compute mean absolute percentage error."""

def r2_score(y_true, y_pred, **kwargs) -> float:
    """Compute R² coefficient of determination."""

# Metric computation utilities
def compute_metric(metric_name: str, y_true, y_pred, **kwargs) -> float:
    """
    Compute metric by name with automatic type handling.

    Parameters:
    - metric_name: Name of metric to compute
    - y_true: True target values
    - y_pred: Predictions or prediction probabilities

    Returns:
    float: Computed metric value
    """
```
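
For error metrics, where lower raw values are better, the scorer's `sign` keeps reported scores on a consistent "higher is better" scale by negating the raw value. A minimal sketch using `make_scorer`, the factory `autogluon.core.metrics` exposes for wrapping custom metric functions (treat the exact signature as an assumption):

```python
from autogluon.core.metrics import make_scorer
from sklearn.metrics import mean_absolute_error

# greater_is_better=False gives the scorer sign = -1, so raw MAE values
# are negated and "higher score" still means "better model".
mae_scorer = make_scorer(
    name="mae",
    score_func=mean_absolute_error,
    optimum=0,
    greater_is_better=False,
)

y_true = [1.0, 2.0, 3.0]
y_pred = [1.1, 1.9, 3.2]
print(mae_scorer(y_true, y_pred))  # ≈ -0.133: negated so rankings sort correctly
```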

### Exploratory Data Analysis

State management and utilities for exploratory data analysis workflows.

```python { .api }
class AnalysisState:
    def __init__(self, *args, **kwargs):
        """
        Dictionary-like state container with dot notation access.

        Enables dynamic attribute access for analysis results:
        state.model_performance instead of state['model_performance']
        """

    def __getattr__(self, item):
        """Enable dot notation access to stored values."""

    def __setattr__(self, name: str, value):
        """Enable dot notation assignment with nested dict conversion."""

class StateCheckMixin:
    def at_least_one_key_must_be_present(self, state: AnalysisState, *keys) -> bool:
        """
        Check if at least one required key is present in analysis state.

        Parameters:
        - state: Analysis state to check
        - *keys: Required keys to check for

        Returns:
        bool: True if at least one key is present
        """

def is_key_present_in_state(state: dict, key: str) -> bool:
    """
    Check if a key exists in nested state dictionary.

    Parameters:
    - state: State dictionary to search
    - key: Key to search for

    Returns:
    bool: True if key is present
    """
```
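
Analysis components typically mix `StateCheckMixin` into their classes to validate preconditions before running. A hypothetical component illustrating the pattern (the `RowCounter` class below is not part of the AutoGluon API, and the `StateCheckMixin` import path is an assumption):

```python
from autogluon.eda import AnalysisState
from autogluon.eda.state import StateCheckMixin  # assumed import path

class RowCounter(StateCheckMixin):
    """Hypothetical component: counts rows if an upstream step provided data."""

    def fit(self, state: AnalysisState) -> None:
        # Skip silently unless an upstream step produced one of these keys
        if not self.at_least_one_key_must_be_present(state, "raw_data", "sampled_data"):
            return
        data = state.raw_data if "raw_data" in state else state.sampled_data
        state.row_count = len(data)

state = AnalysisState()
state.raw_data = [1, 2, 3]
RowCounter().fit(state)
print(state.row_count)  # 3
```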

### Utility Functions

Helper functions for common operations across AutoGluon components.

```python { .api }
def infer_problem_type(y: pd.Series, silent: bool = False) -> str:
    """
    Automatically infer problem type from target variable.

    Parameters:
    - y: Target variable values
    - silent: Suppress logging output

    Returns:
    str: Inferred problem type ('binary', 'multiclass', 'regression')
    """

def get_pred_from_proba(y_pred_proba, problem_type: str) -> np.ndarray:
    """
    Convert prediction probabilities to class predictions.

    Parameters:
    - y_pred_proba: Prediction probabilities
    - problem_type: Type of ML problem

    Returns:
    numpy.ndarray: Class predictions
    """

def normalize_pred_probas(y_pred_proba, problem_type: str) -> np.ndarray:
    """
    Normalize prediction probabilities to valid probability distributions.

    Parameters:
    - y_pred_proba: Raw prediction probabilities
    - problem_type: Type of ML problem

    Returns:
    numpy.ndarray: Normalized probabilities
    """

def setup_outputdir(path: str, warn_if_exist: bool = True) -> str:
    """
    Set up output directory for saving models and artifacts.

    Parameters:
    - path: Desired output directory path
    - warn_if_exist: Whether to warn if directory exists

    Returns:
    str: Validated output directory path
    """
```
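
A short sketch of turning class probabilities into hard predictions with `get_pred_from_proba`, assuming the helpers above are importable from `autogluon.core.utils`:

```python
import numpy as np
from autogluon.core.constants import MULTICLASS
from autogluon.core.utils import get_pred_from_proba

# Class probabilities for three samples over three classes
y_pred_proba = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.6, 0.3],
    [0.2, 0.3, 0.5],
])

# For multiclass problems this reduces to an argmax over each row
y_pred = get_pred_from_proba(y_pred_proba, problem_type=MULTICLASS)
print(y_pred)  # [0 1 2]
```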

## Usage Examples

### Problem Type Inference and Metrics

```python
from autogluon.core import constants, metrics
from autogluon.core.utils import infer_problem_type
import pandas as pd

# Infer problem type from target variable
target_binary = pd.Series([0, 1, 1, 0, 1])
target_multiclass = pd.Series(['A', 'B', 'C', 'A', 'B'])
target_regression = pd.Series([1.5, 2.3, 3.7, 4.1, 5.2])

print(f"Binary: {infer_problem_type(target_binary)}")
print(f"Multiclass: {infer_problem_type(target_multiclass)}")
print(f"Regression: {infer_problem_type(target_regression)}")

# Use problem type constants to pick an evaluation metric
problem_type = infer_problem_type(target_binary)
if problem_type == constants.BINARY:
    eval_metric = 'roc_auc'
elif problem_type == constants.MULTICLASS:
    eval_metric = 'accuracy'
elif problem_type == constants.REGRESSION:
    eval_metric = 'rmse'

# Compute metrics
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
y_pred_proba = [0.2, 0.7, 0.4, 0.1, 0.8]  # probability of the positive class

accuracy = metrics.compute_metric('accuracy', y_true, y_pred)
auc = metrics.compute_metric('roc_auc', y_true, y_pred_proba)
print(f"Accuracy: {accuracy:.3f}, AUC: {auc:.3f}")
```

### Working with TabularDataset

```python
from autogluon.common import TabularDataset
import pandas as pd

# Create TabularDataset from DataFrame
df = pd.DataFrame({
    'feature1': [1, 2, 3, 4, 5],
    'feature2': ['A', 'B', 'A', 'C', 'B'],
    'target': [0, 1, 0, 1, 1]
})

dataset = TabularDataset(df)

# Load from file
dataset = TabularDataset.load('data.csv')

# TabularDataset behaves like the underlying pandas DataFrame
print(f"Shape: {dataset.shape}")
print(f"Columns: {list(dataset.columns)}")

# Save to file
dataset.save('processed_data.csv')
```

### Feature Metadata Management

```python
from autogluon.common import FeatureMetadata

# Create feature metadata: raw types per feature, plus special type
# groups mapping each special type to the features it applies to
feature_metadata = FeatureMetadata(
    type_map_raw={
        'numerical_col': 'float',
        'categorical_col': 'object',
        'text_col': 'object',
        'datetime_col': 'datetime'
    },
    type_group_map_special={
        'text': ['text_col'],
        'datetime': ['datetime_col']
    }
)

# Get features by type
numerical_features = feature_metadata.get_features(valid_raw_types=['float', 'int'])
object_features = feature_metadata.get_features(valid_raw_types=['object'])
non_numeric_features = feature_metadata.get_features(invalid_raw_types=['float', 'int'])

print(f"Numerical features: {numerical_features}")
print(f"Object features: {object_features}")
print(f"Non-numeric features: {non_numeric_features}")
```

### Custom Metric Creation

```python
from autogluon.core.metrics import Scorer
import numpy as np

# Define custom metric function
def custom_weighted_accuracy(y_true, y_pred, sample_weight=None):
    """Custom weighted accuracy metric."""
    # Convert to arrays so the comparison is elementwise
    correct = (np.asarray(y_true) == np.asarray(y_pred)).astype(float)
    if sample_weight is not None:
        return np.average(correct, weights=sample_weight)
    return np.mean(correct)

# Create custom scorer
custom_scorer = Scorer(
    name='weighted_accuracy',
    score_func=custom_weighted_accuracy,
    greater_is_better=True,
    optimum=1.0
)

# Use custom scorer
y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
weights = [1, 2, 1, 1, 2]

score = custom_scorer(y_true, y_pred, sample_weight=weights)
print(f"Custom weighted accuracy: {score:.3f}")
```

### Analysis State Management

```python
from autogluon.eda import AnalysisState

# Create analysis state
state = AnalysisState()

# Use dot notation for assignment
state.model_performance = {'accuracy': 0.85, 'f1': 0.82}
state.feature_importance = ['feature1', 'feature2', 'feature3']
state.training_time = 120.5

# Access with dot notation
print(f"Best accuracy: {state.model_performance['accuracy']}")
print(f"Training time: {state.training_time}s")
print(f"Top feature: {state.feature_importance[0]}")

# Nested state management
state.validation = AnalysisState()
state.validation.scores = {'val_acc': 0.83, 'val_f1': 0.80}
state.validation.fold_results = [0.82, 0.84, 0.83, 0.85, 0.81]

print(f"Validation accuracy: {state.validation.scores['val_acc']}")
```