Hierarchical Methods: Time Series Forecasting
Comprehensive evaluation framework for measuring forecast accuracy at every level of the hierarchy. The evaluation system integrates with utilsforecast.losses metrics and provides specialized functionality for assessing hierarchical forecasts.
Primary function for evaluating hierarchical forecasts using standard accuracy metrics from the utilsforecast library.
def evaluate(
    df: FrameT,
    metrics: list[Callable],
    tags: dict[str, np.ndarray],
    models: Optional[list[str]] = None,
    train_df: Optional[FrameT] = None,
    level: Optional[list[int]] = None,
    id_col: str = 'unique_id',
    time_col: str = 'ds',
    target_col: str = 'y',
    agg_fn: Optional[str] = 'mean',
    benchmark: Optional[str] = None
) -> FrameT:
"""
Evaluate hierarchical forecasts using specified metrics.
Parameters:
- df: DataFrame with actual values and forecasts
Must contain id_col, time_col, target_col, and model prediction columns
- metrics: list of callable metric functions from utilsforecast.losses
Examples: [mse, mae, mape, smape, rmse]
- tags: dict mapping hierarchy level names to the series identifiers in each level
Format: {'level_name': np.array([<series ids>])}
- models: list of model names to evaluate (if None, evaluates all model columns)
- train_df: DataFrame with training data (required for some metrics like msse)
- level: list of confidence levels for probabilistic metrics (e.g., [80, 95])
- id_col: str, name of the unique identifier column
- time_col: str, name of the time column
- target_col: str, name of the target variable column
- agg_fn: str, aggregation function for combining scores ('mean', 'median', 'sum')
- benchmark: str, name of benchmark model for scaled metrics
Returns:
DataFrame with evaluation results by hierarchy level and model
Columns: ['level', 'metric', <model_names>...]
"""

import numpy as np
import pandas as pd
from hierarchicalforecast.evaluation import evaluate
from utilsforecast.losses import mse, mae, mape

# Prepare evaluation data with actuals and forecasts
eval_df = pd.DataFrame({
    'unique_id': ['A', 'B', 'Total', 'A', 'B', 'Total'],
    'ds': pd.to_datetime(['2023-01-01', '2023-01-01', '2023-01-01',
                          '2023-01-02', '2023-01-02', '2023-01-02']),
    'y': [100, 200, 300, 110, 210, 320],  # actual values
    'BottomUp': [95, 205, 300, 105, 215, 320],
    'MinTrace': [98, 198, 296, 108, 212, 320]
})
# Define hierarchy tags
tags = {
    'Bottom': np.array(['A', 'B']),   # bottom-level series
    'Total': np.array(['Total'])      # aggregated series
}
# Evaluate with multiple metrics
results = evaluate(
    df=eval_df,
    metrics=[mse, mae, mape],
    tags=tags,
    models=['BottomUp', 'MinTrace']
)
print(results)

# Evaluate performance at different hierarchy levels
from utilsforecast.losses import rmse, smape
results = evaluate(
    df=forecast_results,
    metrics=[rmse, smape],
    tags=hierarchy_tags,
    models=['BottomUp', 'TopDown', 'MinTrace'],
    agg_fn='mean'
)
# Results will show performance for each hierarchy level
# Example output:
# level    metric  BottomUp  TopDown  MinTrace
# Bottom   rmse    12.5      15.2     11.8
# Middle   rmse    8.9       9.1      8.7
# Total    rmse    5.2       5.8      4.9

from utilsforecast.losses import quantile_loss
# Evaluate prediction intervals
prob_results = evaluate(
    df=forecasts_with_intervals,
    metrics=[quantile_loss],
    tags=hierarchy_tags,
    level=[80, 95],  # Confidence levels to evaluate
    models=['BottomUp', 'MinTrace']
)
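
A note on input shape: the level argument assumes the interval columns are already present in df. The names below follow the common Nixtla convention of '<model>-lo-<level>' / '<model>-hi-<level>'; treat that convention as an assumption to verify against your forecast output.

# Expected columns in forecasts_with_intervals (assumed naming convention):
# ['unique_id', 'ds', 'y',
#  'BottomUp', 'BottomUp-lo-80', 'BottomUp-hi-80', 'BottomUp-lo-95', 'BottomUp-hi-95',
#  'MinTrace', 'MinTrace-lo-80', 'MinTrace-hi-80', 'MinTrace-lo-95', 'MinTrace-hi-95']
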
from utilsforecast.losses import msse, mase

# Use scaled metrics with training data
scaled_results = evaluate(
    df=test_forecasts,
    metrics=[msse, mase],
    tags=hierarchy_tags,
    train_df=training_data,  # Required for scaled metrics
    benchmark='Naive',       # Benchmark model for scaling
    models=['BottomUp', 'TopDown', 'MinTrace']
)
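
Some scaled metrics take arguments beyond the standard (df, models, ...) signature; for example, utilsforecast.losses.mase expects a seasonality. A common pattern, sketched here under that assumption, is to bind the extra argument with functools.partial before passing the metric in:

from functools import partial
from utilsforecast.losses import mase

# seasonality=12 assumes monthly data; adjust to your series frequency
seasonal_mase = partial(mase, seasonality=12)

scaled_results = evaluate(
    df=test_forecasts,
    metrics=[seasonal_mase],
    tags=hierarchy_tags,
    train_df=training_data,  # still required for scaled metrics
)
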
# Use different aggregation functions
results_median = evaluate(
    df=eval_df,
    metrics=[mse, mae],
    tags=tags,
    agg_fn='median'  # Use median instead of mean
)
results_sum = evaluate(
    df=eval_df,
    metrics=[mse],
    tags=tags,
    agg_fn='sum'  # Sum across hierarchy levels
)

The evaluation function works with any metric from utilsforecast.losses. Common point-forecast metrics include:
from utilsforecast.losses import (
    mse,    # Mean Squared Error
    mae,    # Mean Absolute Error
    mape,   # Mean Absolute Percentage Error
    smape,  # Symmetric Mean Absolute Percentage Error
    rmse,   # Root Mean Squared Error
)

Scaled metrics (require train_df):

from utilsforecast.losses import (
    msse,   # Mean Squared Scaled Error
    mase,   # Mean Absolute Scaled Error
    rmsse,  # Root Mean Squared Scaled Error
)

Probabilistic and interval metrics:

from utilsforecast.losses import (
    quantile_loss,  # Quantile Loss
    coverage,       # Coverage probability
    mis,            # Mean Interval Score
)
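
Any callable that follows the same interface can be mixed in with these. The sketch below assumes the (df, models, id_col, target_col) signature used by utilsforecast.losses metrics and returns one row per series with one column per model; mean_error is a hypothetical name, not part of either library.

import numpy as np
import pandas as pd

def mean_error(df: pd.DataFrame, models: list,
               id_col: str = 'unique_id', target_col: str = 'y') -> pd.DataFrame:
    """Hypothetical custom metric: average signed error per series and model."""
    rows = []
    for uid, group in df.groupby(id_col):
        rows.append({id_col: uid,
                     **{m: float(np.mean(group[m] - group[target_col])) for m in models}})
    return pd.DataFrame(rows)

# Can be passed alongside the built-in losses, e.g.:
# results = evaluate(df=eval_df, metrics=[mse, mean_error], tags=tags)
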
End-to-end example: reconcile base forecasts, then evaluate the reconciled outputs.

from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.methods import BottomUp, MinTrace
from hierarchicalforecast.evaluation import evaluate
from utilsforecast.losses import mse, mae
# Generate reconciled forecasts
reconcilers = [BottomUp(), MinTrace(method='ols')]
hrec = HierarchicalReconciliation(reconcilers=reconcilers)
reconciled = hrec.reconcile(
    Y_hat_df=base_forecasts,
    S=summing_matrix,
    tags=hierarchy_tags,
    Y_df=historical_data
)
# Evaluate reconciled forecasts
evaluation_results = evaluate(
    df=reconciled,
    metrics=[mse, mae],
    tags=hierarchy_tags,
    models=['BottomUp', 'MinTrace']
)

class HierarchicalEvaluation:
"""
Deprecated: Use the evaluate() function instead.
Legacy evaluation class that will be removed in future versions.
All functionality has been migrated to the evaluate() function.
"""
# This class is deprecated - use the evaluate() function instead
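
A rough migration sketch from the class-based API to evaluate(); the old argument names (evaluators, Y_hat_df, Y_test_df) are reconstructed from memory and should be checked against the version being migrated from:

# Before (deprecated, approximate usage):
# from hierarchicalforecast.evaluation import HierarchicalEvaluation
# evaluator = HierarchicalEvaluation(evaluators=[some_loss_fn])
# old_results = evaluator.evaluate(Y_hat_df=Y_hat_df, Y_test_df=Y_test_df, tags=tags)

# After: merge actuals and forecasts into one frame and call evaluate()
from hierarchicalforecast.evaluation import evaluate
from utilsforecast.losses import mse

eval_df = Y_hat_df.merge(Y_test_df[['unique_id', 'ds', 'y']], on=['unique_id', 'ds'])
new_results = evaluate(df=eval_df, metrics=[mse], tags=tags)
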
The following functions are deprecated and will be removed. Use equivalent functions from utilsforecast.losses instead:

- mse() → use utilsforecast.losses.mse
- mqloss() → use utilsforecast.losses.quantile_loss
- rel_mse() → use a custom implementation with utilsforecast metrics
- msse() → use utilsforecast.losses.msse
- scaled_crps() → use utilsforecast.losses.scaled_crps
- energy_score() → use a custom implementation
- log_score() → use a custom implementation

Install with Tessl CLI
npx tessl i tessl/pypi-hierarchicalforecast