Mathematical functions for verification, evaluation and optimization of forecasts, predictions or models
Experimental scoring methods under development and peer review. These metrics represent cutting-edge approaches to forecast verification that may become standard methods in future versions.
A decision-oriented scoring framework designed for risk-based evaluation of forecasts with asymmetric loss functions.
def risk_matrix_score(
fcst: XarrayLike,
obs: XarrayLike,
risk_matrix: Union[Sequence[Sequence[float]], np.ndarray],
*,
fcst_bins: Optional[Sequence[float]] = None,
obs_bins: Optional[Sequence[float]] = None,
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
) -> XarrayLike:
"""
Calculate risk matrix score for decision-oriented forecast evaluation.
Args:
fcst: Forecast values
obs: Observation values
risk_matrix: 2D matrix defining loss/risk values for each forecast-observation bin combination
fcst_bins: Bin edges for discretizing forecast values
obs_bins: Bin edges for discretizing observation values
reduce_dims: Dimensions to reduce
preserve_dims: Dimensions to preserve
weights: Optional weights
Returns:
Risk matrix scores
Notes:
- Designed for asymmetric loss situations
- Risk matrix defines penalties for different forecast-observation combinations
- Higher scores indicate greater risk/loss
- Useful for decision-making contexts where different errors have different consequences
- Under peer review - interface may change in future versions
"""

Helper functions for creating and manipulating risk matrices.
Converts matrix weight specifications to array format for computational efficiency.
def matrix_weights_to_array(
matrix_weights: Union[dict, Sequence[Sequence[float]]],
fcst_categories: Optional[Sequence[str]] = None,
obs_categories: Optional[Sequence[str]] = None,
) -> np.ndarray:
"""
Convert matrix weights to array format.
Args:
matrix_weights: Weight matrix as dict or nested sequences
fcst_categories: Forecast category labels
obs_categories: Observation category labels
Returns:
Numpy array representation of weights matrix
Notes:
- Standardizes weight matrix format for internal use
- Supports both dictionary and array input formats
- Used internally by risk matrix scoring functions
"""

Creates weight matrices based on warning system scaling parameters.
def weights_from_warning_scaling(
scaling_factors: Sequence[float],
n_categories: int,
*,
symmetric: bool = False,
) -> np.ndarray:
"""
Create weight matrix from warning scaling parameters.
Args:
scaling_factors: Scaling factors for different warning levels
n_categories: Number of forecast/observation categories
symmetric: Whether to apply symmetric weighting
Returns:
Weight matrix for risk-based scoring
Notes:
- Generates matrices suitable for warning system evaluation
- Scaling factors control penalty severity for different categories
- Symmetric option creates balanced penalty structure
- Used for meteorological warning verification
"""

from scores.emerging import risk_matrix_score, matrix_weights_to_array
import numpy as np
# Create sample forecast and observation data
forecast = np.random.normal(10, 3, 1000)
observation = np.random.normal(10, 2, 1000)
# Define a simple 3x3 risk matrix for low/medium/high categories
# Rows = forecast categories, Columns = observation categories
risk_matrix = [
    [0.0, 0.5, 2.0],  # Low forecast: small penalty for missing medium/high events
    [0.3, 0.0, 1.5],  # Medium forecast: balanced penalties
    [1.0, 0.8, 0.0],  # High forecast: large penalty for false alarms
]
# Define bin edges for categorization.
# Two edges yield three categories, matching the 3x3 risk matrix above
# (three edges would create four categories and no longer fit the matrix).
bins = [5, 12]  # Creates categories: <5, 5-12, >12
# Calculate risk matrix score; the same edges are used for forecasts and
# observations so both are categorized identically.
rms = risk_matrix_score(
    forecast, observation,
    risk_matrix=risk_matrix,
    fcst_bins=bins,
    obs_bins=bins,
)
print(f"Risk Matrix Score: {rms.values:.3f}")

from scores.emerging import risk_matrix_score, weights_from_warning_scaling
# Precipitation forecast evaluation for warning systems
# Synthetic data: 500 random draws each for the forecast and the observation.
precip_forecast = np.random.exponential(3, 500)
precip_observed = np.random.exponential(3, 500)
# Create warning-based weight matrix
# Categories: No warning, Advisory, Watch, Warning
n_categories = 4
# One scaling factor per category listed above.
scaling_factors = [1.0, 2.0, 4.0, 8.0] # Increasing penalties for higher warnings
warning_weights = weights_from_warning_scaling(
scaling_factors,
n_categories,
symmetric=False # Asymmetric penalties
)
# Show the generated matrix so the penalty structure can be inspected.
print("Warning Weight Matrix:")
print(warning_weights)
# Define precipitation warning thresholds
# NOTE(review): three edges presumably split the data into the four
# categories listed above - confirm against the binning implementation.
warning_thresholds = [1.0, 10.0, 25.0] # mm/hr thresholds
# Evaluate warning system performance
# The same thresholds are applied to both forecasts and observations.
warning_score = risk_matrix_score(
precip_forecast, precip_observed,
risk_matrix=warning_weights,
fcst_bins=warning_thresholds,
obs_bins=warning_thresholds
)
print(f"Warning System Risk Score: {warning_score.values:.3f}")

# Economic loss-based evaluation for agricultural applications
# Temperature forecasts for frost warning
temp_forecast = np.random.normal(2, 4, 300)  # Temperature in °C
temp_observed = np.random.normal(2, 3, 300)
# Economic loss matrix (in thousands of dollars)
# Rows = forecast category, columns = observed category. Both are listed in
# ascending temperature order so that they line up with the bins produced by
# frost_thresholds below: Frost (<0°C), Caution (0-5°C), Safe (>5°C).
# NOTE(review): assumes bins are indexed from the lowest edge upwards, as in
# the low/medium/high example - confirm against the binning implementation.
economic_loss_matrix = [
    [0, 8, 10],   # Frost forecast: false-alarm cost grows the milder the outcome
    [15, 0, 2],   # Caution forecast: costly if frost occurs, cheap if it stays safe
    [20, 5, 0],   # Safe forecast: largest loss when a frost is missed
]
frost_thresholds = [0, 5]  # Temperature thresholds in °C
# Calculate economic risk score
economic_score = risk_matrix_score(
    temp_forecast, temp_observed,
    risk_matrix=economic_loss_matrix,
    fcst_bins=frost_thresholds,
    obs_bins=frost_thresholds,
)
print(f"Economic Risk Score: {economic_score.values:.1f} (k$)")
# Compare to symmetric scoring (traditional approach)
symmetric_weights = weights_from_warning_scaling(
    [1, 1, 1],  # Equal penalties
    3,
    symmetric=True,
)
symmetric_score = risk_matrix_score(
    temp_forecast, temp_observed,
    risk_matrix=symmetric_weights,
    fcst_bins=frost_thresholds,
    obs_bins=frost_thresholds,
)
print(f"Symmetric Score: {symmetric_score.values:.3f}")
print(f"Asymmetric advantage: {(symmetric_score - economic_score).values:.3f}")

# Custom risk matrix for multi-dimensional evaluation
# Create complex weight matrix with specific penalty structure
def create_custom_risk_matrix(n_cats, penalty_type='conservative'):
    """Create custom risk matrices for different evaluation contexts.

    Rows index the forecast category and columns the observed category, so
    entry ``[i, j]`` is the penalty for forecasting category ``i`` when
    category ``j`` occurred. The diagonal (perfect forecast) is always zero.

    Args:
        n_cats: Number of categories; the result has shape ``(n_cats, n_cats)``.
        penalty_type: ``'conservative'`` penalises under-forecasting
            (``i < j``) at 2.0 per category of distance and over-forecasting
            (``i > j``) at 1.0 per category. ``'balanced'`` penalises both
            directions at 1.0 per category.

    Returns:
        A float ``numpy.ndarray`` of penalties.

    Raises:
        ValueError: If ``penalty_type`` is not one of the supported names.
            Previously an unknown name silently produced an all-zero matrix,
            which would score every forecast as perfect.
    """
    categories = np.arange(n_cats)
    # gap[i, j] = j - i: positive where the observation exceeded the forecast.
    gap = categories[np.newaxis, :] - categories[:, np.newaxis]
    distance = np.abs(gap)

    if penalty_type == 'conservative':
        # Missed events cost twice as much as false alarms.
        return np.where(gap > 0, distance * 2.0, distance * 1.0)
    if penalty_type == 'balanced':
        return distance.astype(float)
    raise ValueError(
        f"Unknown penalty_type {penalty_type!r}; "
        "expected 'conservative' or 'balanced'"
    )
# Test different penalty structures
conservative_matrix = create_custom_risk_matrix(4, 'conservative')
balanced_matrix = create_custom_risk_matrix(4, 'balanced')
# Multi-threshold evaluation.
# Three thresholds yield four categories, matching the 4x4 matrices above
# (a fourth threshold would create a fifth category with no matrix row).
thresholds = [2, 8, 15]
conservative_score = risk_matrix_score(
    precip_forecast, precip_observed,
    risk_matrix=conservative_matrix,
    fcst_bins=thresholds,
    obs_bins=thresholds,
)
balanced_score = risk_matrix_score(
    precip_forecast, precip_observed,
    risk_matrix=balanced_matrix,
    fcst_bins=thresholds,
    obs_bins=thresholds,
)
print(f"Conservative penalty score: {conservative_score.values:.3f}")
print(f"Balanced penalty score: {balanced_score.values:.3f}")

# Evaluate risk across multiple dimensions
# xarray is needed for the labelled arrays below; none of the earlier
# snippets import it, so bring it into scope here.
import xarray as xr

forecast_3d = xr.DataArray(
    np.random.exponential(2, (50, 10, 15)),  # time, lat, lon
    dims=["time", "lat", "lon"],
)
observation_3d = xr.DataArray(
    np.random.exponential(2, (50, 10, 15)),
    dims=["time", "lat", "lon"],
)
# Simple 3-category risk matrix
simple_risk = [
    [0, 1, 3],
    [1, 0, 2],
    [2, 1, 0],
]
# Risk assessment at each grid point (temporal aggregation):
# only "time" is reduced, so the lat/lon dimensions are kept.
spatial_risk = risk_matrix_score(
    forecast_3d, observation_3d,
    risk_matrix=simple_risk,
    fcst_bins=[1, 5],
    obs_bins=[1, 5],
    reduce_dims="time",
)
# Overall risk assessment: every dimension is reduced.
total_risk = risk_matrix_score(
    forecast_3d, observation_3d,
    risk_matrix=simple_risk,
    fcst_bins=[1, 5],
    obs_bins=[1, 5],
    reduce_dims=["time", "lat", "lon"],
)
print(f"Spatial risk assessment shape: {spatial_risk.shape}")
print(f"Total risk score: {total_risk.values:.3f}")
print(f"Highest risk location: {spatial_risk.max().values:.3f}")
print(f"Lowest risk location: {spatial_risk.min().values:.3f}")

⚠️ Warning: The functions in this module are under active development and peer review. The API may change in future versions of the scores package.
These emerging methods may be integrated into the core scoring modules in future releases. Users should expect potential API changes and should validate results against established methods during the experimental phase.
Install with Tessl CLI
npx tessl i tessl/pypi-scores