Mathematical functions for verification, evaluation and optimization of forecasts, predictions or models
Metrics for evaluating probability forecasts, ensemble forecasts, and distributional predictions. Includes Brier score for binary events, CRPS (Continuous Ranked Probability Score) for continuous distributions, and threshold-weighted variants for focused evaluation.
The fundamental proper scoring rule for evaluating binary probability forecasts.
def brier_score(
fcst: XarrayLike,
obs: XarrayLike,
*,
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
check_args: bool = True,
) -> XarrayLike:
"""
Calculate Brier Score for probability forecasts.
Args:
fcst: Probability forecasts [0, 1]
obs: Binary observations {0, 1}
reduce_dims: Dimensions to reduce
preserve_dims: Dimensions to preserve
weights: Optional weights
check_args: Validate input data ranges
Returns:
Brier scores
Formula:
BS = (1/n) * Σ(forecast_i - observed_i)²
Notes:
- Perfect forecast has BS = 0
- Range: [0, 1]
- Lower scores indicate better performance
- Forecasts must be probabilities [0, 1]
- Observations must be binary {0, 1}
"""Brier score calculation for ensemble forecasts with optional fair correction.
def brier_score_for_ensemble(
fcst: XarrayLike,
obs: XarrayLike,
ensemble_member_dim: str,
event_thresholds: Union[Real, Sequence[Real]],
*,
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
fair_correction: bool = True,
event_threshold_operator: Callable = operator.ge,
threshold_dim: str = "threshold",
) -> XarrayLike:
"""
Calculate Brier Score for ensemble forecasts.
Args:
fcst: Ensemble forecast data
obs: Observation data
ensemble_member_dim: Name of ensemble member dimension
event_thresholds: Threshold values for binary events
reduce_dims: Dimensions to reduce
preserve_dims: Dimensions to preserve
weights: Optional weights
fair_correction: Apply fair correction for finite ensemble size
event_threshold_operator: Comparison operator (ge, le, gt, lt)
threshold_dim: Name for threshold dimension in output
Returns:
Brier scores for each threshold
Notes:
- Converts ensemble to probabilities using threshold exceedance
- Fair correction accounts for finite ensemble size effects
- Multiple thresholds evaluated simultaneously
"""Usage Example:
from scores.probability import brier_score, brier_score_for_ensemble
import xarray as xr
import numpy as np
# Basic Brier score for probability forecasts
prob_forecast = xr.DataArray([0.8, 0.6, 0.3, 0.1, 0.9])
binary_obs = xr.DataArray([1, 1, 0, 0, 1])
bs = brier_score(prob_forecast, binary_obs)
# Ensemble Brier score
ensemble_fcst = xr.DataArray(
np.random.normal(10, 2, (100, 20)), # 100 time steps, 20 members
dims=["time", "member"]
)
obs = xr.DataArray(np.random.normal(10, 2, 100), dims=["time"])
thresholds = [8, 10, 12, 15]
ensemble_bs = brier_score_for_ensemble(
ensemble_fcst, obs, "member", thresholds
)The extension of Brier score to continuous distributions, evaluating the full probabilistic forecast.
def crps_cdf(
fcst: xr.DataArray,
obs: xr.DataArray,
threshold_dim: str,
*,
threshold_weight: Optional[xr.DataArray] = None,
additional_thresholds: Optional[xr.DataArray] = None,
fcst_fill_method: str = "linear",
threshold_weight_fill_method: str = "forward",
integration_method: str = "exact",
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
"""
Calculate CRPS for CDF forecasts.
Args:
fcst: CDF forecast values [0, 1]
obs: Observation values
threshold_dim: Name of threshold dimension in CDF
threshold_weight: Optional threshold weighting function
additional_thresholds: Additional evaluation thresholds
fcst_fill_method: Method for interpolating CDF ("linear", "step")
threshold_weight_fill_method: Weight interpolation method
integration_method: Integration approach ("exact", "trapz")
reduce_dims: Dimensions to reduce
preserve_dims: Dimensions to preserve
weights: Optional weights
Returns:
CRPS values
Notes:
- Evaluates complete probabilistic forecast
- CDF must be monotonically increasing
- Threshold dimension contains evaluation points
- Lower scores indicate better performance
"""def crps_for_ensemble(
fcst: xr.DataArray,
obs: xr.DataArray,
ensemble_member_dim: str,
*,
method: str = "closed_form",
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
"""
Calculate CRPS for ensemble forecasts.
Args:
fcst: Ensemble forecast data
obs: Observation data
ensemble_member_dim: Name of ensemble member dimension
method: Calculation method ("closed_form", "fair")
reduce_dims: Dimensions to reduce
preserve_dims: Dimensions to preserve
weights: Optional weights
Returns:
CRPS values
Formula (closed form):
CRPS = E|X - Y| - 0.5 * E|X - X'|
Where:
- X: forecast distribution
- Y: observation
- X': independent copy of X
Notes:
- "closed_form": Exact calculation for ensembles
- "fair": Applies fair correction for finite ensemble size
- Computational complexity: O(n log n) where n = ensemble size
"""Decomposes CRPS-CDF into reliability and resolution components.
def crps_cdf_brier_decomposition(
fcst: xr.DataArray,
obs: xr.DataArray,
threshold_dim: str,
*,
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
"""
Calculate CRPS-CDF with Brier decomposition.
Args:
fcst: CDF forecast values
obs: Observation values
threshold_dim: Name of threshold dimension
reduce_dims: Dimensions to reduce
preserve_dims: Dimensions to preserve
weights: Optional weights
Returns:
Dataset with CRPS and decomposition components
Components:
- crps: Total CRPS score
- reliability: Reliability component (smaller is better)
- resolution: Resolution component (larger is better)
- uncertainty: Uncertainty component (climatological)
"""CRPS variants that focus evaluation on specific value ranges or extremes.
def tw_crps_for_ensemble(
fcst: xr.DataArray,
obs: xr.DataArray,
ensemble_member_dim: str,
threshold_weight: xr.DataArray,
*,
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
"""
Calculate threshold-weighted CRPS for ensemble forecasts.
Args:
fcst: Ensemble forecast data
obs: Observation data
ensemble_member_dim: Name of ensemble member dimension
threshold_weight: Weight function over threshold values
reduce_dims: Dimensions to reduce
preserve_dims: Dimensions to preserve
weights: Optional weights
Returns:
Threshold-weighted CRPS values
Notes:
- Emphasizes specific value ranges via weighting
- Weight function must be non-negative
- Reduces to standard CRPS when weights are uniform
- Used for extreme value evaluation
"""Focuses evaluation on extreme values (tails of the distribution).
def tail_tw_crps_for_ensemble(
fcst: xr.DataArray,
obs: xr.DataArray,
ensemble_member_dim: str,
tail_weight: float,
*,
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
"""
Calculate tail-weighted CRPS for extreme values.
Args:
fcst: Ensemble forecast data
obs: Observation data
ensemble_member_dim: Name of ensemble member dimension
tail_weight: Weight parameter for tail emphasis (> 0)
reduce_dims: Dimensions to reduce
preserve_dims: Dimensions to preserve
weights: Optional weights
Returns:
Tail-weighted CRPS values
Notes:
- Higher tail_weight emphasizes extreme values more
- tail_weight = 0 reduces to standard CRPS
- Useful for evaluating forecast skill for extreme events
"""Focuses evaluation on a specific value range.
def interval_tw_crps_for_ensemble(
fcst: xr.DataArray,
obs: xr.DataArray,
ensemble_member_dim: str,
lower_threshold: float,
upper_threshold: float,
*,
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
"""
Calculate interval-weighted CRPS for specific value range.
Args:
fcst: Ensemble forecast data
obs: Observation data
ensemble_member_dim: Name of ensemble member dimension
lower_threshold: Lower bound of evaluation interval
upper_threshold: Upper bound of evaluation interval
reduce_dims: Dimensions to reduce
preserve_dims: Dimensions to preserve
weights: Optional weights
Returns:
Interval-weighted CRPS values
Notes:
- Only values within [lower_threshold, upper_threshold] contribute
- Useful for evaluating specific ranges (e.g., moderate rainfall)
- Outside interval, weight = 0
"""Utilities for preparing and manipulating CDF forecasts.
def adjust_fcst_for_crps(
fcst: xr.DataArray,
threshold_dim: str,
*,
threshold_weight: Optional[xr.DataArray] = None,
additional_thresholds: Optional[xr.DataArray] = None,
fcst_fill_method: str = "linear",
threshold_weight_fill_method: str = "forward",
) -> xr.DataArray:
"""
Prepare forecast CDF for CRPS calculation.
Args:
fcst: Raw CDF forecast data
threshold_dim: Name of threshold dimension
threshold_weight: Optional threshold weighting
additional_thresholds: Additional threshold points
fcst_fill_method: CDF interpolation method
threshold_weight_fill_method: Weight interpolation method
Returns:
Processed CDF forecast ready for CRPS calculation
Notes:
- Ensures CDF is properly formatted and monotonic
- Handles missing values and interpolation
- Adds additional threshold points if needed
"""Creates step-function threshold weights for CRPS.
def crps_step_threshold_weight(
thresholds: xr.DataArray,
threshold_bins: Sequence[float],
) -> xr.DataArray:
"""
Create step-function threshold weights.
Args:
thresholds: Threshold values for CDF
threshold_bins: Bin edges for step function
Returns:
Step-function weights matching threshold dimension
Notes:
- Creates piecewise constant weighting
- Each bin can have different weight
- Used for interval-based evaluation emphasis
"""from scores.probability import brier_score, crps_for_ensemble
from scores.sample_data import simple_forecast, simple_observations
import numpy as np
# Binary probability forecast evaluation
prob_forecast = np.random.beta(2, 2, 100) # Probabilities [0,1]
binary_obs = np.random.binomial(1, prob_forecast) # Binary outcomes
bs = brier_score(prob_forecast, binary_obs)
print(f"Brier Score: {bs.values:.4f}")
# Ensemble CRPS evaluation
ensemble = np.random.normal(10, 2, (100, 20)) # 100 times, 20 members
observations = np.random.normal(10, 2, 100)
crps = crps_for_ensemble(ensemble, observations, ensemble_member_dim="member")
print(f"CRPS: {crps.values:.4f}")# Evaluate multiple thresholds simultaneously
thresholds = [5, 10, 15, 20, 25]
ensemble_bs = brier_score_for_ensemble(
ensemble_forecast, observations,
ensemble_member_dim="member",
event_thresholds=thresholds
)
# Results have threshold dimension
for i, thresh in enumerate(thresholds):
score = ensemble_bs.isel(threshold=i)
print(f"Threshold {thresh}: BS = {score.values:.4f}")# Emphasize extreme values using tail-weighted CRPS
tail_crps = tail_tw_crps_for_ensemble(
ensemble_forecast, observations,
ensemble_member_dim="member",
tail_weight=2.0 # Strong emphasis on extremes
)
# Focus on specific range (e.g., moderate rainfall 5-15mm)
interval_crps = interval_tw_crps_for_ensemble(
ensemble_forecast, observations,
ensemble_member_dim="member",
lower_threshold=5.0,
upper_threshold=15.0
)
print(f"Standard CRPS: {crps.values:.4f}")
print(f"Tail-weighted CRPS: {tail_crps.values:.4f}")
print(f"Interval CRPS (5-15): {interval_crps.values:.4f}")# For CDF forecasts (probability vs threshold)
from scores.sample_data import cdf_forecast, cdf_observations
cdf_fcst = cdf_forecast() # CDF values at different thresholds
cdf_obs = cdf_observations() # Corresponding observations
# Standard CRPS for CDF
cdf_crps = crps_cdf(cdf_fcst, cdf_obs, threshold_dim="threshold")
# With decomposition
decomp = crps_cdf_brier_decomposition(
cdf_fcst, cdf_obs, threshold_dim="threshold"
)
print(f"CDF CRPS: {cdf_crps.values:.4f}")
print(f"Reliability: {decomp.reliability.values:.4f}")
print(f"Resolution: {decomp.resolution.values:.4f}")Install with Tessl CLI
npx tessl i tessl/pypi-scores