Mathematical functions for the verification, evaluation, and optimization of forecasts, predictions, or models.
Verification metrics for categorical and binary forecasts including contingency table statistics, skill scores, and multicategorical measures. Supports both traditional binary classification metrics and advanced multicategorical scoring methods.
Standard 2x2 contingency table metrics for evaluating binary event forecasts.
Also known as Hit Rate or Sensitivity, measures the proportion of observed events correctly forecast.
def probability_of_detection(
    fcst: XarrayLike,
    obs: XarrayLike,
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
    check_args: bool = True,
) -> XarrayLike:
    """
    Calculate Probability of Detection (Hit Rate).

    Args:
        fcst: Binary forecast values {0, 1}
        obs: Binary observation values {0, 1}
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights
        check_args: Validate binary data

    Returns:
        Probability of detection values

    Formula:
        POD = hits / (hits + misses)

    Notes:
        - Range: [0, 1]
        - Perfect forecast has POD = 1
        - Also known as Sensitivity, Hit Rate, Recall
        - Measures ability to detect events when they occur
    """


# Also known as False Alarm Rate, measures the proportion of non-events
# incorrectly forecast as events.
def probability_of_false_detection(
    fcst: XarrayLike,
    obs: XarrayLike,
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
    check_args: bool = True,
) -> XarrayLike:
    """
    Calculate Probability of False Detection (False Alarm Rate).

    Args:
        fcst: Binary forecast values {0, 1}
        obs: Binary observation values {0, 1}
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights
        check_args: Validate binary data

    Returns:
        Probability of false detection values

    Formula:
        POFD = false_alarms / (false_alarms + correct_negatives)

    Notes:
        - Range: [0, 1]
        - Perfect forecast has POFD = 0
        - Also known as False Alarm Rate, Fall-out
        - Measures tendency to falsely predict events
    """


# Usage Example:
# Basic usage: evaluate a small binary forecast against observations.
from scores.categorical import probability_of_detection, probability_of_false_detection
import xarray as xr
import numpy as np

# Binary forecast and observation data
forecast = xr.DataArray([1, 1, 0, 0, 1, 1, 0, 1])
observed = xr.DataArray([1, 0, 0, 1, 1, 0, 0, 1])

pod = probability_of_detection(forecast, observed)
pofd = probability_of_false_detection(forecast, observed)
print(f"Probability of Detection: {pod.values:.3f}")
print(f"Probability of False Detection: {pofd.values:.3f}")

# Classes for systematic computation of multiple binary classification metrics.
Comprehensive manager providing access to 18 different binary contingency table metrics.
class BinaryContingencyManager:
    """
    Binary contingency table manager with 18 metrics.

    Computes and provides access to all standard binary classification
    metrics derived from 2x2 contingency tables.
    """

    def __init__(
        self,
        fcst: XarrayLike,
        obs: XarrayLike,
        *,
        reduce_dims: Optional[FlexibleDimensionTypes] = None,
        preserve_dims: Optional[FlexibleDimensionTypes] = None,
        weights: Optional[xr.DataArray] = None,
        check_args: bool = True,
    ):
        """
        Initialize binary contingency table manager.

        Args:
            fcst: Binary forecast values {0, 1}
            obs: Binary observation values {0, 1}
            reduce_dims: Dimensions to reduce
            preserve_dims: Dimensions to preserve
            weights: Optional weights
            check_args: Validate binary data
        """

    # Available metrics (all return XarrayLike):

    def pod(self) -> XarrayLike:
        """Probability of Detection (Hit Rate, Sensitivity)."""

    def pofd(self) -> XarrayLike:
        """Probability of False Detection (False Alarm Rate)."""

    def far(self) -> XarrayLike:
        """False Alarm Ratio."""

    def success_ratio(self) -> XarrayLike:
        """Success Ratio (1 - False Alarm Ratio)."""

    def accuracy(self) -> XarrayLike:
        """Accuracy (Proportion Correct)."""

    def bias_score(self) -> XarrayLike:
        """Bias Score (Frequency Bias)."""

    def csi(self) -> XarrayLike:
        """Critical Success Index (Threat Score)."""

    def ets(self) -> XarrayLike:
        """Equitable Threat Score."""

    def hss(self) -> XarrayLike:
        """Heidke Skill Score."""

    def pss(self) -> XarrayLike:
        """Peirce Skill Score (True Skill Statistic)."""

    def odds_ratio(self) -> XarrayLike:
        """Odds Ratio."""

    def log_odds_ratio(self) -> XarrayLike:
        """Logarithm of Odds Ratio."""

    def yules_q(self) -> XarrayLike:
        """Yule's Q Statistic."""

    def yules_y(self) -> XarrayLike:
        """Yule's Y Statistic."""

    def dor(self) -> XarrayLike:
        """Diagnostic Odds Ratio."""

    def log_dor(self) -> XarrayLike:
        """Logarithm of Diagnostic Odds Ratio."""

    def positive_likelihood_ratio(self) -> XarrayLike:
        """Positive Likelihood Ratio."""

    def negative_likelihood_ratio(self) -> XarrayLike:
        """Negative Likelihood Ratio."""


# Lower-level contingency table management for custom metrics.
class BasicContingencyManager:
    """
    Basic contingency table manager.

    Provides raw contingency table counts for custom metric computation.
    """

    def __init__(
        self,
        fcst: XarrayLike,
        obs: XarrayLike,
        *,
        reduce_dims: Optional[FlexibleDimensionTypes] = None,
        preserve_dims: Optional[FlexibleDimensionTypes] = None,
        weights: Optional[xr.DataArray] = None,
    ):
        """Initialize basic contingency manager."""

    def hits(self) -> XarrayLike:
        """True Positives: forecast=1, observed=1."""

    def misses(self) -> XarrayLike:
        """False Negatives: forecast=0, observed=1."""

    def false_alarms(self) -> XarrayLike:
        """False Positives: forecast=1, observed=0."""

    def correct_negatives(self) -> XarrayLike:
        """True Negatives: forecast=0, observed=0."""


# Classes for defining binary events from continuous data.
Generic event operator for categorical scoring.
class EventOperator:
    """
    Generic event operator for categorical scoring.

    Defines binary events from continuous forecast and observation data
    using custom logic or thresholds.
    """

    def __init__(self, event_fcst_func: Callable, event_obs_func: Callable):
        """
        Initialize event operator.

        Args:
            event_fcst_func: Function to convert forecast to binary events
            event_obs_func: Function to convert observations to binary events
        """

    def convert_to_events(
        self,
        fcst: XarrayLike,
        obs: XarrayLike,
    ) -> Tuple[XarrayLike, XarrayLike]:
        """Convert forecast and observations to binary events."""


# Threshold-based event operator for categorical scoring.
class ThresholdEventOperator(EventOperator):
    """
    Threshold-based event operator.

    Converts continuous data to binary events using threshold comparison.
    """

    def __init__(
        self,
        threshold: float,
        operator_func: Callable = operator.ge,
    ):
        """
        Initialize threshold event operator.

        Args:
            threshold: Threshold value for event definition
            operator_func: Comparison operator (ge, le, gt, lt)

        Example:
            # Event = precipitation >= 1mm
            rain_events = ThresholdEventOperator(1.0, operator.ge)
        """


# Advanced scoring methods for forecasts with multiple categories.
Risk-based scoring for multicategorical forecasts with asymmetric loss functions.
def firm(
    fcst: XarrayLike,
    obs: XarrayLike,
    risk_parameter: float,
    categorical_thresholds: Union[Sequence[float], Sequence[xr.DataArray]],
    threshold_weights: Sequence[Union[float, xr.DataArray]],
    *,
    discount_distance: float = 0,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
    include_components: bool = False,
) -> XarrayLike:
    """
    Calculate Fixed Risk Multicategorical (FIRM) Score.

    Args:
        fcst: Continuous forecast values
        obs: Continuous observation values
        risk_parameter: Risk aversion parameter (higher = more risk averse)
        categorical_thresholds: Threshold values defining categories
        threshold_weights: Weights for each threshold/category
        discount_distance: Distance-based penalty discount factor
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights
        include_components: Return decomposed components

    Returns:
        FIRM scores (and components if requested)

    Notes:
        - Designed for asymmetric loss situations
        - Risk parameter controls penalty severity
        - Higher scores indicate worse performance
        - Useful for decision-oriented evaluation
    """


# Equitable scoring for multicategorical precipitation forecasts.
def seeps(
    fcst: XarrayLike,
    obs: XarrayLike,
    dry_threshold: float,
    light_threshold: float,
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> XarrayLike:
    """
    Calculate Stable Equitable Error in Probability Space (SEEPS).

    Args:
        fcst: Precipitation forecast values
        obs: Precipitation observation values
        dry_threshold: Threshold separating dry from light precipitation
        light_threshold: Threshold separating light from heavy precipitation
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights

    Returns:
        SEEPS scores

    Notes:
        - Specifically designed for precipitation verification
        - Creates three categories: dry, light, heavy
        - Equitable scoring (random forecast scores 0)
        - Perfect forecast scores 1
        - Range typically: [0, 1] but can exceed 1 for very poor forecasts
    """


from scores.categorical import BinaryContingencyManager
import numpy as np

# Create sample binary data
np.random.seed(42)
forecast = np.random.binomial(1, 0.3, 1000)
observed = np.random.binomial(1, 0.3, 1000)

# Use contingency manager for comprehensive evaluation
contingency = BinaryContingencyManager(forecast, observed)

# Access multiple metrics
metrics = {
    'POD': contingency.pod().values,
    'POFD': contingency.pofd().values,
    'FAR': contingency.far().values,
    'CSI': contingency.csi().values,
    'HSS': contingency.hss().values,
    'Accuracy': contingency.accuracy().values,
}
for name, value in metrics.items():
    print(f"{name}: {value:.3f}")

from scores.categorical import ThresholdEventOperator
from scores.processing import binary_discretise
import operator

# Continuous precipitation data
precip_forecast = np.random.exponential(2, 1000)
precip_observed = np.random.exponential(2, 1000)

# Define heavy rain event (>= 5mm)
heavy_rain = ThresholdEventOperator(5.0, operator.ge)
fcst_events, obs_events = heavy_rain.convert_to_events(
    precip_forecast, precip_observed
)

# Or use direct discretization
fcst_heavy = binary_discretise(precip_forecast, 5.0)
obs_heavy = binary_discretise(precip_observed, 5.0)

# Evaluate heavy rain forecasts
heavy_contingency = BinaryContingencyManager(fcst_heavy, obs_heavy)
print(f"Heavy rain POD: {heavy_contingency.pod().values:.3f}")
print(f"Heavy rain CSI: {heavy_contingency.csi().values:.3f}")

from scores.categorical import firm, seeps
import numpy as np

# Precipitation forecast/observation data
precip_fcst = np.random.exponential(3, 500)
precip_obs = np.random.exponential(3, 500)

# SEEPS evaluation (3 categories: dry, light, heavy)
seeps_score = seeps(
    precip_fcst, precip_obs,
    dry_threshold=0.1,    # < 0.1mm = dry
    light_threshold=5.0,  # >= 5.0mm = heavy
)
print(f"SEEPS Score: {seeps_score.values:.3f}")

# FIRM evaluation with custom risk parameter
firm_score = firm(
    precip_fcst, precip_obs,
    risk_parameter=2.0,  # Moderate risk aversion
    categorical_thresholds=[0.1, 1.0, 5.0, 15.0],  # Category boundaries
    threshold_weights=[0.5, 1.0, 2.0, 4.0],  # Increasing penalties
)
print(f"FIRM Score: {firm_score.values:.3f}")

# Spatial-temporal categorical analysis
forecast_3d = xr.DataArray(
    np.random.binomial(1, 0.4, (50, 10, 15)),  # time, lat, lon
    dims=["time", "lat", "lon"],
)
observed_3d = xr.DataArray(
    np.random.binomial(1, 0.4, (50, 10, 15)),
    dims=["time", "lat", "lon"],
)

# Temporal verification at each grid point
spatial_contingency = BinaryContingencyManager(
    forecast_3d, observed_3d,
    reduce_dims="time",
)
spatial_pod = spatial_contingency.pod()  # Shape: (lat, lon)

# Overall verification (all dimensions)
overall_contingency = BinaryContingencyManager(
    forecast_3d, observed_3d,
    reduce_dims=["time", "lat", "lon"],
)
overall_csi = overall_contingency.csi()  # Scalar value
print(f"Overall CSI: {overall_csi.values:.3f}")
print(f"Spatial POD range: {spatial_pod.min().values:.3f} to {spatial_pod.max().values:.3f}")

# Use BasicContingencyManager for custom metrics
basic_contingency = BasicContingencyManager(forecast, observed)

# Access raw counts
hits = basic_contingency.hits()
misses = basic_contingency.misses()
false_alarms = basic_contingency.false_alarms()
correct_negatives = basic_contingency.correct_negatives()

# Calculate custom metric: Threat Score (CSI)
threat_score = hits / (hits + misses + false_alarms)
print(f"Custom CSI: {threat_score.values:.3f}")

# Calculate custom metric: Symmetric Extremal Dependence Index
n_events = hits + misses
n_forecasts = hits + false_alarms
sedi = (np.log(false_alarms + 1) - np.log(hits + 1) -
        np.log(n_forecasts + 1) + np.log(n_events + 1)) / \
       (np.log(false_alarms + 1) + np.log(hits + 1) +
        np.log(n_forecasts + 1) + np.log(n_events + 1))
print(f"SEDI: {sedi.values:.3f}")

# Install with Tessl CLI
npx tessl i tessl/pypi-scores