CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-scores

Mathematical functions for verification, evaluation and optimization of forecasts, predictions or models

Overview
Eval results
Files

probability-scores.mddocs/

Probability Scores

Metrics for evaluating probability forecasts, ensemble forecasts, and distributional predictions. Includes Brier score for binary events, CRPS (Continuous Ranked Probability Score) for continuous distributions, and threshold-weighted variants for focused evaluation.

Capabilities

Brier Score

The fundamental proper scoring rule for evaluating binary probability forecasts.

Basic Brier Score

def brier_score(
    fcst: XarrayLike,
    obs: XarrayLike,
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
    check_args: bool = True,
) -> XarrayLike:
    """
    Calculate Brier Score for probability forecasts.

    Args:
        fcst: Probability forecasts [0, 1]
        obs: Binary observations {0, 1}
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights
        check_args: Validate input data ranges

    Returns:
        Brier scores

    Formula:
        BS = (1/n) * Σ(forecast_i - observed_i)²

    Notes:
        - Perfect forecast has BS = 0
        - Range: [0, 1]
        - Lower scores indicate better performance
        - Forecasts must be probabilities [0, 1]
        - Observations must be binary {0, 1}
    """

Ensemble Brier Score

Brier score calculation for ensemble forecasts with optional fair correction.

def brier_score_for_ensemble(
    fcst: XarrayLike,
    obs: XarrayLike,
    ensemble_member_dim: str,
    event_thresholds: Union[Real, Sequence[Real]],
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
    fair_correction: bool = True,
    event_threshold_operator: Callable = operator.ge,
    threshold_dim: str = "threshold",
) -> XarrayLike:
    """
    Calculate Brier Score for ensemble forecasts.

    Args:
        fcst: Ensemble forecast data
        obs: Observation data
        ensemble_member_dim: Name of ensemble member dimension
        event_thresholds: Threshold values for binary events
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights
        fair_correction: Apply fair correction for finite ensemble size
        event_threshold_operator: Comparison operator (ge, le, gt, lt)
        threshold_dim: Name for threshold dimension in output

    Returns:
        Brier scores for each threshold

    Notes:
        - Converts ensemble to probabilities using threshold exceedance
        - Fair correction accounts for finite ensemble size effects
        - Multiple thresholds evaluated simultaneously
    """

Usage Example:

from scores.probability import brier_score, brier_score_for_ensemble
import xarray as xr
import numpy as np

# Basic Brier score for probability forecasts
prob_forecast = xr.DataArray([0.8, 0.6, 0.3, 0.1, 0.9])
binary_obs = xr.DataArray([1, 1, 0, 0, 1])
bs = brier_score(prob_forecast, binary_obs)

# Ensemble Brier score
ensemble_fcst = xr.DataArray(
    np.random.normal(10, 2, (100, 20)),  # 100 time steps, 20 members
    dims=["time", "member"]
)
obs = xr.DataArray(np.random.normal(10, 2, 100), dims=["time"])
thresholds = [8, 10, 12, 15]

ensemble_bs = brier_score_for_ensemble(
    ensemble_fcst, obs, "member", thresholds
)

Continuous Ranked Probability Score (CRPS)

The extension of Brier score to continuous distributions, evaluating the full probabilistic forecast.

CRPS for CDF Forecasts

def crps_cdf(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    threshold_dim: str,
    *,
    threshold_weight: Optional[xr.DataArray] = None,
    additional_thresholds: Optional[xr.DataArray] = None,
    fcst_fill_method: str = "linear",
    threshold_weight_fill_method: str = "forward",
    integration_method: str = "exact",
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
    """
    Calculate CRPS for CDF forecasts.

    Args:
        fcst: CDF forecast values [0, 1]
        obs: Observation values
        threshold_dim: Name of threshold dimension in CDF
        threshold_weight: Optional threshold weighting function
        additional_thresholds: Additional evaluation thresholds
        fcst_fill_method: Method for interpolating CDF ("linear", "step")
        threshold_weight_fill_method: Weight interpolation method
        integration_method: Integration approach ("exact", "trapz")
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights

    Returns:
        CRPS values

    Notes:
        - Evaluates complete probabilistic forecast
        - CDF must be monotonically increasing
        - Threshold dimension contains evaluation points
        - Lower scores indicate better performance
    """

CRPS for Ensemble Forecasts

def crps_for_ensemble(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    ensemble_member_dim: str,
    *,
    method: str = "closed_form",
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
    """
    Calculate CRPS for ensemble forecasts.

    Args:
        fcst: Ensemble forecast data
        obs: Observation data
        ensemble_member_dim: Name of ensemble member dimension
        method: Calculation method ("closed_form", "fair")
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights

    Returns:
        CRPS values

    Formula (closed form):
        CRPS = E|X - Y| - 0.5 * E|X - X'|

    Where:
        - X: forecast distribution
        - Y: observation
        - X': independent copy of X

    Notes:
        - "closed_form": Exact calculation for ensembles
        - "fair": Applies fair correction for finite ensemble size
        - Computational complexity: O(n log n) where n = ensemble size
    """

CRPS CDF Brier Decomposition

Decomposes CRPS-CDF into reliability and resolution components.

def crps_cdf_brier_decomposition(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    threshold_dim: str,
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
    """
    Calculate CRPS-CDF with Brier decomposition.

    Args:
        fcst: CDF forecast values
        obs: Observation values
        threshold_dim: Name of threshold dimension
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights

    Returns:
        Dataset with CRPS and decomposition components

    Components:
        - crps: Total CRPS score
        - reliability: Reliability component (smaller is better)
        - resolution: Resolution component (larger is better)
        - uncertainty: Uncertainty component (climatological)
    """

Threshold-Weighted CRPS

CRPS variants that focus evaluation on specific value ranges or extremes.

Basic Threshold-Weighted CRPS

def tw_crps_for_ensemble(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    ensemble_member_dim: str,
    threshold_weight: xr.DataArray,
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
    """
    Calculate threshold-weighted CRPS for ensemble forecasts.

    Args:
        fcst: Ensemble forecast data
        obs: Observation data
        ensemble_member_dim: Name of ensemble member dimension
        threshold_weight: Weight function over threshold values
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights

    Returns:
        Threshold-weighted CRPS values

    Notes:
        - Emphasizes specific value ranges via weighting
        - Weight function must be non-negative
        - Reduces to standard CRPS when weights are uniform
        - Used for extreme value evaluation
    """

Tail-Weighted CRPS

Focuses evaluation on extreme values (tails of the distribution).

def tail_tw_crps_for_ensemble(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    ensemble_member_dim: str,
    tail_weight: float,
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
    """
    Calculate tail-weighted CRPS for extreme values.

    Args:
        fcst: Ensemble forecast data
        obs: Observation data
        ensemble_member_dim: Name of ensemble member dimension
        tail_weight: Weight parameter for tail emphasis (> 0)
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights

    Returns:
        Tail-weighted CRPS values

    Notes:
        - Higher tail_weight emphasizes extreme values more
        - tail_weight = 0 reduces to standard CRPS
        - Useful for evaluating forecast skill for extreme events
    """

Interval-Weighted CRPS

Focuses evaluation on a specific value range.

def interval_tw_crps_for_ensemble(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    ensemble_member_dim: str,
    lower_threshold: float,
    upper_threshold: float,
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> xr.DataArray:
    """
    Calculate interval-weighted CRPS for specific value range.

    Args:
        fcst: Ensemble forecast data
        obs: Observation data
        ensemble_member_dim: Name of ensemble member dimension
        lower_threshold: Lower bound of evaluation interval
        upper_threshold: Upper bound of evaluation interval
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights

    Returns:
        Interval-weighted CRPS values

    Notes:
        - Only values within [lower_threshold, upper_threshold] contribute
        - Useful for evaluating specific ranges (e.g., moderate rainfall)
        - Outside interval, weight = 0
    """

CDF Processing Utilities

Utilities for preparing and manipulating CDF forecasts.

Forecast Adjustment for CRPS

def adjust_fcst_for_crps(
    fcst: xr.DataArray,
    threshold_dim: str,
    *,
    threshold_weight: Optional[xr.DataArray] = None,
    additional_thresholds: Optional[xr.DataArray] = None,
    fcst_fill_method: str = "linear",
    threshold_weight_fill_method: str = "forward",
) -> xr.DataArray:
    """
    Prepare forecast CDF for CRPS calculation.

    Args:
        fcst: Raw CDF forecast data
        threshold_dim: Name of threshold dimension
        threshold_weight: Optional threshold weighting
        additional_thresholds: Additional threshold points
        fcst_fill_method: CDF interpolation method
        threshold_weight_fill_method: Weight interpolation method

    Returns:
        Processed CDF forecast ready for CRPS calculation

    Notes:
        - Ensures CDF is properly formatted and monotonic
        - Handles missing values and interpolation
        - Adds additional threshold points if needed
    """

Step Threshold Weighting

Creates step-function threshold weights for CRPS.

def crps_step_threshold_weight(
    thresholds: xr.DataArray,
    threshold_bins: Sequence[float],
) -> xr.DataArray:
    """
    Create step-function threshold weights.

    Args:
        thresholds: Threshold values for CDF
        threshold_bins: Bin edges for step function

    Returns:
        Step-function weights matching threshold dimension

    Notes:
        - Creates piecewise constant weighting
        - Each bin can have different weight
        - Used for interval-based evaluation emphasis
    """

Usage Patterns

Basic Probabilistic Evaluation

from scores.probability import brier_score, crps_for_ensemble
from scores.sample_data import simple_forecast, simple_observations
import numpy as np

# Binary probability forecast evaluation
prob_forecast = np.random.beta(2, 2, 100)  # Probabilities [0,1]
binary_obs = np.random.binomial(1, prob_forecast)  # Binary outcomes

bs = brier_score(prob_forecast, binary_obs)
print(f"Brier Score: {bs.values:.4f}")

# Ensemble CRPS evaluation
ensemble = np.random.normal(10, 2, (100, 20))  # 100 times, 20 members
observations = np.random.normal(10, 2, 100)

crps = crps_for_ensemble(ensemble, observations, ensemble_member_dim="member")
print(f"CRPS: {crps.values:.4f}")

Multi-threshold Evaluation

# Evaluate multiple thresholds simultaneously
thresholds = [5, 10, 15, 20, 25]
ensemble_bs = brier_score_for_ensemble(
    ensemble_forecast, observations,
    ensemble_member_dim="member",
    event_thresholds=thresholds
)

# Results have threshold dimension
for i, thresh in enumerate(thresholds):
    score = ensemble_bs.isel(threshold=i)
    print(f"Threshold {thresh}: BS = {score.values:.4f}")

Extreme Value Focus

# Emphasize extreme values using tail-weighted CRPS
tail_crps = tail_tw_crps_for_ensemble(
    ensemble_forecast, observations,
    ensemble_member_dim="member",
    tail_weight=2.0  # Strong emphasis on extremes
)

# Focus on specific range (e.g., moderate rainfall 5-15mm)
interval_crps = interval_tw_crps_for_ensemble(
    ensemble_forecast, observations,
    ensemble_member_dim="member",
    lower_threshold=5.0,
    upper_threshold=15.0
)

print(f"Standard CRPS: {crps.values:.4f}")
print(f"Tail-weighted CRPS: {tail_crps.values:.4f}")
print(f"Interval CRPS (5-15): {interval_crps.values:.4f}")

CDF Forecast Evaluation

# For CDF forecasts (probability vs threshold)
from scores.sample_data import cdf_forecast, cdf_observations

cdf_fcst = cdf_forecast()  # CDF values at different thresholds
cdf_obs = cdf_observations()  # Corresponding observations

# Standard CRPS for CDF
cdf_crps = crps_cdf(cdf_fcst, cdf_obs, threshold_dim="threshold")

# With decomposition
decomp = crps_cdf_brier_decomposition(
    cdf_fcst, cdf_obs, threshold_dim="threshold"
)

print(f"CDF CRPS: {cdf_crps.values:.4f}")
print(f"Reliability: {decomp.reliability.values:.4f}")
print(f"Resolution: {decomp.resolution.values:.4f}")

Install with Tessl CLI

npx tessl i tessl/pypi-scores

docs

categorical-scores.md

continuous-scores.md

emerging-scores.md

index.md

pandas-integration.md

plot-data.md

probability-scores.md

processing-tools.md

sample-data.md

spatial-scores.md

statistical-tests.md

tile.json