Mathematical functions for the verification, evaluation, and optimization of forecasts, predictions, or models.
Spatial verification methods that account for spatial structure and displacement errors in gridded forecasts. These metrics evaluate forecast performance while considering the spatial context and neighborhood relationships, addressing the "double penalty" problem in traditional point-wise verification.
The primary spatial verification method using a neighborhood approach to evaluate forecast accuracy over different spatial scales.
Standard FSS implementation for 2D gridded data with configurable spatial windows and event thresholds.
def fss_2d(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    *,
    event_threshold: float,
    window_size: Tuple[int, int],
    spatial_dims: Tuple[str, str],
    zero_padding: bool = False,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    threshold_operator: Optional[Callable] = None,
    compute_method: FssComputeMethod = FssComputeMethod.NUMPY,
    dask: str = "forbidden",
) -> xr.DataArray:
    """
    Calculate the 2D Fractions Skill Score (FSS) for spatial verification.

    Args:
        fcst: Forecast data array (2D spatial + other dims).
        obs: Observation data array with matching spatial dimensions.
        event_threshold: Threshold for binary event definition.
        window_size: (height, width) of the spatial verification window.
        spatial_dims: Names of the spatial dimensions (e.g. ``("lat", "lon")``).
        zero_padding: Add zeros around domain edges for window calculations.
        reduce_dims: Dimensions to reduce.
        preserve_dims: Dimensions to preserve.
        threshold_operator: Function for threshold comparison (default: >=).
        compute_method: Computational approach (NUMPY, SCIPY, etc.).
        dask: Dask computation control ("forbidden", "allowed").

    Returns:
        FSS values in [0, 1], where 1 = perfect skill and 0 = no skill.

    Formula:
        FSS = 1 - MSE_n / MSE_n_ref

        Where:
            - MSE_n: mean squared error of the neighborhood fractions.
            - MSE_n_ref: reference MSE (worst possible forecast).

    Notes:
        - Addresses the "double penalty" problem.
        - Window size controls the spatial scale of verification.
        - Larger windows generally yield higher FSS values.
        - The event threshold defines binary events from continuous data.
    """


# FSS calculation for pre-discretized binary fields, skipping the threshold
# conversion step.
def fss_2d_binary(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    *,
    window_size: Tuple[int, int],
    spatial_dims: Tuple[str, str],
    zero_padding: bool = False,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    compute_method: FssComputeMethod = FssComputeMethod.NUMPY,
    dask: str = "forbidden",
) -> xr.DataArray:
    """
    Calculate the FSS for binary data fields.

    Args:
        fcst: Binary forecast data {0, 1}.
        obs: Binary observation data {0, 1}.
        window_size: (height, width) of the spatial verification window.
        spatial_dims: Names of the spatial dimensions.
        zero_padding: Add zeros around domain edges.
        reduce_dims: Dimensions to reduce.
        preserve_dims: Dimensions to preserve.
        compute_method: Computational approach.
        dask: Dask computation control.

    Returns:
        FSS values in [0, 1].

    Notes:
        - Input data must already be binary {0, 1}.
        - More efficient when events are pre-defined.
        - Same FSS formula as :func:`fss_2d`, but skips thresholding.
    """


# FSS implementation optimized for single 2D spatial fields without
# additional dimensions.
def fss_2d_single_field(
    fcst: xr.DataArray,
    obs: xr.DataArray,
    *,
    event_threshold: float,
    window_size: Tuple[int, int],
    spatial_dims: Tuple[str, str],
    zero_padding: bool = False,
    threshold_operator: Optional[Callable] = None,
    compute_method: FssComputeMethod = FssComputeMethod.NUMPY,
) -> xr.DataArray:
    """
    Calculate the FSS for a single 2D spatial field.

    Args:
        fcst: 2D forecast field.
        obs: 2D observation field.
        event_threshold: Threshold for event definition.
        window_size: (height, width) of the verification window.
        spatial_dims: Names of the spatial dimensions.
        zero_padding: Add zeros around domain edges.
        threshold_operator: Comparison function for thresholding.
        compute_method: Computational approach.

    Returns:
        A single FSS value.

    Notes:
        - Optimized for single spatial fields.
        - No additional dimensions (time, ensemble, etc.).
        - Returns a scalar FSS value.
    """


# Enumeration of available computational approaches for FSS calculation.
from enum import Enum
class FssComputeMethod(Enum):
    """
    Available computation methods for FSS calculation.

    Attributes:
        NUMPY: Pure NumPy implementation (default).
        SCIPY: SciPy-based convolution approach.
        DASK: Dask-distributed computation (if available).
    """

    NUMPY = "numpy"
    SCIPY = "scipy"
    DASK = "dask"


from scores.spatial import fss_2d
import xarray as xr
import numpy as np

# Create sample 2D gridded data
forecast = xr.DataArray(
    np.random.exponential(2, (50, 40)),  # 50x40 grid
    dims=["y", "x"],
    coords={"y": range(50), "x": range(40)},
)
observation = xr.DataArray(
    np.random.exponential(2, (50, 40)),
    dims=["y", "x"],
    coords={"y": range(50), "x": range(40)},
)

# Calculate FSS for precipitation >= 5mm with a 5x5 window
fss = fss_2d(
    forecast,
    observation,
    event_threshold=5.0,
    window_size=(5, 5),
    spatial_dims=("y", "x"),
)
print(f"FSS (5x5 window, 5mm threshold): {fss.values:.3f}")

# Evaluate FSS across multiple spatial scales
# FSS at increasing window sizes; larger windows should yield higher scores.
window_sizes = [(1, 1), (3, 3), (5, 5), (11, 11), (21, 21)]
fss_scales = {}
for window in window_sizes:
    fss_value = fss_2d(
        forecast,
        observation,
        event_threshold=5.0,
        window_size=window,
        spatial_dims=("y", "x"),
    )
    scale_name = f"{window[0]}x{window[1]}"
    fss_scales[scale_name] = fss_value.values

print("FSS by spatial scale:")
for scale, fss_val in fss_scales.items():
    print(f"  {scale}: {fss_val:.3f}")

# Evaluate FSS for different event intensities
# FSS at a fixed window size across increasingly rare event thresholds.
thresholds = [1.0, 2.5, 5.0, 10.0, 20.0]
window_size = (7, 7)
fss_thresholds = {}
for threshold in thresholds:
    fss_value = fss_2d(
        forecast,
        observation,
        event_threshold=threshold,
        window_size=window_size,
        spatial_dims=("y", "x"),
    )
    fss_thresholds[threshold] = fss_value.values

print(f"FSS by threshold (window: {window_size[0]}x{window_size[1]}):")
for thresh, fss_val in fss_thresholds.items():
    print(f"  {thresh:4.1f}mm: {fss_val:.3f}")

# Multi-temporal spatial verification
# pandas is required for the time coordinate below; the original example
# used `pd` without importing it.
import pandas as pd

forecast_time = xr.DataArray(
    np.random.exponential(2, (24, 50, 40)),  # 24 time steps
    dims=["time", "y", "x"],
    coords={
        # NOTE: "H" is deprecated in pandas >= 2.2 in favor of "h".
        "time": pd.date_range("2023-01-01", periods=24, freq="H"),
        "y": range(50),
        "x": range(40),
    },
)
observation_time = xr.DataArray(
    np.random.exponential(2, (24, 50, 40)),
    dims=["time", "y", "x"],
    coords=forecast_time.coords,
)

# FSS for each time step
fss_hourly = fss_2d(
    forecast_time,
    observation_time,
    event_threshold=5.0,
    window_size=(7, 7),
    spatial_dims=("y", "x"),
    reduce_dims=["y", "x"],  # Reduce spatial dims, keep time
)
print(f"Hourly FSS shape: {fss_hourly.shape}")
print(f"Mean FSS over 24 hours: {fss_hourly.mean().values:.3f}")

# Overall FSS (all time steps combined)
fss_overall = fss_2d(
    forecast_time,
    observation_time,
    event_threshold=5.0,
    window_size=(7, 7),
    spatial_dims=("y", "x"),
    # No reduce_dims specified = reduce all non-spatial dims
)
print(f"Overall FSS: {fss_overall.values:.3f}")

from scores.processing import binary_discretise
# Pre-process continuous data to binary events
precip_threshold = 10.0
fcst_binary = binary_discretise(forecast_time, precip_threshold)
obs_binary = binary_discretise(observation_time, precip_threshold)

# Calculate FSS for binary data (more efficient: thresholding is skipped)
fss_binary = fss_2d_binary(
    fcst_binary,
    obs_binary,
    window_size=(9, 9),
    spatial_dims=("y", "x"),
)
print(f"FSS for binary data (10mm threshold): {fss_binary.values:.3f}")

import operator
# Custom threshold operator (less than or equal)
fss_le = fss_2d(
    forecast,
    observation,
    event_threshold=2.0,
    window_size=(5, 5),
    spatial_dims=("y", "x"),
    threshold_operator=operator.le,  # Event = value <= 2.0
)

# With zero padding (extends domain with zeros)
fss_padded = fss_2d(
    forecast,
    observation,
    event_threshold=5.0,
    window_size=(11, 11),
    spatial_dims=("y", "x"),
    zero_padding=True,  # Adds zeros around edges
)

# Different computation method
fss_scipy = fss_2d(
    forecast,
    observation,
    event_threshold=5.0,
    window_size=(7, 7),
    spatial_dims=("y", "x"),
    compute_method=FssComputeMethod.SCIPY,
)

print(f"FSS (<=2.0mm events): {fss_le.values:.3f}")
print(f"FSS (with padding): {fss_padded.values:.3f}")
print(f"FSS (SciPy method): {fss_scipy.values:.3f}")

# For large datasets, control computation methods
large_forecast = xr.DataArray(
    np.random.exponential(2, (100, 200, 300)),  # Large 3D array
    dims=["time", "y", "x"],
)
large_observation = xr.DataArray(
    np.random.exponential(2, (100, 200, 300)),
    dims=["time", "y", "x"],
)

# Process in chunks for memory efficiency
fss_chunked = fss_2d(
    large_forecast.chunk({"time": 10}),  # Chunk along time
    large_observation.chunk({"time": 10}),
    event_threshold=5.0,
    window_size=(7, 7),
    spatial_dims=("y", "x"),
    reduce_dims=["y", "x"],  # Keep time dimension
    compute_method=FssComputeMethod.NUMPY,
    dask="allowed",  # Allow Dask computation
)

# Compute results
fss_result = fss_chunked.compute()  # Triggers Dask computation
print(f"FSS time series computed: shape {fss_result.shape}")

# NOTE: FSS naturally increases with window size, so comparisons should use
# consistent spatial scales or reference the "useful scale" where FSS first
# exceeds 0.5.
Install with Tessl CLI
npx tessl i tessl/pypi-scores