Mathematical functions for verification, evaluation and optimization of forecasts, predictions or models
npx @tessl/cli install tessl/pypi-scores@2.2.0

A comprehensive Python library containing mathematical functions for the verification, evaluation and optimization of forecasts, predictions or models. The package provides over 60 functions specifically designed for scientific applications in meteorology, climatology and oceanography, built on xarray for handling labeled n-dimensional scientific data with support for NetCDF4, HDF5, Zarr and GRIB formats.
pip install scores

Basic package import:
import scores

Module-specific imports:
import scores.continuous
import scores.probability
import scores.categorical
import scores.processing
import scores.sample_data
import scores.emerging
# Note: spatial must be imported directly
from scores import spatial

Direct function imports (recommended):
from scores.continuous import mse, rmse, mae, kge, nse
from scores.probability import brier_score, crps_for_ensemble
from scores.categorical import probability_of_detection
from scores.spatial import fss_2d

A quick-start example using the built-in sample data:

import xarray as xr
from scores.continuous import mse, rmse, mae
from scores.sample_data import simple_forecast, simple_observations
# Generate sample data for demonstration
forecast = simple_forecast()
observations = simple_observations()
# Calculate basic continuous scores
mse_score = mse(forecast, observations)
rmse_score = rmse(forecast, observations)
mae_score = mae(forecast, observations)
print(f"MSE: {mse_score.values}")
print(f"RMSE: {rmse_score.values}")
print(f"MAE: {mae_score.values}")
# Calculate scores with dimension reduction
forecast_3d = forecast.expand_dims({"station": 5, "time": 10})
observations_3d = observations.expand_dims({"station": 5, "time": 10})
# Reduce over time dimension only
temporal_mse = mse(forecast_3d, observations_3d, reduce_dims="time")
print(f"Temporal MSE shape: {temporal_mse.shape}")
# Apply weighting (e.g., latitude weighting for spatial data)
from scores.functions import create_latitude_weights
import numpy as np
# Wrap latitudes in a DataArray so the weights carry the "station" dimension
lats = xr.DataArray(np.linspace(-90, 90, forecast_3d.sizes["station"]), dims="station")
lat_weights = create_latitude_weights(lats)
weighted_mse = mse(forecast_3d, observations_3d,
reduce_dims=["time", "station"],
weights=lat_weights)
print(f"Weighted MSE: {weighted_mse.values}")The scores package is organized around forecast evaluation paradigms:
- Consistent dimension handling: reduction (reduce_dims) and preservation (preserve_dims) with automatic broadcasting
- An optional weights parameter (e.g., area weighting, population weighting); see the sketch below
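A minimal sketch of how these shared parameters interact, using small hand-built arrays so the dimension names are explicit. It assumes mae accepts the same reduce_dims, preserve_dims and weights keywords shown for mse in the quick-start, and that a single dimension name may be passed as a plain string, as in the quick-start.

import numpy as np
import xarray as xr
from scores.continuous import mae

fcst = xr.DataArray(np.random.rand(4, 3), dims=["time", "station"])
obs = xr.DataArray(np.random.rand(4, 3), dims=["time", "station"])

# Default: reduce over every dimension, returning a single score
print(mae(fcst, obs))

# Keep one score per station (equivalently, reduce over time only)
print(mae(fcst, obs, preserve_dims="station"))
print(mae(fcst, obs, reduce_dims="time"))

# Weights broadcast against the inputs before the reduction
station_weights = xr.DataArray([0.2, 0.3, 0.5], dims="station")
print(mae(fcst, obs, weights=station_weights))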
Standard metrics for evaluating single-valued continuous forecasts including error measures, bias metrics, efficiency indices, and correlation coefficients.

from scores.continuous import (
mse, rmse, mae, additive_bias, multiplicative_bias, mean_error, pbias,
kge, nse, pearsonr, spearmanr
)
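Beyond the error measures used in the quick-start, the bias and efficiency metrics are called the same way; a brief sketch assuming the same (forecast, observation) calling convention:

import xarray as xr
from scores.continuous import additive_bias, kge, nse

fcst = xr.DataArray([1.2, 0.8, 2.5, 3.1], dims="time")
obs = xr.DataArray([1.0, 1.0, 2.0, 3.0], dims="time")

print(additive_bias(fcst, obs))  # mean(forecast - observation)
print(kge(fcst, obs))            # Kling-Gupta Efficiency
print(nse(fcst, obs))            # Nash-Sutcliffe Efficiency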
Metrics for evaluating probability forecasts, ensemble forecasts, and distributional predictions including Brier score, CRPS (Continuous Ranked Probability Score), and their threshold-weighted variants.

from scores.probability import (
brier_score, brier_score_for_ensemble,
crps_cdf, crps_for_ensemble, crps_cdf_brier_decomposition,
tw_crps_for_ensemble, tail_tw_crps_for_ensemble
)
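A sketch of two common probabilistic workflows: the Brier score for probability forecasts of a binary event, and CRPS for an ensemble. The ensemble_member_dim keyword name is an assumption here and should be checked against the API reference of the installed version.

import numpy as np
import xarray as xr
from scores.probability import brier_score, crps_for_ensemble

# Probability forecasts of a binary event, verified against 0/1 observations
prob_fcst = xr.DataArray([0.1, 0.7, 0.9, 0.4], dims="time")
binary_obs = xr.DataArray([0.0, 1.0, 1.0, 0.0], dims="time")
print(brier_score(prob_fcst, binary_obs))

# A 10-member ensemble verified with the ensemble CRPS; ensemble_member_dim
# names the ensemble dimension (keyword name assumed, not verified)
ens_fcst = xr.DataArray(np.random.rand(4, 10), dims=["time", "member"])
obs = xr.DataArray(np.random.rand(4), dims="time")
print(crps_for_ensemble(ens_fcst, obs, ensemble_member_dim="member"))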
Verification metrics for categorical and binary forecasts including contingency table statistics, skill scores, and multicategorical measures.

from scores.categorical import (
probability_of_detection, probability_of_false_detection,
firm, seeps, BinaryContingencyManager, BasicContingencyManager
)
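A short sketch of the binary contingency metrics, assuming they expect 0/1 event fields as in standard contingency-table verification:

import xarray as xr
from scores.categorical import probability_of_detection, probability_of_false_detection

fcst_events = xr.DataArray([1.0, 0.0, 1.0, 1.0, 0.0], dims="time")
obs_events = xr.DataArray([1.0, 0.0, 0.0, 1.0, 1.0], dims="time")

print(probability_of_detection(fcst_events, obs_events))        # hit rate
print(probability_of_false_detection(fcst_events, obs_events))  # false alarm rate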
Spatial verification methods that account for spatial structure and displacement errors in gridded forecasts.

from scores.spatial import fss_2d, fss_2d_binary, fss_2d_single_field
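A hedged sketch of the Fractions Skill Score on a small grid. The keyword names used below (event_threshold, window_size, spatial_dims) are assumptions about the fss_2d signature and should be verified against the API reference before use.

import numpy as np
import xarray as xr
from scores.spatial import fss_2d

# A small gridded forecast/observation pair
fcst_grid = xr.DataArray(np.random.rand(1, 20, 20), dims=["time", "y", "x"])
obs_grid = xr.DataArray(np.random.rand(1, 20, 20), dims=["time", "y", "x"])

# FSS for the event "value > 0.5", evaluated in 5x5 neighbourhood windows
# (keyword names assumed, not verified)
fss = fss_2d(
    fcst_grid,
    obs_grid,
    event_threshold=0.5,
    window_size=(5, 5),
    spatial_dims=("y", "x"),
)
print(fss)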
Tools for data preprocessing, discretization, bootstrapping, and CDF manipulation required for forecast verification workflows.

from scores.processing import (
comparative_discretise, binary_discretise, binary_discretise_proportion,
broadcast_and_match_nan, isotonic_fit, block_bootstrap
)
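For example, matching missing data between forecasts and observations before scoring; this sketch assumes broadcast_and_match_nan accepts any number of xarray objects and returns them with NaNs propagated to the same positions:

import numpy as np
import xarray as xr
from scores.processing import broadcast_and_match_nan

fcst = xr.DataArray([1.0, 2.0, np.nan, 4.0], dims="time")
obs = xr.DataArray([1.5, np.nan, 3.0, 4.5], dims="time")

# Score both arrays on exactly the same set of valid points
fcst_matched, obs_matched = broadcast_and_match_nan(fcst, obs)
print(fcst_matched.values)  # NaN wherever either input was NaN
print(obs_matched.values)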
Functions for generating data structures needed for diagnostic plots and visualization including Murphy diagrams, Q-Q plots, and ROC curves.

from scores.plotdata import murphy_score, murphy_thetas, qq, roc

Statistical significance testing for forecast comparisons and confidence interval estimation.
from scores.stats.statistical_tests import diebold_mariano

Built-in sample data generation for tutorials, testing, and experimentation with various data formats and structures.
from scores.sample_data import (
simple_forecast, simple_observations,
continuous_forecast, continuous_observations,
cdf_forecast, cdf_observations
)
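A small sketch of generating sample data and scoring it; calling the continuous generators with no arguments is an assumption modelled on the simple_forecast()/simple_observations() usage in the quick-start above.

from scores.continuous import rmse
from scores.sample_data import continuous_forecast, continuous_observations

# Generate a matched pair of continuous sample datasets
# (no-argument calls assumed, not verified)
fcst = continuous_forecast()
obs = continuous_observations()

print(fcst.dims, fcst.sizes)
print(rmse(fcst, obs))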
Experimental scoring methods under development and peer review, including risk-based evaluation frameworks designed for decision-oriented forecast assessment.

from scores.emerging import (
risk_matrix_score, matrix_weights_to_array, weights_from_warning_scaling
)

Specialized functions for working with pandas Series data, providing simplified APIs for basic continuous metrics without dimensional operations.
from scores.pandas.continuous import mse, rmse, mae
from scores.pandas.typing import PandasType
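A sketch assuming the pandas variants mirror the (forecast, observation) calling convention and return plain scalars, as the description above indicates:

import pandas as pd
from scores.pandas.continuous import mae, mse, rmse

fcst = pd.Series([1.2, 0.8, 2.5, 3.1])
obs = pd.Series([1.0, 1.0, 2.0, 3.0])

# Scalar scores, no dimension handling required
print(mse(fcst, obs))
print(rmse(fcst, obs))
print(mae(fcst, obs))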
# Standard dimension parameters (used by all scoring functions)
reduce_dims: Optional[Iterable[Hashable]] = None      # Dimensions to reduce
preserve_dims: Optional[Iterable[Hashable]] = None # Dimensions to keep
weights: Optional[xr.DataArray] = None                # Optional weighting

from scores.typing import (
FlexibleDimensionTypes, # Iterable[Hashable]
XarrayLike, # Union[xr.DataArray, xr.Dataset]
FlexibleArrayType # Union[XarrayLike, pd.Series]
)
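These aliases are mainly useful when annotating your own wrappers around scores functions. A sketch with a hypothetical station_mae helper; the helper itself is illustrative and not part of the package:

from typing import Optional

from scores.continuous import mae
from scores.typing import FlexibleDimensionTypes, XarrayLike

def station_mae(
    fcst: XarrayLike,
    obs: XarrayLike,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
) -> XarrayLike:
    # Hypothetical convenience wrapper, shown only to illustrate the type aliases
    return mae(fcst, obs, reduce_dims=reduce_dims)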
from scores.functions import (
    apply_weights,
create_latitude_weights,
angular_difference
)
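A brief sketch of the utility functions, assuming angular_difference returns the smallest angle between its inputs in degrees and create_latitude_weights returns cosine-of-latitude area weights:

import numpy as np
import xarray as xr
from scores.functions import angular_difference, create_latitude_weights

# Wind-direction error: 350 degrees vs 10 degrees should give 20, not 340
fcst_dir = xr.DataArray([350.0, 90.0], dims="time")
obs_dir = xr.DataArray([10.0, 270.0], dims="time")
print(angular_difference(fcst_dir, obs_dir).values)

# Area weights for a regular latitude grid
lats = xr.DataArray(np.arange(-80.0, 81.0, 20.0), dims="latitude")
print(create_latitude_weights(lats).values)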
# Basic installation
pip install scores
# Development tools (testing, linting)
pip install scores[dev]
# Tutorial dependencies (jupyter, matplotlib, plotting tools)
pip install scores[tutorial]
# Everything
pip install scores[all]
# Alternative: Using pixi environment manager
pixi install