tessl/pypi-scores

Mathematical functions for verification, evaluation and optimization of forecasts, predictions or models

Overview

Eval results

Files

Plot Data

Name: tessl/pypi-scores
Author: tessl

Functions for generating data structures needed for diagnostic plots and visualization in forecast verification. These tools create plot-ready data for Murphy diagrams, Q-Q plots, ROC curves, and other verification visualizations.

Capabilities

Murphy Diagram Data

Murphy diagrams provide comprehensive visualization of forecast performance across all possible decision thresholds, showing the relationship between forecast value and expected score.

Murphy Score Calculation

Generates the core data for Murphy diagram plotting.

def murphy_score(
    fcst: XarrayLike,
    obs: XarrayLike,
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> XarrayLike:
    """
    Calculate Murphy score for Murphy diagram generation.

    Args:
        fcst: Forecast values
        obs: Observation values
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights

    Returns:
        Murphy scores across forecast value thresholds

    Notes:
        - Creates data for Murphy diagram plotting
        - Shows expected score as function of forecast value
        - Useful for understanding forecast value relationships
        - Typically plotted as scatter or line plot
    """

Murphy Theta Values

Computes theta parameters for Murphy diagram construction.

def murphy_thetas(
    fcst: XarrayLike,
    obs: XarrayLike,
    *,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> XarrayLike:
    """
    Calculate Murphy theta values for Murphy diagrams.

    Args:
        fcst: Forecast values
        obs: Observation values
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights

    Returns:
        Theta values for Murphy diagram axes

    Notes:
        - Provides theta parameters for Murphy diagram
        - Used as x-axis values in Murphy plots
        - Represents decision threshold parameters
    """

Quantile-Quantile Plot Data

Q-Q plots compare the distributions of forecasts and observations by plotting their quantiles against each other.

def qq(
    fcst: XarrayLike,
    obs: XarrayLike,
    *,
    quantiles: Optional[Sequence[float]] = None,
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
) -> xr.Dataset:
    """
    Generate quantile-quantile plot data.

    Args:
        fcst: Forecast values
        obs: Observation values
        quantiles: Quantile levels to compute (default: 0.01 to 0.99 by 0.01)
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve

    Returns:
        Dataset with forecast and observation quantiles

    Output Variables:
        - fcst_quantiles: Forecast quantile values
        - obs_quantiles: Observation quantile values
        - quantile_levels: Quantile probability levels

    Notes:
        - Perfect forecast would show fcst_quantiles = obs_quantiles
        - Deviations indicate distributional biases
        - Useful for identifying systematic forecast errors
        - Default quantiles: [0.01, 0.02, ..., 0.99]
    """

ROC Curve Data

ROC (Receiver Operating Characteristic) curves evaluate binary classification performance across all probability thresholds.

def roc(
    fcst: XarrayLike,
    obs: XarrayLike,
    *,
    bin_edges: Union[str, Sequence[float]] = "continuous",
    reduce_dims: Optional[FlexibleDimensionTypes] = None,
    preserve_dims: Optional[FlexibleDimensionTypes] = None,
    weights: Optional[xr.DataArray] = None,
) -> xr.Dataset:
    """
    Generate ROC curve data for binary classification.

    Args:
        fcst: Probability forecast values [0, 1] or continuous values
        obs: Binary observation values {0, 1}
        bin_edges: Bin edges for probability discretization or "continuous"
        reduce_dims: Dimensions to reduce
        preserve_dims: Dimensions to preserve
        weights: Optional weights

    Returns:
        Dataset with ROC curve coordinates and metrics

    Output Variables:
        - pod: Probability of Detection (True Positive Rate)
        - pofd: Probability of False Detection (False Positive Rate)
        - threshold: Probability thresholds used
        - auc: Area Under Curve (if computed)

    Notes:
        - POD (y-axis) vs POFD (x-axis) creates ROC curve
        - Perfect forecast: ROC curve passes through (0,1)
        - No skill: ROC curve follows diagonal (0,0) to (1,1)
        - Area Under Curve (AUC): scalar skill measure
        - bin_edges="continuous" uses unique forecast values
    """

Usage Patterns

Basic Murphy Diagram

from scores.plotdata import murphy_score, murphy_thetas
import matplotlib.pyplot as plt
import numpy as np

# Generate sample data
forecast = np.random.normal(10, 3, 1000)
observation = forecast + np.random.normal(0, 1, 1000)

# Calculate Murphy diagram data
murphy_scores = murphy_score(forecast, observation)
theta_values = murphy_thetas(forecast, observation)

# Create Murphy diagram plot
plt.figure(figsize=(10, 6))
plt.scatter(theta_values, murphy_scores, alpha=0.6)
plt.xlabel('Theta (Decision Threshold)')
plt.ylabel('Murphy Score')
plt.title('Murphy Diagram')
plt.grid(True)
plt.show()

Quantile-Quantile Analysis

from scores.plotdata import qq
import matplotlib.pyplot as plt

# Generate forecast and observation data with different distributions
forecast = np.random.gamma(2, 2, 1000)  # Gamma distribution
observation = np.random.exponential(3, 1000)  # Exponential distribution

# Calculate Q-Q plot data
qq_data = qq(forecast, observation)

# Extract quantile values
fcst_q = qq_data.fcst_quantiles
obs_q = qq_data.obs_quantiles

# Create Q-Q plot
plt.figure(figsize=(8, 8))
plt.scatter(fcst_q, obs_q, alpha=0.7)
plt.plot([0, max(fcst_q.max().values, obs_q.max().values)],
         [0, max(fcst_q.max().values, obs_q.max().values)],
         'r--', label='Perfect Forecast Line')
plt.xlabel('Forecast Quantiles')
plt.ylabel('Observation Quantiles')
plt.title('Quantile-Quantile Plot')
plt.legend()
plt.grid(True)
plt.show()

print(f"Q-Q data contains {len(qq_data.quantile_levels)} quantile levels")

ROC Curve Analysis

from scores.plotdata import roc
import matplotlib.pyplot as plt

# Create probability forecast and binary observations
prob_forecast = np.random.beta(2, 3, 1000)  # Probability values [0,1]
# Generate observations with some skill
binary_obs = np.random.binomial(1, 0.3 + 0.4 * prob_forecast)

# Calculate ROC curve data
roc_data = roc(prob_forecast, binary_obs)

# Extract ROC coordinates
pod_values = roc_data.pod
pofd_values = roc_data.pofd
auc_value = roc_data.auc if 'auc' in roc_data else None

# Create ROC curve plot
plt.figure(figsize=(8, 8))
plt.plot(pofd_values, pod_values, 'b-', linewidth=2, label='ROC Curve')
plt.plot([0, 1], [0, 1], 'r--', label='No Skill Line')
plt.xlabel('Probability of False Detection (False Positive Rate)')
plt.ylabel('Probability of Detection (True Positive Rate)')
plt.title(f'ROC Curve (AUC = {auc_value.values:.3f})' if auc_value else 'ROC Curve')
plt.legend()
plt.grid(True)
plt.axis('equal')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.show()

Custom Quantile Levels

# Q-Q plot with custom quantile levels
custom_quantiles = [0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95]

qq_custom = qq(forecast, observation, quantiles=custom_quantiles)

print("Custom quantile analysis:")
for i, q_level in enumerate(custom_quantiles):
    fcst_val = qq_custom.fcst_quantiles.values[i]
    obs_val = qq_custom.obs_quantiles.values[i]
    print(f"Q{q_level:4.2f}: forecast={fcst_val:6.2f}, observation={obs_val:6.2f}")

Multi-dimensional Plot Data

# Generate plot data for multi-dimensional arrays
forecast_3d = xr.DataArray(
    np.random.normal(10, 2, (100, 5, 8)),  # time, lat, lon
    dims=["time", "lat", "lon"]
)
observation_3d = xr.DataArray(
    forecast_3d.values + np.random.normal(0, 1, (100, 5, 8)),
    dims=["time", "lat", "lon"]
)

# Q-Q data for each spatial point (reduce time dimension)
qq_spatial = qq(forecast_3d, observation_3d, reduce_dims="time")
print(f"Spatial Q-Q data shape: {qq_spatial.fcst_quantiles.shape}")

# Q-Q data for each time step (reduce spatial dimensions)
qq_temporal = qq(forecast_3d, observation_3d, reduce_dims=["lat", "lon"])
print(f"Temporal Q-Q data shape: {qq_temporal.fcst_quantiles.shape}")

# Overall Q-Q data (reduce all dimensions)
qq_overall = qq(forecast_3d, observation_3d)
print(f"Overall Q-Q data points: {len(qq_overall.quantile_levels)}")

ROC Analysis with Binning

# ROC curve with specified probability bins
prob_bins = np.linspace(0, 1, 21)  # 20 probability bins

roc_binned = roc(prob_forecast, binary_obs, bin_edges=prob_bins)

print(f"ROC analysis with {len(prob_bins)-1} probability bins")
print(f"Number of ROC points: {len(roc_binned.threshold)}")

# Plot binned ROC curve
plt.figure(figsize=(8, 8))
plt.plot(roc_binned.pofd, roc_binned.pod, 'bo-', markersize=4, label='Binned ROC')
plt.plot([0, 1], [0, 1], 'r--', label='No Skill')
plt.xlabel('POFD')
plt.ylabel('POD')
plt.title('ROC Curve (Binned Probabilities)')
plt.legend()
plt.grid(True)
plt.show()

# Show threshold values
for i in range(0, len(roc_binned.threshold), 5):  # Every 5th threshold
    thresh = roc_binned.threshold.values[i]
    pod = roc_binned.pod.values[i]
    pofd = roc_binned.pofd.values[i]
    print(f"Threshold {thresh:.2f}: POD={pod:.3f}, POFD={pofd:.3f}")

Comprehensive Visualization Workflow

from scores.plotdata import murphy_score, murphy_thetas, qq, roc
import matplotlib.pyplot as plt

# Sample forecast system evaluation
forecast_prob = np.random.beta(3, 2, 2000)  # Probability forecasts
observations = np.random.binomial(1, forecast_prob * 0.8)  # Binary outcomes with some skill

# Generate all plot data
murphy_data = murphy_score(forecast_prob, observations)
theta_data = murphy_thetas(forecast_prob, observations)
qq_data = qq(forecast_prob, observations)
roc_data = roc(forecast_prob, observations)

# Create comprehensive verification plot
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Murphy diagram
ax1.scatter(theta_data, murphy_data, alpha=0.6)
ax1.set_xlabel('Theta')
ax1.set_ylabel('Murphy Score')
ax1.set_title('Murphy Diagram')
ax1.grid(True)

# Q-Q plot
ax2.scatter(qq_data.fcst_quantiles, qq_data.obs_quantiles, alpha=0.7)
ax2.plot([0, 1], [0, 1], 'r--', label='Perfect Forecast')
ax2.set_xlabel('Forecast Quantiles')
ax2.set_ylabel('Observation Quantiles')
ax2.set_title('Q-Q Plot')
ax2.legend()
ax2.grid(True)

# ROC curve
ax3.plot(roc_data.pofd, roc_data.pod, 'b-', linewidth=2)
ax3.plot([0, 1], [0, 1], 'r--', alpha=0.7)
ax3.set_xlabel('POFD')
ax3.set_ylabel('POD')
ax3.set_title('ROC Curve')
ax3.grid(True)

# Distribution comparison
ax4.hist(forecast_prob, bins=20, alpha=0.6, label='Forecast', density=True)
ax4.hist(observations, bins=20, alpha=0.6, label='Observation', density=True)
ax4.set_xlabel('Value')
ax4.set_ylabel('Density')
ax4.set_title('Distribution Comparison')
ax4.legend()
ax4.grid(True)

plt.tight_layout()
plt.show()

Weighted Plot Data

from scores.functions import create_latitude_weights

# Create spatially-weighted plot data
lats = np.linspace(-60, 60, 25)
area_weights = create_latitude_weights(lats)

# Expand weights to match data dimensions
weights_3d = area_weights.broadcast_like(forecast_3d.isel(time=0, drop=True))

# Generate area-weighted plot data
qq_weighted = qq(forecast_3d, observation_3d,
                reduce_dims=["time", "lat", "lon"])

roc_weighted = roc(forecast_3d, observation_3d.where(observation_3d > 5, 0).where(observation_3d <= 5, 1),
                  reduce_dims=["time", "lat", "lon"],
                  weights=weights_3d)

print("Area-weighted verification complete")
print(f"Weighted Q-Q quantiles: {len(qq_weighted.quantile_levels)}")
print(f"Weighted ROC points: {len(roc_weighted.threshold)}")

Install with Tessl CLI