or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

categorical-scores.md continuous-scores.md emerging-scores.md index.md pandas-integration.md plot-data.md probability-scores.md processing-tools.md sample-data.md spatial-scores.md statistical-tests.md

docs/categorical-scores.md

0

# Categorical Scores

1

2

Verification metrics for categorical and binary forecasts including contingency table statistics, skill scores, and multicategorical measures. Supports both traditional binary classification metrics and advanced multicategorical scoring methods.

3

4

## Capabilities

5

6

### Binary Classification Metrics

7

8

Standard 2x2 contingency table metrics for evaluating binary event forecasts.

9

10

#### Probability of Detection (POD)

11

12

Also known as Hit Rate or Sensitivity, measures the proportion of observed events correctly forecast.

13

14

```python { .api }

15

def probability_of_detection(

16

fcst: XarrayLike,

17

obs: XarrayLike,

18

*,

19

reduce_dims: Optional[FlexibleDimensionTypes] = None,

20

preserve_dims: Optional[FlexibleDimensionTypes] = None,

21

weights: Optional[xr.DataArray] = None,

22

check_args: bool = True,

23

) -> XarrayLike:

24

"""

25

Calculate Probability of Detection (Hit Rate).

26

27

Args:

28

fcst: Binary forecast values {0, 1}

29

obs: Binary observation values {0, 1}

30

reduce_dims: Dimensions to reduce

31

preserve_dims: Dimensions to preserve

32

weights: Optional weights

33

check_args: Validate binary data

34

35

Returns:

36

Probability of detection values

37

38

Formula:

39

POD = hits / (hits + misses)

40

41

Notes:

42

- Range: [0, 1]

43

- Perfect forecast has POD = 1

44

- Also known as Sensitivity, Hit Rate, Recall

45

- Measures ability to detect events when they occur

46

"""

47

```

48

49

#### Probability of False Detection (POFD)

50

51

Also known as False Alarm Rate, measures the proportion of non-events incorrectly forecast as events.

52

53

```python { .api }

54

def probability_of_false_detection(

55

fcst: XarrayLike,

56

obs: XarrayLike,

57

*,

58

reduce_dims: Optional[FlexibleDimensionTypes] = None,

59

preserve_dims: Optional[FlexibleDimensionTypes] = None,

60

weights: Optional[xr.DataArray] = None,

61

check_args: bool = True,

62

) -> XarrayLike:

63

"""

64

Calculate Probability of False Detection (False Alarm Rate).

65

66

Args:

67

fcst: Binary forecast values {0, 1}

68

obs: Binary observation values {0, 1}

69

reduce_dims: Dimensions to reduce

70

preserve_dims: Dimensions to preserve

71

weights: Optional weights

72

check_args: Validate binary data

73

74

Returns:

75

Probability of false detection values

76

77

Formula:

78

POFD = false_alarms / (false_alarms + correct_negatives)

79

80

Notes:

81

- Range: [0, 1]

82

- Perfect forecast has POFD = 0

83

- Also known as False Alarm Rate, Fall-out

84

- Measures tendency to falsely predict events

85

"""

86

```

87

88

**Usage Example:**

89

90

```python

91

from scores.categorical import probability_of_detection, probability_of_false_detection

92

import xarray as xr

93

import numpy as np

94

95

# Binary forecast and observation data

96

forecast = xr.DataArray([1, 1, 0, 0, 1, 1, 0, 1])

97

observed = xr.DataArray([1, 0, 0, 1, 1, 0, 0, 1])

98

99

pod = probability_of_detection(forecast, observed)

100

pofd = probability_of_false_detection(forecast, observed)

101

102

print(f"Probability of Detection: {pod.values:.3f}")

103

print(f"Probability of False Detection: {pofd.values:.3f}")

104

```

105

106

### Contingency Table Management

107

108

Classes for systematic computation of multiple binary classification metrics.

109

110

#### BinaryContingencyManager

111

112

Comprehensive manager providing access to 18 different binary contingency table metrics.

113

114

```python { .api }

115

class BinaryContingencyManager:

116

"""

117

Binary contingency table manager with 18 metrics.

118

119

Computes and provides access to all standard binary classification

120

metrics derived from 2x2 contingency tables.

121

"""

122

123

def __init__(

124

self,

125

fcst: XarrayLike,

126

obs: XarrayLike,

127

*,

128

reduce_dims: Optional[FlexibleDimensionTypes] = None,

129

preserve_dims: Optional[FlexibleDimensionTypes] = None,

130

weights: Optional[xr.DataArray] = None,

131

check_args: bool = True,

132

):

133

"""

134

Initialize binary contingency table manager.

135

136

Args:

137

fcst: Binary forecast values {0, 1}

138

obs: Binary observation values {0, 1}

139

reduce_dims: Dimensions to reduce

140

preserve_dims: Dimensions to preserve

141

weights: Optional weights

142

check_args: Validate binary data

143

"""

144

145

# Available metrics (all return XarrayLike):

146

def pod(self) -> XarrayLike:

147

"""Probability of Detection (Hit Rate, Sensitivity)."""

148

149

def pofd(self) -> XarrayLike:

150

"""Probability of False Detection (False Alarm Rate)."""

151

152

def far(self) -> XarrayLike:

153

"""False Alarm Ratio."""

154

155

def success_ratio(self) -> XarrayLike:

156

"""Success Ratio (1 - False Alarm Ratio)."""

157

158

def accuracy(self) -> XarrayLike:

159

"""Accuracy (Proportion Correct)."""

160

161

def bias_score(self) -> XarrayLike:

162

"""Bias Score (Frequency Bias)."""

163

164

def csi(self) -> XarrayLike:

165

"""Critical Success Index (Threat Score)."""

166

167

def ets(self) -> XarrayLike:

168

"""Equitable Threat Score."""

169

170

def hss(self) -> XarrayLike:

171

"""Heidke Skill Score."""

172

173

def pss(self) -> XarrayLike:

174

"""Peirce Skill Score (True Skill Statistic)."""

175

176

def odds_ratio(self) -> XarrayLike:

177

"""Odds Ratio."""

178

179

def log_odds_ratio(self) -> XarrayLike:

180

"""Logarithm of Odds Ratio."""

181

182

def yules_q(self) -> XarrayLike:

183

"""Yule's Q Statistic."""

184

185

def yules_y(self) -> XarrayLike:

186

"""Yule's Y Statistic."""

187

188

def dor(self) -> XarrayLike:

189

"""Diagnostic Odds Ratio."""

190

191

def log_dor(self) -> XarrayLike:

192

"""Logarithm of Diagnostic Odds Ratio."""

193

194

def positive_likelihood_ratio(self) -> XarrayLike:

195

"""Positive Likelihood Ratio."""

196

197

def negative_likelihood_ratio(self) -> XarrayLike:

198

"""Negative Likelihood Ratio."""

199

```

200

201

#### BasicContingencyManager

202

203

Lower-level contingency table management for custom metrics.

204

205

```python { .api }

206

class BasicContingencyManager:

207

"""

208

Basic contingency table manager.

209

210

Provides raw contingency table counts for custom metric computation.

211

"""

212

213

def __init__(

214

self,

215

fcst: XarrayLike,

216

obs: XarrayLike,

217

*,

218

reduce_dims: Optional[FlexibleDimensionTypes] = None,

219

preserve_dims: Optional[FlexibleDimensionTypes] = None,

220

weights: Optional[xr.DataArray] = None,

221

):

222

"""Initialize basic contingency manager."""

223

224

def hits(self) -> XarrayLike:

225

"""True Positives: forecast=1, observed=1."""

226

227

def misses(self) -> XarrayLike:

228

"""False Negatives: forecast=0, observed=1."""

229

230

def false_alarms(self) -> XarrayLike:

231

"""False Positives: forecast=1, observed=0."""

232

233

def correct_negatives(self) -> XarrayLike:

234

"""True Negatives: forecast=0, observed=0."""

235

```

236

237

### Event Operators

238

239

Classes for defining binary events from continuous data.

240

241

#### EventOperator

242

243

Generic event operator for categorical scoring.

244

245

```python { .api }

246

class EventOperator:

247

"""

248

Generic event operator for categorical scoring.

249

250

Defines binary events from continuous forecast and observation data

251

using custom logic or thresholds.

252

"""

253

254

def __init__(self, event_fcst_func: Callable, event_obs_func: Callable):

255

"""

256

Initialize event operator.

257

258

Args:

259

event_fcst_func: Function to convert forecast to binary events

260

event_obs_func: Function to convert observations to binary events

261

"""

262

263

def convert_to_events(

264

self,

265

fcst: XarrayLike,

266

obs: XarrayLike,

267

) -> Tuple[XarrayLike, XarrayLike]:

268

"""Convert forecast and observations to binary events."""

269

```

270

271

#### ThresholdEventOperator

272

273

Threshold-based event operator for categorical scoring.

274

275

```python { .api }

276

class ThresholdEventOperator(EventOperator):

277

"""

278

Threshold-based event operator.

279

280

Converts continuous data to binary events using threshold comparison.

281

"""

282

283

def __init__(

284

self,

285

threshold: float,

286

operator_func: Callable = operator.ge,

287

):

288

"""

289

Initialize threshold event operator.

290

291

Args:

292

threshold: Threshold value for event definition

293

operator_func: Comparison operator (ge, le, gt, lt)

294

295

Example:

296

# Event = precipitation >= 1mm

297

rain_events = ThresholdEventOperator(1.0, operator.ge)

298

"""

299

```

300

301

### Multicategorical Scores

302

303

Advanced scoring methods for forecasts with multiple categories.

304

305

#### FIRM (Fixed Risk Multicategorical) Score

306

307

Risk-based scoring for multicategorical forecasts with asymmetric loss functions.

308

309

```python { .api }

310

def firm(

311

fcst: XarrayLike,

312

obs: XarrayLike,

313

risk_parameter: float,

314

categorical_thresholds: Union[Sequence[float], Sequence[xr.DataArray]],

315

threshold_weights: Sequence[Union[float, xr.DataArray]],

316

*,

317

discount_distance: float = 0,

318

reduce_dims: Optional[FlexibleDimensionTypes] = None,

319

preserve_dims: Optional[FlexibleDimensionTypes] = None,

320

weights: Optional[xr.DataArray] = None,

321

include_components: bool = False,

322

) -> XarrayLike:

323

"""

324

Calculate Fixed Risk Multicategorical (FIRM) Score.

325

326

Args:

327

fcst: Continuous forecast values

328

obs: Continuous observation values

329

risk_parameter: Risk parameter, must satisfy 0 < risk_parameter < 1 (higher = misses penalised more heavily relative to false alarms)

330

categorical_thresholds: Threshold values defining categories

331

threshold_weights: Weights for each threshold/category

332

discount_distance: Distance-based penalty discount factor

333

reduce_dims: Dimensions to reduce

334

preserve_dims: Dimensions to preserve

335

weights: Optional weights

336

include_components: Return decomposed components

337

338

Returns:

339

FIRM scores (and components if requested)

340

341

Notes:

342

- Designed for asymmetric loss situations

343

- Risk parameter controls penalty severity

344

- Higher scores indicate worse performance

345

- Useful for decision-oriented evaluation

346

"""

347

```

348

349

#### SEEPS (Stable Equitable Error in Probability Space)

350

351

Equitable scoring for multicategorical precipitation forecasts.

352

353

```python { .api }

354

def seeps(

355

fcst: XarrayLike,

356

obs: XarrayLike,

357

dry_threshold: float,

358

light_threshold: float,

359

*,

360

reduce_dims: Optional[FlexibleDimensionTypes] = None,

361

preserve_dims: Optional[FlexibleDimensionTypes] = None,

362

weights: Optional[xr.DataArray] = None,

363

) -> XarrayLike:

364

"""

365

Calculate Stable Equitable Error in Probability Space (SEEPS).

366

367

Args:

368

fcst: Precipitation forecast values

369

obs: Precipitation observation values

370

dry_threshold: Threshold separating dry from light precipitation

371

light_threshold: Threshold separating light from heavy precipitation

372

reduce_dims: Dimensions to reduce

373

preserve_dims: Dimensions to preserve

374

weights: Optional weights

375

376

Returns:

377

SEEPS scores

378

379

Notes:

380

- Specifically designed for precipitation verification

381

- Creates three categories: dry, light, heavy

382

- Equitable error score (an unskilled forecast has an expected score of 1)

383

- Perfect forecast scores 0 (lower is better)

384

- Range typically: [0, 1] but can exceed 1 for very poor forecasts

385

"""

386

```

387

388

## Usage Patterns

389

390

### Basic Binary Classification

391

392

```python

393

from scores.categorical import BinaryContingencyManager

394

import numpy as np

395

396

# Create sample binary data

397

np.random.seed(42)

398

forecast = np.random.binomial(1, 0.3, 1000)

399

observed = np.random.binomial(1, 0.3, 1000)

400

401

# Use contingency manager for comprehensive evaluation

402

contingency = BinaryContingencyManager(forecast, observed)

403

404

# Access multiple metrics

405

metrics = {

406

'POD': contingency.pod().values,

407

'POFD': contingency.pofd().values,

408

'FAR': contingency.far().values,

409

'CSI': contingency.csi().values,

410

'HSS': contingency.hss().values,

411

'Accuracy': contingency.accuracy().values

412

}

413

414

for name, value in metrics.items():

415

print(f"{name}: {value:.3f}")

416

```

417

418

### Threshold-based Events

419

420

```python

421

from scores.categorical import ThresholdEventOperator

422

from scores.processing import binary_discretise

423

import operator

424

425

# Continuous precipitation data

426

precip_forecast = np.random.exponential(2, 1000)

427

precip_observed = np.random.exponential(2, 1000)

428

429

# Define heavy rain event (>= 5mm)

430

heavy_rain = ThresholdEventOperator(5.0, operator.ge)

431

fcst_events, obs_events = heavy_rain.convert_to_events(

432

precip_forecast, precip_observed

433

)

434

435

# Or use direct discretization

436

fcst_heavy = binary_discretise(precip_forecast, 5.0)

437

obs_heavy = binary_discretise(precip_observed, 5.0)

438

439

# Evaluate heavy rain forecasts

440

heavy_contingency = BinaryContingencyManager(fcst_heavy, obs_heavy)

441

print(f"Heavy rain POD: {heavy_contingency.pod().values:.3f}")

442

print(f"Heavy rain CSI: {heavy_contingency.csi().values:.3f}")

443

```

444

445

### Multicategorical Evaluation

446

447

```python

448

from scores.categorical import firm, seeps

449

import numpy as np

450

451

# Precipitation forecast/observation data

452

precip_fcst = np.random.exponential(3, 500)

453

precip_obs = np.random.exponential(3, 500)

454

455

# SEEPS evaluation (3 categories: dry, light, heavy)

456

seeps_score = seeps(

457

precip_fcst, precip_obs,

458

dry_threshold=0.1, # < 0.1mm = dry

459

light_threshold=5.0 # >= 5.0mm = heavy

460

)

461

print(f"SEEPS Score: {seeps_score.values:.3f}")

462

463

# FIRM evaluation with custom risk parameter

464

firm_score = firm(

465

precip_fcst, precip_obs,

466

risk_parameter=0.7, # Must lie in (0, 1); values above 0.5 penalise misses more than false alarms

467

categorical_thresholds=[0.1, 1.0, 5.0, 15.0], # Category boundaries

468

threshold_weights=[0.5, 1.0, 2.0, 4.0] # Increasing penalties

469

)

470

print(f"FIRM Score: {firm_score.values:.3f}")

471

```

472

473

### Multi-dimensional Categorical Analysis

474

475

```python

476

# Spatial-temporal categorical analysis

477

forecast_3d = xr.DataArray(

478

np.random.binomial(1, 0.4, (50, 10, 15)), # time, lat, lon

479

dims=["time", "lat", "lon"]

480

)

481

observed_3d = xr.DataArray(

482

np.random.binomial(1, 0.4, (50, 10, 15)),

483

dims=["time", "lat", "lon"]

484

)

485

486

# Temporal verification at each grid point

487

spatial_contingency = BinaryContingencyManager(

488

forecast_3d, observed_3d,

489

reduce_dims="time"

490

)

491

spatial_pod = spatial_contingency.pod() # Shape: (lat, lon)

492

493

# Overall verification (all dimensions)

494

overall_contingency = BinaryContingencyManager(

495

forecast_3d, observed_3d,

496

reduce_dims=["time", "lat", "lon"]

497

)

498

overall_csi = overall_contingency.csi() # Scalar value

499

500

print(f"Overall CSI: {overall_csi.values:.3f}")

501

print(f"Spatial POD range: {spatial_pod.min().values:.3f} to {spatial_pod.max().values:.3f}")

502

```

503

504

### Custom Metrics from Contingency Counts

505

506

```python

507

# Use BasicContingencyManager for custom metrics

508

basic_contingency = BasicContingencyManager(forecast, observed)

509

510

# Access raw counts

511

hits = basic_contingency.hits()

512

misses = basic_contingency.misses()

513

false_alarms = basic_contingency.false_alarms()

514

correct_negatives = basic_contingency.correct_negatives()

515

516

# Calculate custom metric: Threat Score (CSI)

517

threat_score = hits / (hits + misses + false_alarms)

518

print(f"Custom CSI: {threat_score.values:.3f}")

519

520

# Calculate custom metric: Symmetric Extremal Dependence Index

521

hit_rate = hits / (hits + misses)

522

false_alarm_rate = false_alarms / (false_alarms + correct_negatives)

523

sedi = (np.log(false_alarm_rate) - np.log(hit_rate) -

524

np.log(1 - false_alarm_rate) + np.log(1 - hit_rate)) / \

525

(np.log(false_alarm_rate) + np.log(hit_rate) +

526

np.log(1 - false_alarm_rate) + np.log(1 - hit_rate))

527

print(f"SEDI: {sedi.values:.3f}")

528

```