add roc curve
nicholasloveday committed Oct 6, 2023
1 parent 7614b23 commit e3e6d0d
Showing 19 changed files with 1,732 additions and 23 deletions.
3 changes: 3 additions & 0 deletions docs/api.md
@@ -25,11 +25,14 @@
.. autofunction:: scores.probability.crps_cdf_brier_decomposition
.. autofunction:: scores.probability.murphy
.. autofunction:: scores.probability.murphy_thetas
.. autofunction:: scores.probability.roc_curve_data
```

## scores.categorical
```{eval-rst}
.. autofunction:: scores.categorical.firm
.. autofunction:: scores.categorical.probability_of_detection
.. autofunction:: scores.categorical.probability_of_false_detection
```

## scores.stats
6 changes: 3 additions & 3 deletions docs/summary_table_of_scores.md
@@ -1,3 +1,3 @@
| continuous | probability | categorical | statistical tests |
| ---------- | ----------- | ----------- | ----------- |
| MAE, MSE, RMSE, Murphy score | CRPS, Murphy score | FIRM | Diebold Mariano (with the Harvey et al. 1997 and the Hering and Genton 2011 modifications) |
| continuous | probability | categorical | statistical tests |
| ---------- | ----------- | ----------- | ----------- |
| MAE, MSE, RMSE, Murphy score | CRPS, Murphy score, ROC | FIRM, POD, POFD | Diebold Mariano (with the Harvey et al. 1997 and the Hering and Genton 2011 modifications) |
4 changes: 4 additions & 0 deletions src/scores/categorical/__init__.py
@@ -1,4 +1,8 @@
"""
Import the functions from the implementations into the public API
"""
from scores.categorical.binary_impl import (
probability_of_detection,
probability_of_false_detection,
)
from scores.categorical.multicategorical_impl import firm
149 changes: 149 additions & 0 deletions src/scores/categorical/binary_impl.py
@@ -0,0 +1,149 @@
"""
This module contains methods for binary categories
"""
from typing import Optional, Sequence

import numpy as np
import pandas as pd
import xarray as xr

from scores.functions import apply_weights
from scores.processing import check_binary
from scores.utils import gather_dimensions


def probability_of_detection(
fcst: xr.DataArray,
obs: xr.DataArray,
reduce_dims: Optional[Sequence[str]] = None,
preserve_dims: Optional[Sequence[str]] = None,
weights: Optional[xr.DataArray] = None,
check_args: Optional[bool] = True,
) -> xr.DataArray:
"""
Calculates the Probability of Detection (POD), also known as the Hit Rate.
This is the proportion of observed events (obs = 1) that were correctly
forecast as an event (fcst = 1).
Args:
fcst: An array containing binary values in the set {0, 1, np.nan}
obs: An array containing binary values in the set {0, 1, np.nan}
reduce_dims: Optionally specify which dimensions to sum when
calculating the POD. All other dimensions will not be summed. As a
special case, 'all' will allow all dimensions to be summed. Only one
of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
if neither is supplied is to sum across all dims.
preserve_dims: Optionally specify which dimensions to not sum
when calculating the POD. All other dimensions will be summed.
As a special case, 'all' will preserve all dimensions (none will
be summed). In this case, the result will be in the same
shape/dimensionality as the forecast, and the values will be
the POD score at each point (i.e. single-value comparison
against observed), and the forecast and observed dimensions
must match precisely. Only one of `reduce_dims` and `preserve_dims` can be
supplied. The default behaviour if neither is supplied is to reduce all dims.
weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
by population, custom)
check_args: Checks if `fcst` and `obs` data only contain values in the set
{0, 1, np.nan}. You may want to skip this check if you are sure about your
input data and want to improve the performance when working with dask.
Returns:
A DataArray of the Probability of Detection.
Raises:
ValueError: if there are values in `fcst` or `obs` that are not in the
set {0, 1, np.nan} and `check_args` is true.
"""
# fcst & obs must be 0s and 1s
if check_args:
check_binary(fcst, "fcst")
check_binary(obs, "obs")
dims_to_sum = gather_dimensions(fcst.dims, obs.dims, reduce_dims, preserve_dims)

misses = (obs == 1) & (fcst == 0)
hits = (obs == 1) & (fcst == 1)

# preserve NaNs
misses = misses.where((~np.isnan(fcst)) & (~np.isnan(obs)))
hits = hits.where((~np.isnan(fcst)) & (~np.isnan(obs)))

misses = apply_weights(misses, weights)
hits = apply_weights(hits, weights)

misses = misses.sum(dim=dims_to_sum)
hits = hits.sum(dim=dims_to_sum)

pod = hits / (hits + misses)

pod.name = "ctable_probability_of_detection"
return pod


def probability_of_false_detection(
fcst: xr.DataArray,
obs: xr.DataArray,
reduce_dims: Optional[Sequence[str]] = None,
preserve_dims: Optional[Sequence[str]] = None,
weights: Optional[xr.DataArray] = None,
check_args: Optional[bool] = True,
) -> xr.DataArray:
"""
Calculates the Probability of False Detection (POFD), also known as False
Alarm Rate (not to be confused with the False Alarm Ratio). The POFD is
the proportion of observed non-events (obs = 0) that were incorrectly
forecast as event (i.e. fcst = 1).
Args:
fcst: An array containing binary values in the set {0, 1, np.nan}
obs: An array containing binary values in the set {0, 1, np.nan}
reduce_dims: Optionally specify which dimensions to sum when
calculating the POFD. All other dimensions will not be summed. As a
special case, 'all' will allow all dimensions to be summed. Only one
of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
if neither is supplied is to sum across all dims.
preserve_dims: Optionally specify which dimensions to not sum
when calculating the POFD. All other dimensions will be summed.
As a special case, 'all' will preserve all dimensions (none will
be summed). In this case, the result will be in the same
shape/dimensionality as the forecast, and the values will be
the POFD score at each point (i.e. single-value comparison
against observed), and the forecast and observed dimensions
must match precisely. Only one of `reduce_dims` and `preserve_dims` can be
supplied. The default behaviour if neither is supplied is to reduce all dims.
weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
by population, custom)
check_args: Checks if `fcst` and `obs` data only contain values in the set
{0, 1, np.nan}. You may want to skip this check if you are sure about your
input data and want to improve the performance when working with dask.
Returns:
A DataArray of the Probability of False Detection.
Raises:
ValueError: if there are values in `fcst` or `obs` that are not in the
set {0, 1, np.nan} and `check_args` is true.
"""
# fcst & obs must be 0s and 1s
if check_args:
check_binary(fcst, "fcst")
check_binary(obs, "obs")
dims_to_sum = gather_dimensions(fcst.dims, obs.dims, reduce_dims, preserve_dims)

false_alarms = (obs == 0) & (fcst == 1)
correct_negatives = (obs == 0) & (fcst == 0)

# preserve NaNs
false_alarms = false_alarms.where((~np.isnan(fcst)) & (~np.isnan(obs)))
correct_negatives = correct_negatives.where((~np.isnan(fcst)) & (~np.isnan(obs)))

false_alarms = apply_weights(false_alarms, weights)
correct_negatives = apply_weights(correct_negatives, weights)

false_alarms = false_alarms.sum(dim=dims_to_sum)
correct_negatives = correct_negatives.sum(dim=dims_to_sum)

pofd = false_alarms / (false_alarms + correct_negatives)
pofd.name = "ctable_probability_of_false_detection"
return pofd
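
Below is a minimal usage sketch of the two new binary scores (not part of this commit); the toy forecast and observation arrays are hypothetical.

import numpy as np
import xarray as xr

from scores.categorical import probability_of_detection, probability_of_false_detection

# Hypothetical binary forecast and observations; NaN marks a missing forecast.
fcst = xr.DataArray([1.0, 1.0, 0.0, 0.0, np.nan], dims="time")
obs = xr.DataArray([1.0, 0.0, 1.0, 0.0, 1.0], dims="time")

# POD = hits / (hits + misses) = 1 / (1 + 1) = 0.5
pod = probability_of_detection(fcst, obs)

# POFD = false alarms / (false alarms + correct negatives) = 1 / (1 + 1) = 0.5
pofd = probability_of_false_detection(fcst, obs)

# The pair containing NaN is excluded from both counts rather than treated as 0 or 1.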
2 changes: 1 addition & 1 deletion src/scores/categorical/multicategorical_impl.py
@@ -222,4 +222,4 @@ def _single_category_score(
}
)
score = score.transpose(*fcst.dims)
return score
return score
2 changes: 1 addition & 1 deletion src/scores/continuous/__init__.py
@@ -1,5 +1,5 @@
"""
Import the functions from the implementations into the public API
"""
from scores.continuous.standard_impl import mse, rmse, mae
from scores.continuous.murphy_impl import murphy_score, murphy_thetas
from scores.continuous.standard_impl import mae, mse, rmse
3 changes: 2 additions & 1 deletion src/scores/continuous/murphy_impl.py
@@ -5,8 +5,9 @@

import numpy as np
import xarray as xr
from scores.utils import gather_dimensions

from scores.processing import broadcast_and_match_nan
from scores.utils import gather_dimensions

QUANTILE = "quantile"
HUBER = "huber"
1 change: 1 addition & 0 deletions src/scores/probability/__init__.py
@@ -7,3 +7,4 @@
crps_cdf,
crps_cdf_brier_decomposition,
)
from scores.probability.roc_impl import roc_curve_data
124 changes: 124 additions & 0 deletions src/scores/probability/roc_impl.py
@@ -0,0 +1,124 @@
"""
Implementation of Receiver Operating Characteristic (ROC) calculations
"""
from typing import Iterable, Optional, Sequence

import numpy as np
import xarray as xr

from scores.categorical import probability_of_detection, probability_of_false_detection
from scores.processing import binary_discretise
from scores.utils import gather_dimensions


def roc_curve_data(
fcst: xr.DataArray,
obs: xr.DataArray,
thresholds: Iterable[float],
reduce_dims: Optional[Sequence[str]] = None,
preserve_dims: Optional[Sequence[str]] = None,
weights: Optional[xr.DataArray] = None,
check_args: bool = True,
) -> xr.Dataset:
"""
Calculates data required for plotting a Receiver (Relative) Operating Characteristic (ROC)
curve including the AUC. The ROC curve is used as a way to measure the discrimination
ability of a particular forecast.
The AUC is the probability that the forecast probability of a random event is higher
than the forecast probability of a random non-event.
Args:
fcst: An array of probabilistic forecasts for a binary event in the range [0, 1].
obs: An array of binary values where 1 is an event and 0 is a non-event.
thresholds: Monotonically increasing values between 0 and 1, the thresholds at and
above which to convert the probabilistic forecast to a value of 1 (an 'event').
reduce_dims: Optionally specify which dimensions to reduce when
calculating the ROC curve data. All other dimensions will be preserved. As a
special case, 'all' will allow all dimensions to be reduced. Only one
of `reduce_dims` and `preserve_dims` can be supplied. The default behaviour
if neither are supplied is to reduce all dims.
preserve_dims: Optionally specify which dimensions to preserve
when calculating ROC curve data. All other dimensions will be reduced.
As a special case, 'all' will allow all dimensions to be
preserved. In this case, the result will be in the same
shape/dimensionality as the forecast, and the values will be
the ROC curve at each point (i.e. single-value comparison
against observed) for each threshold, and the forecast and observed dimensions
must match precisely. Only one of `reduce_dims` and `preserve_dims` can be
supplied. The default behaviour if neither are supplied is to reduce all dims.
weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
by population, custom).
check_args: Checks if `obs` data only contains values in the set
{0, 1, np.nan}. You may want to skip this check if you are sure about your
input data and want to improve the performance when working with dask.
Returns:
An xarray.Dataset with data variables:
- 'POD' (the probability of detection)
- 'POFD' (the probability of false detection)
- 'AUC' (the area under the ROC curve)
`POD` and `POFD` have the preserved dimensions plus a 'threshold' dimension,
while `AUC` has only the preserved dimensions.
Notes:
The probabilistic `fcst` is converted to a deterministic forecast
for each threshold in `thresholds`. If a value in `fcst` is greater
than or equal to the threshold, then it is converted into a
'forecast event' (fcst = 1), and a 'forecast non-event' (fcst = 0)
otherwise. The probability of detection (POD) and probability of false
detection (POFD) are calculated for the converted forecast. From the
POD and POFD data, the area under the ROC curve is calculated.
Ideally concave ROC curves should be generated rather than traditional
ROC curves.
Raises:
ValueError: if `fcst` contains values outside of the range [0, 1]
ValueError: if `obs` contains non-nan values not in the set {0, 1}
ValueError: if 'threshold' is a dimension in `fcst`.
ValueError: if values in `thresholds` are not monotonically increasing or are outside
the range [0, 1]
"""
if fcst.max().item() > 1 or fcst.min().item() < 0:
raise ValueError("`fcst` contains values outside of the range [0, 1]")

if np.max(thresholds) > 1 or np.min(thresholds) < 0:
raise ValueError("`thresholds` contains values outside of the range [0, 1]")

if not np.all(np.array(thresholds)[1:] >= np.array(thresholds)[:-1]):
raise ValueError("`thresholds` is not monotonic increasing between 0 and 1")

# make a discrete forecast for each threshold in thresholds
# discrete_fcst has an extra dimension 'threshold'
discrete_fcst = binary_discretise(fcst, thresholds, ">=")

all_dims = set(fcst.dims).union(set(obs.dims))
final_reduce_dims = gather_dimensions(fcst.dims, obs.dims, reduce_dims, preserve_dims)
final_preserve_dims = all_dims - set(final_reduce_dims)
auc_dims = () if final_preserve_dims is None else tuple(final_preserve_dims)
final_preserve_dims = auc_dims + ("threshold",)

pod = probability_of_detection(
discrete_fcst, obs, preserve_dims=final_preserve_dims, weights=weights, check_args=check_args
)
pofd = probability_of_false_detection(
discrete_fcst, obs, preserve_dims=final_preserve_dims, weights=weights, check_args=check_args
)

# Need to ensure ordering of dims is consistent for xr.apply_ufunc
pod = pod.transpose(*final_preserve_dims)
pofd = pofd.transpose(*final_preserve_dims)

auc = -1 * xr.apply_ufunc(
np.trapz,
pod,
pofd,
input_core_dims=[pod.dims, pofd.dims],
output_core_dims=[auc_dims],
dask="allowed",
)

return xr.Dataset({"POD": pod, "POFD": pofd, "AUC": auc})
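
A minimal sketch of calling roc_curve_data follows (not part of this commit); the uninformative random forecast is purely illustrative, so its AUC should come out near 0.5. Note the -1 factor above: both POD and POFD fall as the threshold rises, so np.trapz integrates over a decreasing x-coordinate and returns a negative area, which the -1 turns back into a positive AUC.

import numpy as np
import xarray as xr

from scores.probability import roc_curve_data

rng = np.random.default_rng(42)
# Hypothetical forecast probabilities and binary observations with no real relationship.
fcst = xr.DataArray(rng.uniform(size=1000), dims="sample")
obs = xr.DataArray(rng.integers(0, 2, size=1000).astype(float), dims="sample")

roc = roc_curve_data(fcst, obs, thresholds=np.linspace(0, 1, 11))
print(roc["POD"].values)   # one POD value per threshold, decreasing from 1 towards 0
print(roc["POFD"].values)  # one POFD value per threshold, decreasing from 1 towards 0
print(roc["AUC"].item())   # close to 0.5 for a forecast with no discrimination ability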