Skip to content

Commit

Permalink
Merge pull request #54 from Project-MONAI/docs_tests
Browse files Browse the repository at this point in the history
Docs tests
  • Loading branch information
csudre authored Dec 13, 2024
2 parents 9d08ebc + a88e6ed commit ca3d352
Show file tree
Hide file tree
Showing 10 changed files with 1,465 additions and 230 deletions.
154 changes: 146 additions & 8 deletions MetricsReloaded/metrics/calibration_measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import numpy as np
import math
from scipy.special import gamma

import warnings
# from metrics.pairwise_measures import CacheFunctionOutput
from MetricsReloaded.utility.utils import (
CacheFunctionOutput,
Expand Down Expand Up @@ -91,7 +91,7 @@ def class_wise_expectation_calibration_error(self):
cwECE = \dfrac{1}{K}\sum_{k=1}^{K}\sum_{i=1}^{N}\dfrac{\vert B_{i,k} \vert}{N} \left(y_{k}(B_{i,k}) - p_{k}(B_{i,k})\right)
:return: cwece
"""

if "bins_ece" in self.dict_args:
Expand Down Expand Up @@ -138,11 +138,22 @@ def expectation_calibration_error(self):
"""
Derives the expectation calibration error in the case of binary task
bins_ece is the key in the dictionary for the number of bins to consider
Cheat sheet SN 3.68 p113
Defined in Mahdi Pakdaman Naeini, Gregory Cooper, and Milos Hauskrecht. Obtaining well calibrated probabilities using
bayesian binning. In Twenty-Ninth AAAI Conference on Artificial Intelligence, 2015.
Default is 10
.. math::
ECE = \sum_{m=1}^{M} \dfrac{|B_m|}{n}(\dfrac{1}{|B_m|}\sum_{i \in B_m}1(pred_ik==ref_ik)-\dfrac{1}{|B_m|}\sum_{i \in B_m}pred_i)
:return: ece
"""
if "bins_ece" in self.dict_args:
nbins = self.dict_args["bins_ece"]
else:
warnings.warn("Bins ECE not specified in optional arguments dictionary - default set to 10")
nbins = 10
step = 1.0 / nbins
range_values = np.arange(0, 1.00001, step)
Expand All @@ -169,7 +180,55 @@ def expectation_calibration_error(self):
else:
list_values.append(nsamples * np.abs(prop - np.mean(pred_sel)))
numb_samples += nsamples
return np.sum(np.asarray(list_values)) / numb_samples
ece = np.sum(np.asarray(list_values)) / numb_samples
return ece


def maximum_calibration_error(self):
    r"""
    Derives the maximum calibration error (MCE) in the case of a binary task.

    The probabilities of the positive class (second column of ``self.pred``)
    are split into equal-width bins over [0, 1]. For each non-empty bin the
    absolute gap between the mean reference value and the mean predicted
    probability is computed, and the largest gap is returned.

    ``bins_mce`` is the key in ``self.dict_args`` for the number of bins to
    consider. Default is 10 (a warning is issued when the default is used).

    .. math::

        MCE = \max_{m} \left| \dfrac{1}{|B_m|}\sum_{i \in B_m} ref_i - \dfrac{1}{|B_m|}\sum_{i \in B_m} pred_i \right|

    Bins are half-open intervals ``(lower, upper]``; a probability of exactly
    0 therefore falls in no bin. Empty bins contribute a gap of 0.

    :return: mce
    """
    if "bins_mce" in self.dict_args:
        nbins = self.dict_args["bins_mce"]
    else:
        warnings.warn("Bins MCE not specified in optional arguments dictionary - default set to 10")
        nbins = 10
    step = 1.0 / nbins
    # The small overshoot past 1 guarantees the last edge (1.0) is generated.
    range_values = np.arange(0, 1.00001, step)
    pred_prob = np.asarray(self.pred)[:, 1]
    # Assumes self.ref is 1-D and aligned with the rows of self.pred - TODO confirm.
    ref = np.asarray(self.ref)
    list_values = []
    for lower, upper in zip(range_values[:-1], range_values[1:]):
        in_bin = np.logical_and(pred_prob > lower, pred_prob <= upper)
        if not np.any(in_bin):
            # Check emptiness first: averaging an empty selection would
            # trigger 0/0 and "mean of empty slice" runtime warnings.
            list_values.append(0)
            continue
        prop = np.mean(ref[in_bin])
        list_values.append(np.abs(prop - np.mean(pred_prob[in_bin])))
    mce = np.max(np.asarray(list_values))
    return mce


def brier_score(self):
"""
Expand All @@ -179,22 +238,44 @@ def brier_score(self):
Glenn W Brier et al. 1950. Verification of forecasts expressed in terms of probability. Monthly weather review 78, 1
(1950), 1–3.
.. math::
BS = \dfrac{1}{N}\sum_{i=1}^{N}\sum_{c=1}^{C}(p_{ic}-r_{ic})^2
where :math:`p_{ic}` is the probability for class c and :math:`r_{ic}` the binary reference for class c and element i
:return: brier score (BS)
"""
bs = np.mean(np.sum(np.square(self.one_hot_ref - self.pred),1))
return bs

def root_brier_score(self):
    r"""
    Determines the root Brier score, i.e. the square root of the value
    returned by ``self.brier_score()``.

    Gruber S. and Buettner F., Better Uncertainty Calibration via Proper Scores
    for Classification and Beyond, In Proceedings of the 36th International
    Conference on Neural Information Processing Systems, 2022

    .. math::

        RBS = \sqrt{BS}

    :return: rbs
    """
    rbs = np.sqrt(self.brier_score())
    return rbs

def logarithmic_score(self):
"""
Calculation of the logarithmic score https://en.wikipedia.org/wiki/Scoring_rule
.. math::
LS = 1/N\sum_{i=1}^{N}\log{pred_ik}ref_{ik}
:return: ls
"""
eps = 1e-10
log_pred = np.log(self.pred + eps)
Expand All @@ -204,27 +285,48 @@ def logarithmic_score(self):
return ls

def distance_ij(self, i, j):
    """
    Computes the Euclidean distance between the prediction vectors
    (rows of ``self.pred``) of samples i and j.

    :param i: row index of the first sample in ``self.pred``
    :param j: row index of the second sample in ``self.pred``
    :return: distance
    """
    delta = self.pred[i, :] - self.pred[j, :]
    squared_norm = np.sum(np.square(delta))
    return np.sqrt(squared_norm)


def kernel_calculation(self, i, j):
    r"""
    Defines the matrix-valued kernel for two samples i and j with the
    following definition for k(x_i, x_j)

    .. math::

        k(x_i,x_j) = \exp(-||x_i-x_j|| / \nu) I_{N}

    where :math:`\nu` is the bandwidth, taken from the ``bandwidth_kce``
    entry of ``self.dict_args`` when present and otherwise obtained from
    ``median_heuristic(self.pred)``, and N is the number of classes
    (number of columns of ``self.pred``).

    :param i: row index of the first sample in ``self.pred``
    :param j: row index of the second sample in ``self.pred``
    :return: kernel_value, an N x N array equal to the scalar kernel times the identity
    """
    distance = self.distance_ij(i, j)
    if 'bandwidth_kce' in self.dict_args:
        bandwidth = self.dict_args['bandwidth_kce']
    else:
        bandwidth = median_heuristic(self.pred)
    value = np.exp(-distance / bandwidth)
    # I_N in the formula is the identity matrix, hence np.eye and not an
    # all-ones matrix (which would also fill the off-diagonal entries).
    identity = np.eye(self.pred.shape[1])
    kernel_value = value * identity
    return kernel_value

def kernel_calibration_error(self):
"""
Based on the paper Widmann, D., Lindsten, F., and Zachariah, D.
Calibration tests in multi-class classification: A unifying framework.
Advances in Neural Information Processing Systems, 32:12257–12267, 2019.
:return: kce
"""
one_hot_ref = one_hot_encode(self.ref, self.pred.shape[1])
numb_samples = self.pred.shape[0]
Expand All @@ -246,6 +348,9 @@ def top_label_classification_error(self):
"""
Calculation of the top-label classification error. Assumes pred_proba a matrix K x Numb observations
with probability to be in class k for observation i in position (k,i)
:return: tce
"""
class_max = np.argmax(self.pred, 1)
prob_pred_max = np.max(self.pred, 1)
Expand All @@ -271,7 +376,12 @@ def kernel_based_ece(self):
Teodora Popordanoska, Raphael Sayer, and Matthew B Blaschko. 2022. A Consistent and Differentiable Lp Canonical
Calibration Error Estimator. In Advances in Neural Information Processing Systems.
.. math::
ECE\_KDE = 1/N \sum_{j=1}^{N}||\dfrac{\sum_{i \\neq j}k_{Dir}(pred_j,pred_i)ref_i}{\sum_{i \\neq j}k_{Dir}(pred_j,pred_i)} - pred_j ||
:return: ece_kde
"""
ece_kde = 0
one_hot_ref = one_hot_encode(self.ref, self.pred.shape[1])
Expand All @@ -298,6 +408,18 @@ def kernel_based_ece(self):
return ece_kde

def gamma_ik(self, i, k):
"""
Definition of gamma value for sample i class k of the predictions
.. math::
gamma_{ik} = \Gamma(pred_{ik}/h + 1)
where h is the bandwidth value set as default to 0.5
:return: gamma_ik
"""
pred_ik = self.pred[i, k]
if "bandwidth" in self.dict_args.keys():
h = self.dict_args["bandwidth"]
Expand All @@ -308,6 +430,16 @@ def gamma_ik(self, i, k):
return gamma_ik

def dirichlet_kernel(self, j, i):
"""
Calculation of Dirichlet kernel value for predictions of samples i and j
.. math::
k_{Dir}(x_j,x_i) = \dfrac{\Gamma(\sum_{k=1}^{K}\\alpha_{ik})}{\prod_{k=1}^{K}\\alpha_{ik}}\prod_{k=1}^{K}x_jk^{\\alpha_{ik}-1}
:return: kernel_value
"""
pred_i = self.pred[i, :]
pred_j = self.pred[j, :]
nclasses = self.pred.shape[1]
Expand All @@ -331,16 +463,22 @@ def negative_log_likelihood(self):
George Cybenko, Dianne P O’Leary, and Jorma Rissanen. 1998. The Mathematics of Information Coding, Extraction
and Distribution. Vol. 107. Springer Science & Business Media.
Cheat Sheet p 116 - Figure SN 3.71
.. math::
-\sum_{i=1}{N} log(p_{i,k} | y_i=k)
NLL = -\dfrac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{C} y_{ik} \cdot \log(p_{i,k})
where :math: `y_{ik}` the outcome is 1 if the class of :math: `y_{i}` is k and :math: `p_{ik}` is the predicted
probability for sample :math: `x_i` and class k
:return: NLL
"""
log_pred = np.log(self.pred)
numb_samples = self.pred.shape[0]
ll = np.sum(log_pred[range(numb_samples), self.ref])
nll = -1 * ll
nll = -1/numb_samples * ll
return nll

def to_dict_meas(self, fmt="{:.4f}"):
Expand Down
Loading

0 comments on commit ca3d352

Please sign in to comment.