Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docs tests #54

Merged
merged 18 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 146 additions & 8 deletions MetricsReloaded/metrics/calibration_measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import numpy as np
import math
from scipy.special import gamma

import warnings
# from metrics.pairwise_measures import CacheFunctionOutput
from MetricsReloaded.utility.utils import (
CacheFunctionOutput,
Expand Down Expand Up @@ -91,7 +91,7 @@ def class_wise_expectation_calibration_error(self):

cwECE = \dfrac{1}{K}\sum_{k=1}^{K}\sum_{i=1}^{N}\dfrac{\vert B_{i,k} \vert}{N} \left(y_{k}(B_{i,k}) - p_{k}(B_{i,k})\right)


:return: cwece
"""

if "bins_ece" in self.dict_args:
Expand Down Expand Up @@ -138,11 +138,22 @@ def expectation_calibration_error(self):
"""
Derives the expectation calibration error in the case of binary task
bins_ece is the key in the dictionary for the number of bins to consider
Cheat sheet SN 3.68 p113
Defined in Mahdi Pakdaman Naeini, Gregory Cooper, and Milos Hauskrecht. Obtaining well calibrated probabilities using
bayesian binning. In Twenty-Ninth AAAI Conference on Artificial Intelligence, 2015.
Default is 10

.. math::

ECE = \sum_{m=1}^{M} \dfrac{|B_m|}{n}|\dfrac{1}{|B_m|}\sum_{i \in B_m}1(pred_{ik}==ref_{ik})-\dfrac{1}{|B_m|}\sum_{i \in B_m}pred_i|

:return: ece

"""
if "bins_ece" in self.dict_args:
nbins = self.dict_args["bins_ece"]
else:
warnings.warn("Bins ECE not specified in optional arguments dictionary - default set to 10")
nbins = 10
step = 1.0 / nbins
range_values = np.arange(0, 1.00001, step)
Expand All @@ -169,7 +180,55 @@ def expectation_calibration_error(self):
else:
list_values.append(nsamples * np.abs(prop - np.mean(pred_sel)))
numb_samples += nsamples
return np.sum(np.asarray(list_values)) / numb_samples
ece = np.sum(np.asarray(list_values)) / numb_samples
return ece


def maximum_calibration_error(self):
    r"""
    Derive the maximum calibration error (MCE) for a binary task.

    The probabilities of class 1 (``self.pred[:, 1]``) are split into
    equal-width bins over [0, 1]. For every non-empty bin, the absolute gap
    between the mean reference value and the mean predicted probability is
    computed; the MCE is the largest of these gaps.

    ``bins_mce`` is the key in ``self.dict_args`` for the number of bins to
    consider. Default is 10 (a warning is emitted when the default is used).

    .. math::

        MCE = \max_{m} \left| \dfrac{1}{|B_m|}\sum_{i \in B_m} ref_i - \dfrac{1}{|B_m|}\sum_{i \in B_m} pred_i \right|

    :return: mce (0 when no sample falls in any bin)

    """
    if "bins_mce" in self.dict_args:
        nbins = self.dict_args["bins_mce"]
    else:
        warnings.warn("Bins MCE not specified in optional arguments dictionary - default set to 10")
        nbins = 10
    # linspace pins the last edge at exactly 1.0, so a prediction of 1.0 is
    # never lost to floating-point drift in the accumulated step.
    edges = np.linspace(0.0, 1.0, int(nbins) + 1)
    pred_prob = self.pred[:, 1]
    # assumes self.ref holds non-negative labels with the same shape as
    # pred_prob - TODO confirm against the caller
    ref = np.asarray(self.ref)
    gaps = []
    for lower, upper in zip(edges[:-1], edges[1:]):
        # NOTE(review): bins are (lower, upper], so a probability of exactly 0
        # falls in no bin - confirm this is intended.
        in_bin = np.logical_and(pred_prob > lower, pred_prob <= upper)
        if not np.any(in_bin):
            # Empty bins contribute no gap; skipping them avoids the 0/0
            # division (and RuntimeWarning) of computing a mean over nothing.
            continue
        gaps.append(np.abs(np.mean(ref[in_bin]) - np.mean(pred_prob[in_bin])))
    mce = np.max(gaps) if gaps else 0.0
    return mce


def brier_score(self):
"""
Expand All @@ -179,22 +238,44 @@ def brier_score(self):
Glenn W Brier et al. 1950. Verification of forecasts expressed in terms of probability. Monthly weather review 78, 1
(1950), 1–3.

.. math::

BS = \dfrac{1}{N}\sum_{i=1}^{N}\sum_{c=1}^{C}(p_{ic}-r_{ic})^2

where :math:`p_{ic}` is the probability for class c and :math:`r_{ic}` the binary reference for class c and element i

:return: brier score (BS)

"""
bs = np.mean(np.sum(np.square(self.one_hot_ref - self.pred),1))
return bs

def root_brier_score(self):
    r"""
    Determine the root Brier score, i.e. the square root of the value
    returned by ``self.brier_score()``.

    Gruber S. and Buettner F., Better Uncertainty Calibration via Proper Scores
    for Classification and Beyond, In Proceedings of the 36th International
    Conference on Neural Information Processing Systems, 2022

    .. math::

        RBS = \sqrt{BS}

    :return: rbs
    """
    rbs = np.sqrt(self.brier_score())
    return rbs

def logarithmic_score(self):
"""
Calculation of the logarithmic score https://en.wikipedia.org/wiki/Scoring_rule

.. math::

LS = 1/N\sum_{i=1}^{N}\log{pred_ik}ref_{ik}

:return: ls
"""
eps = 1e-10
log_pred = np.log(self.pred + eps)
Expand All @@ -204,27 +285,48 @@ def logarithmic_score(self):
return ls

def distance_ij(self, i, j):
    """
    Determine the Euclidean distance between the prediction vectors of two
    samples i and j (rows i and j of ``self.pred``).

    :param i: row index of the first sample in ``self.pred``
    :param j: row index of the second sample in ``self.pred``
    :return: distance
    """
    pred_i = self.pred[i, :]
    pred_j = self.pred[j, :]
    # The default (2-)norm of the difference vector is the Euclidean distance.
    distance = np.linalg.norm(pred_i - pred_j)
    return distance


def kernel_calculation(self, i, j):
    r"""
    Define the kernel value for two samples i and j with the following definition for k(x_i,x_j)

    .. math::

        k(x_i,x_j) = \exp(-||x_i-x_j|| / \nu) I_{N}

    where :math:`\nu` is the bandwidth, read from ``self.dict_args['bandwidth_kce']``
    when present and otherwise obtained from ``median_heuristic(self.pred)``,
    and N is the number of classes (``self.pred.shape[1]``)

    :param i: row index of the first sample in ``self.pred``
    :param j: row index of the second sample in ``self.pred``
    :return: kernel_value, an N x N matrix with the scalar kernel on the diagonal

    """
    distance = self.distance_ij(i, j)
    if "bandwidth_kce" in self.dict_args:
        bandwidth = self.dict_args["bandwidth_kce"]
    else:
        bandwidth = median_heuristic(self.pred)
    value = np.exp(-distance / bandwidth)
    # The kernel is matrix-valued: the scalar kernel scales the identity
    # matrix (np.eye), not a matrix of ones.
    identity = np.eye(self.pred.shape[1])
    kernel_value = value * identity
    return kernel_value

def kernel_calibration_error(self):
"""
Based on the paper Widmann, D., Lindsten, F., and Zachariah, D.
Calibration tests in multi-class classification: A unifying framework.
Advances in Neural Information Processing Systems, 32:12257–12267, 2019.

:return: kce

"""
one_hot_ref = one_hot_encode(self.ref, self.pred.shape[1])
numb_samples = self.pred.shape[0]
Expand All @@ -246,6 +348,9 @@ def top_label_classification_error(self):
"""
Calculation of the top-label classification error. Assumes pred_proba a matrix K x Numb observations
with probability to be in class k for observation i in position (k,i)

:return: tce

"""
class_max = np.argmax(self.pred, 1)
prob_pred_max = np.max(self.pred, 1)
Expand All @@ -271,7 +376,12 @@ def kernel_based_ece(self):
Teodora Popordanoska, Raphael Sayer, and Matthew B Blaschko. 2022. A Consistent and Differentiable Lp Canonical
Calibration Error Estimator. In Advances in Neural Information Processing Systems.

.. math::

ECE\_KDE = 1/N \sum_{j=1}^{N}||\dfrac{\sum_{i \\neq j}k_{Dir}(pred_j,pred_i)ref_i}{\sum_{i \\neq j}k_{Dir}(pred_j,pred_i)} - pred_j ||

:return: ece_kde

"""
ece_kde = 0
one_hot_ref = one_hot_encode(self.ref, self.pred.shape[1])
Expand All @@ -298,6 +408,18 @@ def kernel_based_ece(self):
return ece_kde

def gamma_ik(self, i, k):
"""
Definition of gamma value for sample i class k of the predictions

.. math::

gamma_{ik} = \Gamma(pred_{ik}/h + 1)

where h is the bandwidth value set as default to 0.5

:return: gamma_ik

"""
pred_ik = self.pred[i, k]
if "bandwidth" in self.dict_args.keys():
h = self.dict_args["bandwidth"]
Expand All @@ -308,6 +430,16 @@ def gamma_ik(self, i, k):
return gamma_ik

def dirichlet_kernel(self, j, i):
"""
Calculation of Dirichlet kernel value for predictions of samples i and j

.. math::

k_{Dir}(x_j,x_i) = \dfrac{\Gamma(\sum_{k=1}^{K}\\alpha_{ik})}{\prod_{k=1}^{K}\Gamma(\\alpha_{ik})}\prod_{k=1}^{K}x_{jk}^{\\alpha_{ik}-1}

:return: kernel_value

"""
pred_i = self.pred[i, :]
pred_j = self.pred[j, :]
nclasses = self.pred.shape[1]
Expand All @@ -331,16 +463,22 @@ def negative_log_likelihood(self):

George Cybenko, Dianne P O’Leary, and Jorma Rissanen. 1998. The Mathematics of Information Coding, Extraction
and Distribution. Vol. 107. Springer Science & Business Media.
Cheat Sheet p 116 - Figure SN 3.71

.. math::

-\sum_{i=1}{N} log(p_{i,k} | y_i=k)
NLL = -\dfrac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{C} y_{ik} \cdot \log(p_{ik})

where the outcome :math:`y_{ik}` is 1 if the class of :math:`y_{i}` is k and 0 otherwise, and :math:`p_{ik}` is the predicted
probability for sample :math:`x_i` and class k

:return: NLL

"""
log_pred = np.log(self.pred)
numb_samples = self.pred.shape[0]
ll = np.sum(log_pred[range(numb_samples), self.ref])
nll = -1 * ll
nll = -1/numb_samples * ll
return nll

def to_dict_meas(self, fmt="{:.4f}"):
Expand Down
Loading
Loading