Merge pull request #2 from francescopisu/dev

feat: comparison between median AUCs + tests
francescopisu · Sep 9, 2023 · a46fd54 · a46fd54
2 parents 55b6827 + 2ca124c
commit a46fd54
Show file tree

Hide file tree

Showing 10 changed files with 607 additions and 142 deletions.
diff --git a/src/modelsight/curves/__init__.py b/src/modelsight/curves/__init__.py
@@ -1,5 +1,11 @@
 from src.modelsight.curves.roc import average_roc_curves
+from src.modelsight.curves.compare import (
+    roc_single_comparison, roc_comparisons,
+    add_annotations
+)
 
 __all__ = [
-    "average_roc_curves"
+    "average_roc_curves",
+    "roc_single_comparison", 
+    "roc_comparisons",
 ]
diff --git a/src/modelsight/curves/_delong.py b/src/modelsight/curves/_delong.py
@@ -0,0 +1,124 @@
+import pandas as pd
+import numpy as np
+import scipy.stats
+
+# AUC comparison adapted from
+# https://github.com/Netflix/vmaf/
+def compute_midrank(x):
+    """Computes midranks.
+    Args:
+       x - a 1D numpy array
+    Returns:
+       array of midranks
+    """
+    J = np.argsort(x)
+    Z = x[J]
+    N = len(x)
+    T = np.zeros(N, dtype=np.float)
+    i = 0
+    while i < N:
+        j = i
+        while j < N and Z[j] == Z[i]:
+            j += 1
+        T[i:j] = 0.5*(i + j - 1)
+        i = j
+    T2 = np.empty(N, dtype=np.float)
+    # Note(kazeevn) +1 is due to Python using 0-based indexing
+    # instead of 1-based in the AUC formula in the paper
+    T2[J] = T + 1
+    return T2
+
+
+def fastDeLong(predictions_sorted_transposed, label_1_count):
+    """
+    The fast version of DeLong's method for computing the covariance of
+    unadjusted AUC.
+    Args:
+       predictions_sorted_transposed: a 2D numpy.array[n_classifiers, n_examples]
+          sorted such as the examples with label "1" are first
+    Returns:
+       (AUC value, DeLong covariance)
+    Reference:
+     @article{sun2014fast,
+       title={Fast Implementation of DeLong's Algorithm for
+              Comparing the Areas Under Correlated Receiver Operating Characteristic Curves},
+       author={Xu Sun and Weichao Xu},
+       journal={IEEE Signal Processing Letters},
+       volume={21},
+       number={11},
+       pages={1389--1393},
+       year={2014},
+       publisher={IEEE}
+     }
+    """
+    # Short variables are named as they are in the paper
+    m = label_1_count
+    n = predictions_sorted_transposed.shape[1] - m
+    positive_examples = predictions_sorted_transposed[:, :m]
+    negative_examples = predictions_sorted_transposed[:, m:]
+    k = predictions_sorted_transposed.shape[0]
+
+    tx = np.empty([k, m], dtype=np.float)
+    ty = np.empty([k, n], dtype=np.float)
+    tz = np.empty([k, m + n], dtype=np.float)
+    for r in range(k):
+        tx[r, :] = compute_midrank(positive_examples[r, :])
+        ty[r, :] = compute_midrank(negative_examples[r, :])
+        tz[r, :] = compute_midrank(predictions_sorted_transposed[r, :])
+    aucs = tz[:, :m].sum(axis=1) / m / n - float(m + 1.0) / 2.0 / n
+    v01 = (tz[:, :m] - tx[:, :]) / n
+    v10 = 1.0 - (tz[:, m:] - ty[:, :]) / m
+    sx = np.cov(v01)
+    sy = np.cov(v10)
+    delongcov = sx / m + sy / n
+    return aucs, delongcov
+
+
+def calc_pvalue(aucs, sigma):
+    """Computes log(10) of p-values.
+    Args:
+       aucs: 1D array of AUCs
+       sigma: AUC DeLong covariances
+    Returns:
+       log10(pvalue)
+    """
+    l = np.array([[1, -1]])
+    z = np.abs(np.diff(aucs)) / np.sqrt(np.dot(np.dot(l, sigma), l.T))
+    return np.log10(2) + scipy.stats.norm.logsf(z, loc=0, scale=1) / np.log(10)
+
+
+def compute_ground_truth_statistics(ground_truth):
+    assert np.array_equal(np.unique(ground_truth), [0, 1])
+    order = (-ground_truth).argsort()
+    label_1_count = int(ground_truth.sum())
+    return order, label_1_count
+
+
+def delong_roc_variance(ground_truth, predictions):
+    """
+    Computes ROC AUC variance for a single set of predictions
+    Args:
+       ground_truth: np.array of 0 and 1
+       predictions: np.array of floats of the probability of being class 1
+    """
+    order, label_1_count = compute_ground_truth_statistics(ground_truth)
+    predictions_sorted_transposed = predictions[np.newaxis, order]
+    aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count)
+    assert len(aucs) == 1, "There is a bug in the code, please forward this to the developers"
+    return aucs[0], delongcov
+
+
+def delong_roc_test(ground_truth, predictions_one, predictions_two):
+    """
+    Computes log(p-value) for hypothesis that two ROC AUCs are different
+    Args:
+       ground_truth: np.array of 0 and 1
+       predictions_one: predictions of the first model,
+          np.array of floats of the probability of being class 1
+       predictions_two: predictions of the second model,
+          np.array of floats of the probability of being class 1
+    """
+    order, label_1_count = compute_ground_truth_statistics(ground_truth)
+    predictions_sorted_transposed = np.vstack((predictions_one, predictions_two))[:, order]
+    aucs, delongcov = fastDeLong(predictions_sorted_transposed, label_1_count)
+    return 10**calc_pvalue(aucs, delongcov).item()
diff --git a/src/modelsight/curves/compare.py b/src/modelsight/curves/compare.py
@@ -0,0 +1,241 @@
+from typing import Callable, Dict, Tuple, List
+import matplotlib
+from matplotlib import patches
+import matplotlib.pyplot as plt
+from scipy.stats import ttest_ind
+from sklearn.metrics import average_precision_score
+
+from src.modelsight.curves._delong import delong_roc_test
+
+def annot_stat_vertical(text, x, y1, y2, ww, 
+                        col='k', 
+                        fontsize=13, 
+                        voffset = 0, 
+                        n_elems = None,
+                        ax=None,
+                        **kwargs):
+    """
+    ww: float
+        whisker width
+    """
+    ax = plt.gca() if ax is None else ax
+
+    # we want the text to be centered on the whisker 
+    text_x_pos = x + ww 
+    #+ 0.01
+    text_y_pos = (y1+y2)/2
+
+    # draw whisker from y1 to y2 with width `ww`
+    ax.plot([x, x + ww, x + ww, x], [y1, y1, y2, y2], lw=1, c=col)
+
+    if len(text) == 1:                
+        #text_y_pos = (y1+y2)/2
+
+        # draw text at (text_x_pos, text_y_pos) # + 0.15
+        ax.text(
+            text_x_pos, (text_y_pos - voffset) + 0.17, text, 
+            ha='center', va='center', color=col,
+            size=fontsize, zorder=10
+        )
+
+        # Rectangle's props
+        rect_h_base = kwargs.get("rect_h_base", 0.1)
+        rect_w = 0.05 - (0.375 * 0.05) # on a scale from 0 to 1
+        rect_h = rect_h_base * n_elems # transform to scale from 0 to n_elems-1
+        rect_x_offset = -0.002
+        rect_y_offset = 0.01 # move rectangle to the bottom. (0,0) is top left in the inserted barplot
+
+        # draw white rectangle and put it beneath the text 
+        # specifying a zorder inferior to that of the text
+        rect = patches.Rectangle(
+            (
+                text_x_pos - (rect_w/2) + rect_x_offset, 
+                text_y_pos - (rect_h/2) + rect_y_offset
+            ),
+            width = rect_w, height = rect_h, 
+            linewidth=1, 
+            edgecolor='w', 
+            facecolor='w',
+            zorder=9
+        )
+
+        ax.add_patch(rect)
+    else:
+        fontsize_nonsignif = kwargs.pop("fontsize_nonsignif", fontsize)
+        ax.text(
+            text_x_pos, text_y_pos, text, 
+            ha='center', va='center', color=col,
+            size=fontsize_nonsignif, zorder=10,
+            bbox=dict(
+                boxstyle='square,pad=0', 
+                facecolor="white", 
+                edgecolor="white"
+            )
+        )        
+
+from matplotlib import patches
+def annot_stat_horizontal(text, x1, x2, y, wh, col='k', fontsize=13, 
+                        voffset = 0, 
+                        n_elems = None,
+                        ax=None,
+                        **kwargs):
+    """
+    ww: float
+        whisker width
+    """
+    ax = plt.gca() if ax is None else ax
+
+    # we want the text to be centered on the whisker 
+    text_y_pos = y + wh
+    #+ 0.01 
+    text_x_pos = (x1+x2)/2
+
+    # draw whisker from y1 to y2 with width `ww`
+    ax.plot([x1, x1, x2, x2], [y, y + wh, y + wh, y], lw=1, c=col,
+           clip_on=False)
+
+    if len(text) == 1:     
+        #text_y_pos = (y1+y2)/2
+
+        # draw text at (text_x_pos, text_y_pos) # + 0.15
+        ax.text(
+            text_x_pos, text_y_pos + voffset, text, 
+            ha='center', va='center', color=col,
+            size=fontsize, zorder=10
+        )
+
+        # Rectangle's props
+        rect_w = 0.09 # transform to scale from 0 to n_elems-1        
+        rect_h = 0.05 - (0.375 * 0.05) # on a scale from 0 to 1
+        rect_x_offset = 0.005
+        rect_y_offset = -0.001 # move rectangle to the bottom. (0,0) is top left in the inserted barplot
+
+        # draw white rectangle and put it beneath the text 
+        # specifying a zorder inferior to that of the text
+        rect = patches.Rectangle(
+            (
+                text_x_pos - (rect_w/2) + rect_x_offset, 
+                text_y_pos - (rect_h/2) + rect_y_offset
+            ),
+            width = rect_w, height = rect_h, 
+            linewidth=1, 
+            edgecolor='w', 
+            facecolor='w',
+            zorder=9,
+            clip_on=False
+        )
+
+        ax.add_patch(rect)
+    else:
+        fontsize_nonsignif = kwargs.pop("fontsize_nonsignif", fontsize)
+        ax.text(
+            text_x_pos, text_y_pos, text, 
+            ha='center', va='center', color=col,
+            size=fontsize_nonsignif, zorder=10,
+            bbox=dict(
+                boxstyle='square,pad=0', 
+                facecolor="white", 
+                edgecolor="white"
+            )
+        )        
+
+
+from typing import Tuple, List, Dict
+
+def add_annotations(comparisons: Dict[str, Tuple[str, str, float]], 
+                    alpha: float, 
+                    bars: matplotlib.container.BarContainer,
+                    direction: str,
+                    order: List[Tuple[str, str]],
+                    symbol: str,
+                    symbol_fontsize: int = 22,
+                    voffset: float = 0,
+                    ext_voffset: float = 0,
+                    ext_hoffset: float = 0,
+                    P_val_rounding: int = 2,
+                    ax: plt.Axes = None,
+                    **kwargs):
+    if not ax:
+        raise ValueError("I need an Axes to draw comparisons on.")
+
+    comparisons_list = []
+    if order:
+        for fst_algo, snd_algo in order:
+            cmp_key = f"{fst_algo}_{snd_algo}"
+            cmp = comparisons.get(cmp_key, None)
+            if not cmp:
+                raise ValueError(f"The comparison {cmp_key} does not exist in the order list.")
+            comparisons_list.append(cmp)
+    else:
+        comparisons_list = list(comparisons.values())
+
+
+    if direction == "horizontal":
+        width = bars[0].get_width()
+        entity_labels = ax.get_xticklabels()
+        entity_idx = {label.get_text(): (i + 0.03) for i, label in enumerate(entity_labels)}
+
+        whisker_y_offset = kwargs.pop("whisker_y_offset", 0)
+        y_lim_upper = ax.get_ylim()[1] + 0.05 + whisker_y_offset
+        v_offset = 0.07
+
+        for i, (fst_model, snd_model, P) in enumerate(comparisons_list):
+            P_str = symbol if P <= alpha else f"{P:.{P_val_rounding}f}"
+            annot_stat_horizontal(text=P_str, 
+                                x1=entity_idx[fst_model] + width/2, 
+                                x2=entity_idx[snd_model] + width/2, 
+                                y=(y_lim_upper - 0.17) + (i * v_offset), # overall distance from top of bars and upper limit of y + inter-distance between whiskers
+                                wh=0.02,
+                                col="black", 
+                                fontsize=symbol_fontsize,
+                                voffset = -0.02,
+                                ext_offset = ext_hoffset,
+                                n_elems = len(entity_labels),
+                                ax=ax,
+                                **kwargs)
+    elif direction == "vertical":
+        height = bars[0].get_height()
+        entity_labels = ax.get_yticklabels()
+        entity_idx = {label.get_text(): (i + 0.03) for i, label in enumerate(entity_labels)}
+
+        space_between_whiskers = kwargs.pop("space_between_whiskers", 0)
+        x_lim_upper = ax.get_xlim()[1] + 0
+        h_offset = 0.07 + space_between_whiskers
+
+        for i, (fst_model, snd_model, P) in enumerate(comparisons_list):
+            P_str = symbol if P <= alpha else f"{P:.{P_val_rounding}f}"
+            annot_stat_vertical(text=P_str,
+                                x=x_lim_upper + (i * h_offset),  
+                                y1=entity_idx[fst_model], 
+                                y2=entity_idx[snd_model],                                  
+                                ww=0.02,
+                                col="black", 
+                                fontsize=symbol_fontsize if P_str == "*" else 16,
+                                voffset=voffset, 
+                                ext_offset = ext_voffset,
+                                n_elems = len(entity_labels),
+                                ax=ax,
+                                **kwargs)        
+
+    return ax
+
+def roc_single_comparison(cv_preds, fst_algo, snd_algo):
+    ground_truths = cv_preds[fst_algo].gts_val_conc
+    fst_algo_probas = cv_preds[fst_algo].probas_val_conc
+    snd_algo_probas = cv_preds[snd_algo].probas_val_conc
+
+    print("A"*100)
+    print(fst_algo_probas.shape, snd_algo_probas.shape)
+    P = delong_roc_test(ground_truths, fst_algo_probas, snd_algo_probas)
+    cmp_key = f"{fst_algo}_{snd_algo}"
+    return {cmp_key: (fst_algo, snd_algo, P)}
+
+def roc_comparisons(cv_preds, target_algo):
+    comparisons = dict()
+
+    for algo_name in cv_preds.keys():
+        if algo_name != target_algo:
+            cmp = roc_single_comparison(cv_preds, target_algo, algo_name)
+            comparisons = dict(cmp, **comparisons)
+
+    return comparisons     
diff --git a/src/modelsight/curves/roc.py b/src/modelsight/curves/roc.py
@@ -178,4 +178,4 @@ def average_roc_curves(cv_preds: Dict[str, CVModellingOutput],
                   fontweight="bold",
                   position=(0.4, 0.5))
 
-    return fig, ax, ins, bars, auc_cis
+    return fig, ax, ins, bars, all_data