Add LesionCountWeightedByAssignment, RefLesionsCount, PredLesionsCount #6

Merged (16 commits) on Jun 7, 2024
16 changes: 8 additions & 8 deletions .github/workflows/python-app.yml
@@ -53,14 +53,14 @@ jobs:
run: |
pytest --cov --cov-config=.coveragerc --cov-report=xml -vv

# Using Codecov's action, upload the coverage report for the triggering commit/PR
- name: Upload coverage
uses: codecov/codecov-action@v2
with:
file: ./coverage.xml
fail_ci_if_error: true
verbose: true
version: "v0.1.15"
# # Using Codecov's action, upload the coverage report for the triggering commit/PR
# - name: Upload coverage
# uses: codecov/codecov-action@v2
# with:
# file: ./coverage.xml
# fail_ci_if_error: true
# verbose: true
# version: "v0.1.15"

build_docs:
runs-on: ubuntu-latest
74 changes: 72 additions & 2 deletions MetricsReloaded/metrics/pairwise_measures.py
@@ -38,7 +38,8 @@
from __future__ import absolute_import, print_function
import warnings
import numpy as np
from scipy import ndimage
from scipy import ndimage, optimize
from sklearn.metrics import precision_score
from functools import partial
from skimage.morphology import skeletonize
from MetricsReloaded.utility.utils import (
@@ -274,10 +274,13 @@ def __init__(
"hd_perc": (self.measured_hausdorff_distance_perc, "HDPerc"),
"masd": (self.measured_masd, "MASD"),
"nsd": (self.normalised_surface_distance, "NSD"),
# instance-specific measures
# lesion-specific measures
"ref_count": (self.ref_lesions_count, "RefLesionsCount"),
"pred_count": (self.pred_lesions_count, "PredLesionsCount"),
"lesion_ppv": (self.lesion_ppv, "LesionWisePPV"),
"lesion_sensitivity": (self.lesion_sensitivity, "LesionWiseSensitivity"),
"lesion_f1_score": (self.lesion_f1_score, "LesionWiseF1Score"),
"lcwa": (self.lesion_count_weighted_by_assignment, "LesionCountWeightedByAssignment"),
# other measures
"vol_diff": (self.vol_diff, "VolDiff"),
"rel_vol_error": (self.rel_vol_error, "RelVolError"),
@@ -1316,6 +1320,72 @@ def lesion_sensitivity(self):
sensitivity = tp / denom
return sensitivity

def ref_lesions_count(self):
"""
Returns the number of lesions (connected components) in the reference mask
"""
ref_lesion, num_ref_lesions = ndimage.label(self.ref)

return num_ref_lesions

def pred_lesions_count(self):
"""
Returns the number of lesions (connected components) in the prediction mask
"""
pred_lesion, num_pred_lesions = ndimage.label(self.pred)

return num_pred_lesions
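For readers unfamiliar with it, `ndimage.label` assigns a distinct integer to each connected component of a binary mask, so its second return value is the lesion count. A minimal standalone sketch (illustration only, not part of the diff):

```python
# Toy demo of the connected-component counting used by ref_lesions_count /
# pred_lesions_count: two separate blobs of 1s yield a count of 2.
import numpy as np
from scipy import ndimage

mask = np.zeros((10, 10), dtype=int)
mask[1:3, 3:6] = 1  # lesion #1
mask[7:9, 2:5] = 1  # lesion #2

labeled, num_lesions = ndimage.label(mask)  # labels are 1, 2, ...; 0 is background
print(num_lesions)         # -> 2
print(np.unique(labeled))  # -> [0 1 2]
```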

def lesion_count_weighted_by_assignment(self):
"""
Performs lesion matching between the predicted lesions and the reference lesions. A weighted bipartite graph
between the predicted and reference lesions is constructed, using pairwise precision scores as the edge weights.
The returned value is the sum of precisions over the optimal assignment, normalized by the number of lesions
in the ground truth. Values close to 1 indicate that the correct number of lesions was identified and that they
overlap well. Lower values indicate either a wrong number of predicted lesions or insufficient overlap with the
ground truth.

Adapted from: https://github.com/npnl/atlas2_grand_challenge/blob/main/isles/scoring.py#L126
NOTE: the original implementation had a bug: it iterated over the binary 0-1 mask itself rather than the
labeled mask obtained from `ndimage.label`. This has been fixed in this implementation.

Returns
-------
float : Lesion Count by Weighted Assignment (LCWA) score
"""
# reshape to add batch dimension
prediction = np.reshape(self.pred, (1, *self.pred.shape))
truth = np.reshape(self.ref, (1, *self.ref.shape))

# "Lesion Count by Weighted Assignment"
lcwa = []
for idx_sample in range(truth.shape[0]):
# Identify unique regions
pred_lesion, num_pred_lesions = ndimage.label(prediction[idx_sample, ...])
truth_lesion, num_truth_lesions = ndimage.label(truth[idx_sample, ...])

# pre-allocate cost matrix
cost_matrix = np.zeros((num_pred_lesions, num_truth_lesions))

# compute cost matrix
# NOTE: 0 is the background class so we start from 1
for idx_pred in range(1, num_pred_lesions+1):
# NOTE: sklearn's precision_score requires 1D arrays as input
pred_mask = (pred_lesion == idx_pred).reshape(-1)

for idx_truth in range(1, num_truth_lesions+1):
truth_mask = (truth_lesion == idx_truth).reshape(-1)

# compute precision scores to use as edge weights in the bipartite graph
# NOTE: `pred_mask`/`truth_mask` avoid shadowing the outer `prediction`/`truth` arrays
cost_matrix[idx_pred-1, idx_truth-1] = precision_score(y_true=truth_mask, y_pred=pred_mask)

# compute the optimal assignment
row_ind, col_ind = optimize.linear_sum_assignment(cost_matrix=cost_matrix, maximize=True)
total_precision = cost_matrix[row_ind, col_ind].sum()
lcwa.append(total_precision / num_truth_lesions)

return lcwa[0]


# NOTE: it's best to keep this function at the end as it does not explicitly compute any metric
def to_dict_meas(self, fmt="{:.4f}"):
result_dict = {}
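To make the LCWA matching concrete, here is a self-contained sketch of the same procedure on toy 2D masks (illustration only, with hypothetical variable names; the method above operates on `self.pred`/`self.ref` inside `BinaryPairwiseMeasures`):

```python
# LCWA on toy masks: pairwise precision as edge weights, then an optimal
# one-to-one assignment between predicted and reference lesions.
import numpy as np
from scipy import ndimage, optimize
from sklearn.metrics import precision_score

ref = np.zeros((10, 10), dtype=int)
ref[1:3, 3:6] = 1   # reference lesion #1
ref[7:9, 2:5] = 1   # reference lesion #2

pred = np.zeros((10, 10), dtype=int)
pred[1:3, 3:6] = 1  # predicted lesion #1: complete overlap
pred[7:8, 2:5] = 1  # predicted lesion #2: partial overlap, fully inside ref #2

pred_lab, n_pred = ndimage.label(pred)
ref_lab, n_ref = ndimage.label(ref)

# cost matrix of pairwise precision scores (sklearn needs 1D inputs)
cost = np.zeros((n_pred, n_ref))
for i in range(1, n_pred + 1):
    for j in range(1, n_ref + 1):
        cost[i - 1, j - 1] = precision_score(y_true=(ref_lab == j).ravel(),
                                             y_pred=(pred_lab == i).ravel())

# maximize total precision over the assignment, then normalize by ref count
rows, cols = optimize.linear_sum_assignment(cost, maximize=True)
print(cost[rows, cols].sum() / n_ref)  # -> 1.0 (both predictions are precise)
```

Deleting the second predicted lesion drops the score to 0.5: only one of the two reference lesions can then be matched.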
34 changes: 20 additions & 14 deletions compute_metrics_reloaded.py
@@ -45,16 +45,20 @@
from MetricsReloaded.metrics.pairwise_measures import BinaryPairwiseMeasures as BPM


# This dictionary is used to rename the metric columns in the output CSV file
METRICS_TO_NAME = {
'dsc': 'Dice similarity coefficient (DSC)',
'hd': 'Hausdorff distance (HD95)',
'fbeta': 'F1 score',
'nsd': 'Normalized surface distance (NSD)',
'vol_diff': 'Volume difference',
'rel_vol_error': 'Relative volume error (RVE)',
'lesion_ppv': 'Lesion wise positive predictive value (PPV)',
'lesion_sensitivity': 'Lesion wise sensitivity',
'lesion_f1_score': 'Lesion wise F1 score',
'dsc': 'DiceSimilarityCoefficient',
'hd': 'HausdorffDistance95',
'fbeta': 'F1score',
'nsd': 'NormalizedSurfaceDistance',
'vol_diff': 'VolumeDifference',
'rel_vol_error': 'RelativeVolumeError',
'lesion_ppv': 'LesionWisePositivePredictiveValue',
'lesion_sensitivity': 'LesionWiseSensitivity',
'lesion_f1_score': 'LesionWiseF1Score',
'ref_count': 'RefLesionsCount',
'pred_count': 'PredLesionsCount',
'lcwa': 'LesionCountWeightedByAssignment'
}


@@ -69,11 +73,12 @@ def get_parser():
parser.add_argument('-reference', required=True, type=str,
help='Path to the folder with nifti images of reference (ground truth) or path to a single '
'nifti image of reference (ground truth).')
parser.add_argument('-metrics', nargs='+', default=['dsc', 'fbeta', 'nsd', 'vol_diff', 'rel_vol_error'],
required=False,
parser.add_argument('-metrics', nargs='+', required=False,
default=['dsc', 'fbeta', 'nsd', 'vol_diff', 'rel_vol_error',
'lesion_ppv', 'lesion_sensitivity', 'lesion_f1_score',
'ref_count', 'pred_count', 'lcwa'],
help='List of metrics to compute. For details, '
'see: https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/metrics.html. '
'Default: dsc, fbeta, nsd, vol_diff, rel_vol_error')
'see: https://metricsreloaded.readthedocs.io/en/latest/reference/metrics/metrics.html.')
parser.add_argument('-output', type=str, default='metrics.csv', required=False,
help='Path to the output CSV file to save the metrics. Default: metrics.csv')
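With these defaults, every metric (including the new lesion counts and LCWA) is computed unless `-metrics` narrows the list; a typical invocation (placeholder paths) might be:

```
python compute_metrics_reloaded.py \
    -prediction path/to/preds \
    -reference path/to/gts \
    -output metrics.csv
```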

@@ -150,7 +155,8 @@ def compute_metrics_single_subject(prediction, reference, metrics):
# append entry into the output_list to store the metrics for the current subject
metrics_dict = {'reference': reference, 'prediction': prediction}

# loop over all unique labels
# loop over all unique labels, e.g., voxels with values 1, 2, ...
# this lets us compute metrics for each label separately, e.g., for spinal cord and lesions
for label in unique_labels:
# create binary masks for the current label
print(f'\tLabel {label}')
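For context, a plausible sketch of how a mapping like `METRICS_TO_NAME` is applied when writing the CSV (assuming pandas; the script's actual CSV-writing code sits outside this diff and may differ):

```python
# Hypothetical illustration: rename short metric keys to CamelCase column names
# before saving, mirroring METRICS_TO_NAME above.
import pandas as pd

metrics_to_name = {
    'dsc': 'DiceSimilarityCoefficient',
    'rel_vol_error': 'RelativeVolumeError',
    'lcwa': 'LesionCountWeightedByAssignment',
}

df = pd.DataFrame([{'dsc': 0.8571, 'rel_vol_error': -25.0, 'lcwa': 1.0}])
df.rename(columns=metrics_to_name).to_csv('metrics.csv', index=False)
```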
146 changes: 137 additions & 9 deletions test/test_metrics/test_pairwise_measures_neuropoly.py
@@ -3,7 +3,7 @@
# Tests for the `compute_metrics_reloaded.py` script
#
# RUN BY:
# python -m unittest tests/test_pairwise_measures_neuropoly.py
# python -m unittest test/test_metrics/test_pairwise_measures_neuropoly.py
#
# Authors: NeuroPoly team
#
@@ -16,7 +16,8 @@
from compute_metrics_reloaded import compute_metrics_single_subject
import tempfile

METRICS = ['dsc', 'fbeta', 'nsd', 'vol_diff', 'rel_vol_error', 'lesion_ppv', 'lesion_sensitivity', 'lesion_f1_score']
METRICS = ['dsc', 'fbeta', 'nsd', 'vol_diff', 'rel_vol_error', 'lesion_ppv', 'lesion_sensitivity', 'lesion_f1_score',
'ref_count', 'pred_count']


class TestComputeMetricsReloaded(unittest.TestCase):
@@ -65,7 +66,9 @@ def test_empty_ref_and_pred(self):
'vol_diff': np.nan,
'lesion_ppv': 1.0,
'lesion_sensitivity': 1.0,
'lesion_f1_score': 1.0}}
'lesion_f1_score': 1.0,
'ref_count': 0,
'pred_count': 0}}

# Create empty reference
self.create_dummy_nii(self.ref_file, np.zeros((10, 10, 10)))
@@ -90,7 +93,9 @@ def test_empty_ref(self):
'vol_diff': np.inf,
'lesion_ppv': 0.0,
'lesion_sensitivity': 1.0,
'lesion_f1_score': 0.0}}
'lesion_f1_score': 0.0,
'ref_count': 0,
'pred_count': 1}}

# Create empty reference
self.create_dummy_nii(self.ref_file, np.zeros((10, 10, 10)))
@@ -117,7 +122,9 @@ def test_empty_pred(self):
'vol_diff': 1.0,
'lesion_ppv': 0.0,
'lesion_sensitivity': 0.0,
'lesion_f1_score': 0.0}}
'lesion_f1_score': 0.0,
'ref_count': 1,
'pred_count': 0}}

# Create non-empty reference
ref = np.zeros((10, 10, 10))
@@ -144,7 +151,9 @@ def test_non_empty_ref_and_pred(self):
'vol_diff': 3.0,
'lesion_ppv': 1.0,
'lesion_sensitivity': 1.0,
'lesion_f1_score': 1.0}}
'lesion_f1_score': 1.0,
'ref_count': 1,
'pred_count': 1}}

# Create non-empty reference
ref = np.zeros((10, 10, 10))
Expand All @@ -159,6 +168,119 @@ def test_non_empty_ref_and_pred(self):
# Assert metrics
self.assert_metrics(metrics_dict, expected_metrics)

def test_non_empty_ref_and_pred_multi_lesion(self):
"""
Non-empty reference (2 lesions) and non-empty prediction (2 lesions)
Multi-lesion case (i.e., multiple connected regions (lesions) with voxel value 1)
Lesion #1: complete overlap; Lesion #2: partial overlap
"""

expected_metrics = {1.0: {'EmptyPred': False,
'EmptyRef': False,
'dsc': 0.8571428571428571,
'fbeta': 0.8571428571428571,
'nsd': 1.0,
'rel_vol_error': -25.0,
'vol_diff': 0.25,
'lesion_ppv': 1.0,
'lesion_sensitivity': 1.0,
'lesion_f1_score': 1.0,
'ref_count': 2,
'pred_count': 2}}

# Create non-empty reference
ref = np.zeros((10, 10, 10))
# Lesion #1
ref[1:3, 3:6] = 1
# Lesion #2
ref[7:9, 2:5] = 1
self.create_dummy_nii(self.ref_file, ref)
# Create non-empty prediction
pred = np.zeros((10, 10, 10))
# Lesion #1 -- complete overlap
pred[1:3, 3:6] = 1
# Lesion #2 -- partial overlap
pred[7:8, 2:5] = 1
self.create_dummy_nii(self.pred_file, pred)
# Compute metrics
metrics_dict = compute_metrics_single_subject(self.pred_file.name, self.ref_file.name, self.metrics)
# Assert metrics
self.assert_metrics(metrics_dict, expected_metrics)
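A quick sanity check on these expected values (editorial note, not part of the test): each 2D region spans all 10 slices of the (10, 10, 10) volume, so the reference has 60 + 60 = 120 voxels, the prediction 60 + 30 = 90, and all 90 predicted voxels overlap the reference. Hence:

```python
# Verifying the expected DSC and volume metrics above by hand
ref_vox, pred_vox, overlap = 120, 90, 90
dsc = 2 * overlap / (ref_vox + pred_vox)              # 0.8571428571428571
vol_diff = abs(pred_vox - ref_vox) / ref_vox          # 0.25
rel_vol_error = (pred_vox - ref_vox) / ref_vox * 100  # -25.0
```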

def test_non_empty_ref_and_pred_multi_lesion_one_lesion_not_predicted(self):
"""
Non-empty reference (2 lesions) and non-empty prediction (1 lesion)
Multi-lesion case (i.e., multiple connected regions (lesions) with voxel value 1)
Lesion #1: complete overlap; Lesion #2: only in reference
"""

expected_metrics = {1.0: {'EmptyPred': False,
'EmptyRef': False,
'dsc': 0.6666666666666666,
'fbeta': 0.6666666666666666,
'nsd': 0.6666666666666666,
'rel_vol_error': -50.0,
'vol_diff': 0.5,
'lesion_ppv': 1.0,
'lesion_sensitivity': 0.5,
'lesion_f1_score': 0.6666666666666666,
'ref_count': 2,
'pred_count': 1}}

# Create non-empty reference
ref = np.zeros((10, 10, 10))
# Lesion #1
ref[1:3, 3:6] = 1
# Lesion #2
ref[7:9, 2:5] = 1
self.create_dummy_nii(self.ref_file, ref)
# Create non-empty prediction
pred = np.zeros((10, 10, 10))
# Lesion #1 -- complete overlap
pred[1:3, 3:6] = 1
# Note: there is no Lesion #2 in prediction
self.create_dummy_nii(self.pred_file, pred)
# Compute metrics
metrics_dict = compute_metrics_single_subject(self.pred_file.name, self.ref_file.name, self.metrics)
# Assert metrics
self.assert_metrics(metrics_dict, expected_metrics)

def test_non_empty_ref_and_pred_multi_lesion_no_lesion_predicted(self):
"""
Non-empty reference (2 lesions) and empty prediction (0 lesions)
Multi-lesion case (i.e., multiple connected regions (lesions) with voxel value 1)
Lesions #1 and #2: present only in the reference
"""

expected_metrics = {1.0: {'EmptyPred': False,
'EmptyRef': True,
'dsc': 0.0,
'fbeta': 0,
'nsd': 0,
'rel_vol_error': -100.0,
'vol_diff': 1.0,
'lesion_ppv': 0.0,
'lesion_sensitivity': 0.0,
'lesion_f1_score': 0.0,
'ref_count': 2,
'pred_count': 0}}

# Create non-empty reference
ref = np.zeros((10, 10, 10))
# Lesion #1
ref[1:3, 3:6] = 1
# Lesion #2
ref[7:9, 2:5] = 1
self.create_dummy_nii(self.ref_file, ref)
# Create non-empty prediction
pred = np.zeros((10, 10, 10))
# Note: there is no lesion in prediction
self.create_dummy_nii(self.pred_file, pred)
# Compute metrics
metrics_dict = compute_metrics_single_subject(self.pred_file.name, self.ref_file.name, self.metrics)
# Assert metrics
self.assert_metrics(metrics_dict, expected_metrics)

def test_non_empty_ref_and_pred_multi_class(self):
"""
Non-empty reference and non-empty prediction with partial overlap
@@ -174,7 +296,9 @@
'EmptyPred': False,
'lesion_ppv': 1.0,
'lesion_sensitivity': 1.0,
'lesion_f1_score': 1.0},
'lesion_f1_score': 1.0,
'ref_count': 1,
'pred_count': 1},
2.0: {'dsc': 0.26666666666666666,
'fbeta': 0.26666667461395266,
'nsd': 0.5373134328358209,
@@ -184,7 +308,9 @@
'EmptyPred': False,
'lesion_ppv': 1.0,
'lesion_sensitivity': 1.0,
'lesion_f1_score': 1.0}}
'lesion_f1_score': 1.0,
'ref_count': 1,
'pred_count': 1}}

# Create non-empty reference
ref = np.zeros((10, 10, 10))
@@ -215,7 +341,9 @@ def test_non_empty_ref_and_pred_with_full_overlap(self):
'vol_diff': 0.0,
'lesion_ppv': 1.0,
'lesion_sensitivity': 1.0,
'lesion_f1_score': 1.0}}
'lesion_f1_score': 1.0,
'ref_count': 1,
'pred_count': 1}}

# Create non-empty reference
ref = np.zeros((10, 10, 10))
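The tests call a `create_dummy_nii` helper whose definition lies outside the diff; a plausible reconstruction (assumption: it wraps nibabel with an identity affine; the real helper may differ) is:

```python
# Hypothetical sketch of the elided helper: save a numpy array to a NIfTI file
# backed by a tempfile.NamedTemporaryFile (as used by self.ref_file/self.pred_file).
import nibabel as nib
import numpy as np

def create_dummy_nii(file_obj, data):
    img = nib.Nifti1Image(data.astype(np.float32), affine=np.eye(4))
    nib.save(img, file_obj.name)
```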