diff --git a/midsagittal_measures/01_compute_midsagittal_lesion_length_and_width.sh b/midsagittal_measures/01_compute_midsagittal_lesion_length_and_width.sh new file mode 100644 index 0000000..59c608a --- /dev/null +++ b/midsagittal_measures/01_compute_midsagittal_lesion_length_and_width.sh @@ -0,0 +1,181 @@ +#!/bin/bash +# +# Segment the spinal cord and lesions from T2w images and compute the midsagittal lesion length and width. +# +# The script does the following: +# 1. Segment the spinal cord and lesions using SCIsegV2 +# 2. Compute the midsagittal lesion length and width based on the spinal cord and lesion segmentations obtained using SCIsegV2 +# +# NOTE: This script requires SCT v6.4 or higher. + +# NOTE: The script is meant to be run on GPU (see `CUDA_VISIBLE_DEVICES=1 SCT_USE_GPU=1 sct_deepseg ...` below). +# +# Usage: +# sct_run_batch -config config-01_compute_midsagittal_lesion_length_and_width.json +# +# Example of config-01_compute_midsagittal_lesion_length_and_width.json: +# { +# "path_data" : "", +# "path_output" : "_2024-09-20", +# "script" : "/model_seg_sci/midsagittal_measures/01_compute_midsagittal_lesion_length_and_width.sh", +# "jobs" : 8 +# } +# +# The following global variables are retrieved from the caller sct_run_batch +# but could be overwritten by uncommenting the lines below: +# PATH_DATA_PROCESSED="~/data_processed" +# PATH_RESULTS="~/results" +# PATH_LOG="~/log" +# PATH_QC="~/qc" +# +# Author: Jan Valosek +# + +# Uncomment for full verbose +set -x + +# Immediately exit if error +set -e -o pipefail + +# Exit if user presses CTRL+C (Linux) or CMD+C (OSX) +trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT + +# Print retrieved variables from the sct_run_batch script to the log (to allow easier debug) +echo "Retrieved variables from from the caller sct_run_batch:" +echo "PATH_DATA: ${PATH_DATA}" +echo "PATH_DATA_PROCESSED: ${PATH_DATA_PROCESSED}" +echo "PATH_RESULTS: ${PATH_RESULTS}" +echo "PATH_LOG: ${PATH_LOG}" +echo "PATH_QC: ${PATH_QC}" + +SUBJECT=$1 + +# ------------------------------------------------------------------------------ +# CONVENIENCE FUNCTIONS +# ------------------------------------------------------------------------------ + +# Copy GT SC or lesion segmentation +copy_gt(){ + local file="$1" + local type="$2" # seg or lesion + # Construct file name to GT SC or lesion segmentation located under derivatives/labels + FILESEGMANUAL="${PATH_DATA}/derivatives/labels/${SUBJECT}/anat/${file}_${type}-manual.nii.gz" + echo "" + echo "Looking for manual segmentation: $FILESEGMANUAL" + if [[ -e $FILESEGMANUAL ]]; then + echo "Found! Copying ..." + rsync -avzh $FILESEGMANUAL ${file}_${type}-manual.nii.gz + else + echo "File ${FILESEGMANUAL}.nii.gz does not exist" >> ${PATH_LOG}/missing_files.log + echo "ERROR: Manual GT segmentation ${FILESEGMANUAL}.nii.gz does not exist. Exiting." + exit 1 + fi +} + +# ------------------------------------------------------------------------------ +# SCRIPT STARTS HERE +# ------------------------------------------------------------------------------ +# get starting time: +start=`date +%s` + +# Display useful info for the log, such as SCT version, RAM and CPU cores available +sct_check_dependencies -short + +# Go to folder where data will be copied and processed +cd $PATH_DATA_PROCESSED + +# Copy source T2w images +# Note: we use '/./' in order to include the sub-folder 'ses-0X' +# We do a substitution '/' --> '_' in case there is a subfolder 'ses-0X/' +# for sci-zurich, copy only sagittal T2w to save space +rsync -Ravzh ${PATH_DATA}/./${SUBJECT}/anat/${SUBJECT//[\/]/_}_*sag_T2w.* . + +# Go to subject folder for source images +cd ${SUBJECT}/anat + +# ------------------------------------------------------------------------------ +# T2w +# ------------------------------------------------------------------------------ +# sci-zurich +# We do a substitution '/' --> '_' in case there is a subfolder 'ses-0X/' +file_t2="${SUBJECT//[\/]/_}"_acq-sag_T2w + +# Check if file_t2 exists +if [[ ! -e ${file_t2}.nii.gz ]]; then + echo "File ${file_t2}.nii.gz does not exist" >> ${PATH_LOG}/missing_files.log + echo "ERROR: File ${file_t2}.nii.gz does not exist. Exiting." + exit 1 +fi + +# ------------------------------------ +# GT +# ------------------------------------ +# Copy GT SC and lesion segmentations from derivatives/labels +copy_gt "${file_t2}" "seg" +copy_gt "${file_t2}" "lesion" + +# Binarize GT lesion segmentation (sct_analyze_lesion requires binary mask until https://github.com/spinalcordtoolbox/spinalcordtoolbox/issues/4120 is fixed) +sct_maths -i ${file_t2}_lesion-manual.nii.gz -bin 0 -o ${file_t2}_lesion-manual_bin.nii.gz + +# Generate sagittal lesion QC report +sct_qc -i ${file_t2}.nii.gz -d ${file_t2}_lesion-manual_bin.nii.gz -s ${file_t2}_seg-manual.nii.gz -p sct_deepseg_lesion -plane sagittal -qc ${PATH_QC} -qc-subject ${SUBJECT} + +# Compute the midsagittal lesion length and width based on the spinal cord and lesion segmentations obtained manually +sct_analyze_lesion -m ${file_t2}_lesion-manual_bin.nii.gz -s ${file_t2}_seg-manual.nii.gz -qc ${PATH_QC} -qc-subject ${SUBJECT} +# The outputs are: +# - ${file_t2}_lesion-manual_bin_label.nii.gz: 3D mask of the segmented lesion with lesion IDs (1, 2, 3, etc.) +# - ${file_t2}_lesion-manual_bin_analysis.xls: XLS file containing the morphometric measures +# - ${file_t2}_lesion-manual_bin_analysis.pkl: Python Pickle file containing the morphometric measures + +# Remove pickle file -- we only need the XLS file +rm ${file_t2}_lesion-manual_bin_analysis.pkl + +# Copy the XLS file to the results folder +cp ${file_t2}_lesion-manual_bin_analysis.xls ${PATH_RESULTS} +echo "${file_t2}_lesion-manual_bin_analysis.xls created" >> ${PATH_LOG}/manual_GT_analysis.log + +# ---------------------------- +# SCIsegV2 +# ---------------------------- +# Segment the spinal cord and lesions using SCIsegV2 +CUDA_VISIBLE_DEVICES=1 SCT_USE_GPU=1 sct_deepseg -i ${file_t2}.nii.gz -task seg_sc_lesion_t2w_sci -largest 1 -qc ${PATH_QC} -qc-subject ${SUBJECT} +# The outputs are: +# - ${file_t2}_sc_seg.nii.gz: 3D binary mask of the segmented spinal cord +# - ${file_t2}_lesion_seg.nii.gz: 3D binary mask of the segmented lesion +# Rename the SC seg to make clear it comes from the SCIsegV2 model +mv ${file_t2}_sc_seg.nii.gz ${file_t2}_sc_seg_SCIsegV2.nii.gz + +# Generate sagittal lesion QC report (because sct_deepseg produces only axial QC report showing both SC and lesion). +# But we want to show only the lesion segmentation in the QC report on sagittal slices. +sct_qc -i ${file_t2}.nii.gz -d ${file_t2}_lesion_seg.nii.gz -s ${file_t2}_sc_seg_SCIsegV2.nii.gz -p sct_deepseg_lesion -plane sagittal -qc ${PATH_QC} -qc-subject ${SUBJECT} + +# Compute the midsagittal lesion length and width based on the spinal cord and lesion segmentations obtained using SCIsegV2 +sct_analyze_lesion -m ${file_t2}_lesion_seg.nii.gz -s ${file_t2}_sc_seg_SCIsegV2.nii.gz -qc ${PATH_QC} -qc-subject ${SUBJECT} +# The outputs are: +# - ${file_t2}_lesion_seg_label.nii.gz: 3D mask of the segmented lesion with lesion IDs (1, 2, 3, etc.) +# - ${file_t2}_lesion_seg_analysis.xls: XLS file containing the morphometric measures +# - ${file_t2}_lesion_seg_analysis.pkl: Python Pickle file containing the morphometric measures + +# Remove pickle file -- we only need the XLS file +rm ${file_t2}_lesion_seg_analysis.pkl + +# Rename the files to make clear they come from the SCIsegV2 model +mv ${file_t2}_lesion_seg_label.nii.gz ${file_t2}_lesion_seg_label_SCIsegV2.nii.gz +mv ${file_t2}_lesion_seg_analysis.xls ${file_t2}_lesion_seg_analysis_SCIsegV2.xls +# Copy the XLS file to the results folder +cp ${file_t2}_lesion_seg_analysis_SCIsegV2.xls ${PATH_RESULTS} +echo "${file_t2}_lesion_seg_analysis_SCIsegV2.xls created" >> ${PATH_LOG}/SCIsegV2_predictions_analysis.log + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ + +# Display results (to easily compare integrity across SCT versions) +end=`date +%s` +runtime=$((end-start)) +echo +echo "~~~" +echo "SCT version: `sct_version`" +echo "Ran on: `uname -nsr`" +echo "Duration: $(($runtime / 3600))hrs $((($runtime / 60) % 60))min $(($runtime % 60))sec" +echo "~~~" diff --git a/midsagittal_measures/02_read_xls_files.py b/midsagittal_measures/02_read_xls_files.py new file mode 100644 index 0000000..d12dec8 --- /dev/null +++ b/midsagittal_measures/02_read_xls_files.py @@ -0,0 +1,218 @@ +""" +The script: + - read XLS files (located under /results) with lesion metrics computed using sct_analyze_lesion (one XLS file per subject) + - fetch the midsagittal slice number, midsagittal lesion length, and midsagittal lesion width + - save the values in the dataframe and save the dataframe to a CSV file + +The script needs to be run twice: + - once for the master branch + - once for the PR4631 branch + +Note: to read XLS files, you might need to install the following packages: + pip install openpyxl + +Author: Jan Valosek +""" + +import os +import sys +import re +import glob +import argparse +import logging + +import numpy as np +import pandas as pd + + +# Initialize logging +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) # default: logging.DEBUG, logging.INFO +hdlr = logging.StreamHandler(sys.stdout) +logging.root.addHandler(hdlr) + +FONT_SIZE = 15 + + +def get_parser(): + """ + parser function + """ + + parser = argparse.ArgumentParser( + description='Read XLS files (located under /results) with lesion metrics (computed using sct_analyze_lesion),' + 'construct a dataframe, and save the dataframe to a CSV file', + prog=os.path.basename(__file__).strip('.py') + ) + parser.add_argument( + '-dir', + required=True, + type=str, + help='Absolute path to the \'results\' folder with XLS files generated using \'sct_analyze_lesion. ' + 'The results folders were generated using the \'01_compute_midsagittal_lesion_length_and_width.sh\' ' + 'script.' + ) + parser.add_argument( + '-branch', + required=True, + type=str, + choices=['master', 'PR4631'], + help='Branch name (e.g., master or PR4631) with the XLS files with lesion metrics generated by ' + 'SCT\'s sct_analyze_lesion' + ) + parser.add_argument( + '-o', + required=False, + default='stats', + help='Path to the output folder where XLS table will be saved. Default: ./stats' + ) + + return parser + + +def fetch_subject(filename_path): + """ + Get subject ID and session ID from the input BIDS-compatible filename or file path + The function works both on absolute file path as well as filename + :param filename_path: input nifti filename (e.g., sub-001_ses-01_T1w.nii.gz) or file path + (e.g., /home/user/MRI/bids/derivatives/labels/sub-001/ses-01/anat/sub-001_ses-01_T1w.nii.gz + :return: subject_session: subject ID (e.g., sub-001) + :return: sessionID: session ID (e.g., ses-01) + """ + + subject = re.search('sub-(.*?)[_/]', filename_path) # [_/] means either underscore or slash + subjectID = subject.group(0)[:-1] if subject else "" # [:-1] removes the last underscore or slash + + session = re.search('ses-(.*?)[_/]', filename_path) # [_/] means either underscore or slash + sessionID = session.group(0)[:-1] if session else "" # [:-1] removes the last underscore or slash + + # REGEX explanation + # . - match any character (except newline) + # *? - match the previous element as few times as possible (zero or more times) + + return subjectID, sessionID + + +def get_fnames(dir_path, column_name): + """ + Get list of XLS files with lesion metrics + :param dir_path: list of paths to XLS files with lesion metrics + :param column_name: branch name (e.g., master or PR4631) + :return: pandas dataframe with the paths to the XLS files + """ + + # Get XLS files with lesion metrics + fname_files = glob.glob(os.path.join(dir_path, '*lesion_seg_analysis_SCIsegV2.xls')) + # remove hidden files starting with '~' + fname_files = [f for f in fname_files if not os.path.basename(f).startswith('~')] + # if fname_files is empty, exit + if len(fname_files) == 0: + print(f'ERROR: No XLS files found in {dir_path}') + + # Sort the list of file names (to make the list the same when provided the input folders in different order) + fname_files.sort() + + # Convert fname_files_all into pandas dataframe + df = pd.DataFrame(fname_files, columns=[column_name]) + + # Add a column with participant_id and session_id + df['participant_id'] = df[column_name].apply(lambda x: fetch_subject(x)[0]) + df['session_id'] = df[column_name].apply(lambda x: fetch_subject(x)[1]) + # Reorder the columns + df = df[['participant_id', 'session_id', column_name]] + print(f'Number of participants: {len(df)}') + + return df + + +def fetch_lesion_metrics(index, row, branch, df): + """ + Fetch lesion metrics from the XLS file with lesion metrics generated by sct_analyze_lesion + :param index: index of the dataframe + :param row: row of the dataframe (one row corresponds to one participant) + :param branch: master or PR4631 + :param df: dataframe with the paths to the XLS files for manual and predicted lesions + :return: df: updated dataframe with the paths to the XLS files for manual and predicted lesions and lesion metrics + """ + + # Check if the XLS file with lesion metrics for manual lesion exists + if not os.path.exists(row[branch]): + raise ValueError(f'ERROR: {row[+branch]} does not exist.') + + # Read the XLS file with lesion metrics for lesion predicted by our 3D SCIseg nnUNet model + df_lesion = pd.read_excel(row[branch], sheet_name='measures') + if len(df_lesion) > 1: + print(f'Subject: {row["participant_id"]} has more than one lesion. Aggregating the metrics across lesions.') + # Get the metrics + midsagittal_slice = str(df_lesion['midsagittal_spinal_cord_slice'].values[0]) + # Sum length + midsagittal_length = df_lesion['length_midsagittal_slice [mm]'].sum() + # Take max width + midsagittal_width = df_lesion['width_midsagittal_slice [mm]'].max() + # Check if 'slice_' + midsagittal_slice + '_dorsal_bridge_width [mm]' is in the columns + if 'slice_' + midsagittal_slice + '_dorsal_bridge_width [mm]' in df_lesion.columns: + # Take min for dorsal and ventral bridges + dorsal_tissue_bridge = df_lesion['slice_' + midsagittal_slice + '_dorsal_bridge_width [mm]'].min() + ventral_tissue_bridge = df_lesion['slice_' + midsagittal_slice + '_ventral_bridge_width [mm]'].min() + else: + dorsal_tissue_bridge = np.nan + ventral_tissue_bridge = np.nan + + # One lesion -- # TODO: consider also multiple lesions + # Save the values in the currently processed df row + df.at[index, 'midsagittal_slice'] = midsagittal_slice + df.at[index, 'midsagittal_length'] = midsagittal_length + df.at[index, 'midsagittal_width'] = midsagittal_width + df.at[index, 'dorsal_tissue_bridge'] = dorsal_tissue_bridge + df.at[index, 'ventral_tissue_bridge'] = ventral_tissue_bridge + + return df + + +def main(): + # Parse the command line arguments + parser = get_parser() + args = parser.parse_args() + + branch_name = args.branch + + # Output directory + output_dir = os.path.join(os.getcwd(), args.o) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + print(f'Created {output_dir}') + + # Dump log file there + fname_log = f'log.txt' + if os.path.exists(fname_log): + os.remove(fname_log) + fh = logging.FileHandler(os.path.join(os.path.abspath(output_dir), fname_log)) + logging.root.addHandler(fh) + + # Check if the input path exists + if not os.path.exists(args.dir): + raise ValueError(f'ERROR: {args.dir} does not exist.') + + # For each participant_id, get XLS files with lesion metrics + df = get_fnames(args.dir, column_name=branch_name) + + # Remove sub-zh111 from the list of participants (it has multiple lesions) + df = df[df['participant_id'] != 'sub-zh111'] + + # Iterate over the rows of the dataframe and read the XLS files + for index, row in df.iterrows(): + + logger.info(f'Processing XLS files for {row["participant_id"]}') + + # Read the XLS file with lesion metrics + df = fetch_lesion_metrics(index, row, branch_name, df) + + # remove the branch column containing the paths to the XLS files + df.drop(columns=[branch_name], inplace=True) + # Save the dataframe with lesion metrics to a CSV file + df.to_csv(os.path.join(output_dir, f'lesion_metrics_{branch_name}.csv'), index=False) + logger.info(f'Saved lesion metrics to {output_dir}/lesion_metrics_{branch_name}.csv') + + +if __name__ == '__main__': + main() diff --git a/midsagittal_measures/03_generate_figures.py b/midsagittal_measures/03_generate_figures.py new file mode 100644 index 0000000..5f83c48 --- /dev/null +++ b/midsagittal_measures/03_generate_figures.py @@ -0,0 +1,288 @@ +""" +Compare the midsagittal lesion length, lesion width, and tissue bridges obtained using different methods (manual, +automatic SCT master, automatic SCT PR4631). + +The script: + - reads XLS files with manually measured lesion metrics and CSV files with lesion metrics computed using sct_analyze_lesion +- computes Wilcoxon signed-rank test for each metric between methods +- creates pairplot showing relationships between methods +- creates Bland-Altman Mean Difference Plot for each metric + +Note: to read XLS files, you might need to install the following packages: + pip install openpyxl + +Author: Jan Valosek +""" +import numpy as np + +METRICS = ['midsagittal_length', 'midsagittal_width', 'ventral_tissue_bridge', 'dorsal_tissue_bridge'] +METRIC_TO_TITLE = { + 'midsagittal_length': 'Midsagittal Lesion Length [mm]', + 'midsagittal_width': 'Midsagittal Lesion Width [mm]', + 'ventral_tissue_bridge': 'Midsagittal Ventral Tissue Bridges [mm]', + 'dorsal_tissue_bridge': 'Midsagittal Dorsal Tissue Bridges [mm]' +} + +import os +import pandas as pd +import matplotlib.pyplot as plt +import argparse + +from sklearn.linear_model import LinearRegression +from scipy.stats import wilcoxon +import statsmodels.api as sm + + +def get_parser(): + """ + parser function + """ + + parser = argparse.ArgumentParser( + description='Read CSV files with lesion metrics computed using sct_analyze_lesion and XLSX file with manually' + 'measured metrics.', + prog=os.path.basename(__file__).strip('.py') + ) + parser.add_argument( + '-manual', + required=True, + type=str, + help='Absolute path to the XLSX file with manually measured lesion metrics' + ) + parser.add_argument( + '-master', + required=True, + type=str, + help='Absolute path to the CSV file with lesion metrics computed using sct_analyze_lesion on the master branch' + ) + parser.add_argument( + '-PR4631', + required=True, + type=str, + help='Absolute path to the CSV file with lesion metrics computed using sct_analyze_lesion on the PR4631 branch' + ) + parser.add_argument( + '-o', + required=False, + default='stats/figures', + help='Path to the output folder where XLS table will be saved. Default: ./stats' + ) + + return parser + +def compute_regression(x, y): + """ + Compute a linear regression between x and y: + y = Slope * x + Intercept + https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html + + You can then plot the linear fit by + ax.plot(x_vals, y_vals, '--', color='red') + + :param x: ndarray: input - regressor + :param y: ndarray: output - response + :return: intercept: ndarray: intercept constant (bias term) + :return: slope: ndarray: slope + :return: reg_predictor: ndarray: + :return: r2_sc: float: coefficient of determination + :return x_vals: ndarray: x values for the linear fit plot + :return y_vals: ndarray: y values for the linear fit plot + """ + # Make sure we are working with numpy arrays + if isinstance(x, pd.Series): + x = x.to_numpy() + if isinstance(y, pd.Series): + y = y.to_numpy() + + # Create an instance of the class LinearRegression, which will represent the regression model + linear_regression = LinearRegression() + # Perform linear regression (compute slope and intercept) + linear_regression.fit(x.reshape(-1, 1), y.reshape(-1, 1)) + intercept = linear_regression.intercept_ # underscore indicates that an attribute is estimated + slope = linear_regression.coef_ # underscore indicates that an attribute is estimated + + # Get x and y values to plot the linear fit + x_vals = np.array([x.min(), x.max()]) + y_vals = intercept + slope * x_vals + y_vals = np.squeeze(y_vals) # change shape from (1,N) to (N,) + + # Compute prediction (pass the regressor as the argument and get the corresponding predicted response) + # Identical as reg_predictor = slope * x + intercept + reg_predictor = linear_regression.predict(x.reshape(-1, 1)) + + # Compute coefficient of determination R^2 of the prediction + r2_sc = linear_regression.score(x.reshape(-1, 1), y.reshape(-1, 1)) + + return intercept, slope, reg_predictor, r2_sc, x_vals, y_vals + + +def create_pairplot(df, output_dir): + """ + Create pairplot showing relationships between methods in a 2x2 grid + :param df: pandas dataframe with metrics data + :param output_dir: output directory + :return: + """ + for metric in METRICS: + df_plot = df[[f'{metric}_manual', f'{metric}_master', f'{metric}_PR4631']] + # Rename columns for better axis labels + df_plot.rename(columns={metric + '_manual': 'Manual', + metric + '_master': 'SCT master', + metric + '_PR4631': 'SCT PR4631'}, + inplace=True) + + fig, axes = plt.subplots(2, 2, figsize=(6, 6)) + plt.subplots_adjust(hspace=0.3, wspace=0.3) + + max_val = df_plot.max().max() + min_val = df_plot.min().min() + + # Define the plots we want to show + plots = [(0, 0, 'Manual', 'SCT master'), + (1, 0, 'Manual', 'SCT PR4631'), + (1, 1, 'SCT master', 'SCT PR4631')] + + for row, col, x_method, y_method in plots: + ax = axes[row, col] + x = df_plot[x_method] + y = df_plot[y_method] + + ax.scatter(x, y) + ax.set_xlim(-0.1 * max_val, 1.1 * max_val) + ax.set_ylim(-0.1 * max_val, 1.1 * max_val) + + ax.set_xlabel(x_method) + ax.set_ylabel(y_method) + + # Add regression line + intercept, slope, _, r2_sc, x_vals, y_vals = compute_regression(x, y) + ax.plot(x_vals, y_vals, '--', color='red') + + # Add R² value to the plot + ax.text(0.05, 0.95, f'R² = {r2_sc:.2f}', transform=ax.transAxes, + verticalalignment='top') + + # Add diagonal line + ax.plot([min_val, max_val], [min_val, max_val], ls='--', c='gray') + + # Turn off the unused subplot + axes[0, 1].axis('off') + + # Add title + fig.suptitle(f'{METRIC_TO_TITLE[metric]}', y=0.99) + plt.tight_layout() + + # Save the plot + figure_fname = os.path.join(output_dir, f'{metric}.png') + plt.savefig(figure_fname, dpi=200) + print(f'Pairplot for {metric} bridges saved as {figure_fname}') + plt.close() + + +def create_diff_plot(df, output_dir): + """ + Create a Bland-Altman Mean Difference Plot for each metric + https://www.statsmodels.org/devel/generated/statsmodels.graphics.agreement.mean_diff_plot.html + :param df: + :param output_dir: + :return: + """ + for metric in METRICS: + df_plot = df[[f'{metric}_manual', f'{metric}_master', f'{metric}_PR4631']] + # Rename columns for better axis labels + df_plot.rename(columns={metric + '_manual': 'Manual', + metric + '_master': 'SCT master', + metric + '_PR4631': 'SCT PR4631'}, + inplace=True) + + fig, axes = plt.subplots(1, 2, figsize=(8, 4)) + + # Define the plots we want to show + plots = [(0, 'Manual' , 'SCT master'), + (1, 'Manual', 'SCT PR4631')] + + for col, x_method, y_method in plots: + ax = axes[col] + x = df_plot[x_method] + y = df_plot[y_method] + mean_diff_plot = sm.graphics.mean_diff_plot(x, y, ax=ax) + ax.set_title(f'{METRIC_TO_TITLE[metric]}\n{x_method} vs {y_method}') + + plt.tight_layout() + + # Save the plot + figure_fname = os.path.join(output_dir, f'{metric}_diffplot.png') + plt.savefig(figure_fname, dpi=200) + print(f'Diffplot for {metric} bridges saved as {figure_fname}') + plt.close() + + + +def main(): + + # Parse the command line arguments + parser = get_parser() + args = parser.parse_args() + output_dir = args.o + # create output directory if it does not exist + os.makedirs(output_dir, exist_ok=True) + + xlsx_manual = args.manual + csv_master = args.master + csv_PR4631 = args.PR4631 + + # MANUAL + df_manual = pd.read_excel(xlsx_manual) + # If session_id is nan in the manual file, set it to 'ses-01' + df_manual['session_id'] = df_manual['session_id'].fillna('ses-01') + # Add '_manual' suffix to all columns except participant_id and session_id + df_manual = df_manual.add_suffix('_manual') + df_manual.rename(columns={'participant_id_manual': 'participant_id', 'session_id_manual': 'session_id'}, inplace=True) + + # MASTER BRANCH + df_master = pd.read_csv(csv_master) + # Add '_master' suffix to all columns except participant_id and session_id + df_master = df_master.add_suffix('_master') + df_master.rename(columns={'participant_id_master': 'participant_id', 'session_id_master': 'session_id'}, inplace=True) + + # PR4631 BRANCH + df_PR4631 = pd.read_csv(csv_PR4631) + # Add '_PR4631' suffix to all columns except participant_id and session_id + df_PR4631 = df_PR4631.add_suffix('_PR4631') + df_PR4631.rename(columns={'participant_id_PR4631': 'participant_id', 'session_id_PR4631': 'session_id'}, inplace=True) + + # Merge the dataframes + df = pd.merge(df_manual, df_master, on=['participant_id', 'session_id']) + df = pd.merge(df, df_PR4631, on=['participant_id', 'session_id']) + + # Replace nan values with zeros (if there is no lesion, we assume the metrics are zero) + df = df.fillna(0) + + # Print number of subjects (rows) + print(f'Number of subjects: {df.shape[0]}') + + # Print participant_id presented in df_manual but not in df_master + missing_participant_id = df_manual[~df_manual['participant_id'].isin(df_master['participant_id'])]['participant_id'] + if not missing_participant_id.empty: + print('participant_id presented in df_manual but not in df_master:') + print(missing_participant_id) + + # Compute Wilcoxon signed-rank test for each metric between methods + for metric in METRICS: + # Compute Wilcoxon signed-rank test + _, p_value_master = wilcoxon(df[f'{metric}_manual'], df[f'{metric}_master']) + _, p_value_PR4631 = wilcoxon(df[f'{metric}_manual'], df[f'{metric}_PR4631']) + + print(f'Wilcoxon signed-rank test for {METRIC_TO_TITLE[metric]}:') + print(f' - p-value (manual vs master): {p_value_master:.3f}') + print(f' - p-value (manual vs PR4631): {p_value_PR4631:.3f}') + + # Create pairplot showing relationships between methods + #create_pairplot(df, output_dir) + create_diff_plot(df, output_dir) + + +if __name__ == '__main__': + main() + + diff --git a/midsagittal_measures/README.md b/midsagittal_measures/README.md new file mode 100644 index 0000000..08f8167 --- /dev/null +++ b/midsagittal_measures/README.md @@ -0,0 +1,40 @@ +# Midsagittal lesion length and width + +This folder contains scripts to compare the midsagittal lesion length and width obtained using different methods: + +1. **_manual_**: manual measurement provided by collaborators +2. **_automatic_SCIsegV2_**: the midsagittal lesion length and width are computed from the spinal cord and lesion segmentations obtained using SCIsegV2 + +## 1. Download the dataset + +```console +git clone git@data.neuro.polymtl.ca:datasets/sci-zurich +cd sci-zurich +git annex init +git annex dead here +git annex get $(find . -name "*sag*T2*") +``` + +## 2. Compute midsagittal lesion length and width + +Compute the midsagittal lesion length and width using the SCT's `sct_analyze_lesion` function using the +`01_compute_midsagittal_lesion_length_and_width.sh` script. +The script is run using the `sct_run_batch` wrapper script to process subjects in parallel. +Note that the script requires SCT v6.4 or higher and is designed to be run on GPU. + +```bash +sct_run_batch -config config-01_compute_midsagittal_lesion_length_and_width.json +``` + +NOTE: the script is run twice: once for the master branch and once for the [PR4631 branch](https://github.com/spinalcordtoolbox/spinalcordtoolbox/pull/4631). + +## 3. Aggregate lesion metrics across subjects + +As the `01_compute_midsagittal_lesion_length_and_width.sh` script calls `sct_analyze_lesion` function, it outputs one XLS file per subject. +The XLS files are saved in the `/results` directory. +To make it easier to work, I read the XLS files and save the data in a CSV file using the `02_read_xls_files.py` script. + +```bash +python 02_read_xls_files.py -dir sci-zurich_midsagittal_measures_2024-09-20_master/results -branch master +python 02_read_xls_files.py -dir sci-zurich_midsagittal_measures_2024-09-20_PR4631/results -branch PR4631 +``` \ No newline at end of file