From 08786fe9777e53dce3ca5b21b9e361ee380463ea Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 29 Nov 2022 10:13:19 -0500 Subject: [PATCH 01/38] Add python script for conversion of INSPIRED dataset to BIDS compliant format --- scripts/curate_data_inspired.py | 187 ++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 scripts/curate_data_inspired.py diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py new file mode 100644 index 0000000..22fafc0 --- /dev/null +++ b/scripts/curate_data_inspired.py @@ -0,0 +1,187 @@ +# Convert the input non-BIDS INSPIRED dataset to the BIDS compliant dataset +# The INSPIRED dataset contains data from two centers (Toronto, Zurich) across three pathologies (DCM, SCI, HC) +# Spinal cord MRI data: +# - DWI (A-P and P-A phase encoding) +# - T1w sag +# - T2w sag +# - T2w tra +# - T2star tra +# Brain MRI data: +# - DWI (A-P and P-A phase encoding) +# - MPM (multi-parameter mapping) +# +# Authors: Jan Valosek + +import os +import sys +import shutil +import json +import glob +import argparse + +prefix = 'sub-' + +# There are two input datasets: `01` and `02`. `01` represents Toronto, `02` represents Zurich +# Details: https://github.com/neuropoly/data-management/issues/184#issuecomment-1329250514 +centres_conv_dict = { + '01': 'toronto', + '02': 'zurich' + } + +pathologies_conv_dict = { + 'csm': 'DCM', + 'hc': 'HC', + 'sci': 'SCI' + } + +# Dictionary for image filename conversion +# Note: we use label `bp-cspine` to differentiate spine imaging from brain +# Details: BIDS BEP025 Proposal (https://docs.google.com/document/d/1chZv7vAPE-ebPDxMktfI9i1OkLNR2FELIfpVYsaZPr4) +images_spine_conv_dict = { + 'dwi.nii.gz': 'dir-AP_bp-cspine_dwi.nii.gz', + 'dwi.bval': 'dir-AP_bp-cspine_dwi.bval', + 'dwi.bvec': 'dir-AP_bp-cspine_dwi.bvec', + 'dwi_reversed_blip.nii.gz': 'dir-PA_bp-cspine_dwi.nii.gz', + 't1_sag.nii.gz': 'bp-cspine_T1w.nii.gz', + 't2_sag.nii.gz': 'acq-coronal_bp-cspine_T2w.nii.gz', + 't2_tra.nii.gz': 'acq-axial_bp-cspine_T2w.nii.gz', + 'pd_medic.nii.gz': 'bp-cspine_T2star.nii.gz' + } + +# Dictionary for brain image filename conversion +images_brain_conv_dict = { + 'dwi.nii.gz': 'dir-AP_dwi.nii.gz', + 'dwi.bval': 'dir-AP_dwi.bval', + 'dwi.bvec': 'dir-AP_dwi.bvec', + 'dwi_reversed_blip.nii.gz': 'dir-PA_dwi.nii.gz', + } + + +def copy_file(path_file_in, path_dir_out, file_out): + """ + Copy file from input non-BIDS dataset to BIDS compliant dataset + :param path_file_in: path of the input non-BIDS file which will be copied + :param path_dir_out: path of the output BIDS directory; for example sub-torontoDCM001/dwi + :param file_out: filename of the output BIDS file; for example 'sub-torontoDCM001_bp-cspine_dir-AP_dwi.nii.gz' + :return: + """ + # Make sure that the input file exists, if so, copy it + if os.path.isfile(path_file_in): + # Create dwi or anat folder if does not exist + if not os.path.isdir(path_dir_out): + os.makedirs(path_dir_out, exist_ok=True) + # Construct path to the output file + path_file_out = os.path.join(path_dir_out, file_out) + print(f'Copying {path_file_in} to {path_file_out}') + shutil.copyfile(path_file_in, path_file_out) + create_dummy_json_sidecar_if_does_not_exist(path_file_out) + + +# TODO - do we want to create just a empty json sidecar? Or do we want to include some params there? 
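If the sidecar were ever populated rather than left empty, one option is to write a small JSON containing whichever acquisition parameters are known; a minimal sketch (the helper name and keys are illustrative, not part of this script), with pathlib.Path(...).touch() as a portable alternative to shelling out to `touch`:

import json
from pathlib import Path

def write_minimal_json_sidecar(path_json_sidecar, parameters=None):
    """Write known parameters into the sidecar, or create an empty file if none are available."""
    if parameters:
        with open(path_json_sidecar, 'w') as f:
            json.dump(parameters, f, indent=4)
    else:
        # Portable equivalent of os.system('touch ' + path_json_sidecar)
        Path(path_json_sidecar).touch()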
+def create_dummy_json_sidecar_if_does_not_exist(path_file_out): + # Work only with .nii.gz (i.e., ignore .bval and .bvec files) + if path_file_out.endswith('.nii.gz'): + path_json_sidecar = path_file_out.replace('.nii.gz', '.json') + if not os.path.exists(path_json_sidecar): + os.system('touch ' + path_json_sidecar) + + +def read_json_file(path_to_file): + """ + Read json file and fetch relevant parameters (SeriesDescription, SeriesDescription, EchoTime) for MPM images + :param path_to_file: path to input json file + :return: + """ + with open(path_to_file) as p: + loaded_json = json.load(p) + series_description = loaded_json['acqpar'][0]['SeriesDescription'] + flip_angle = loaded_json['acqpar'][0]['SeriesDescription'] + echo_time = loaded_json['acqpar'][0]['EchoTime'] + return series_description, flip_angle, echo_time + + +def get_parameters(): + parser = argparse.ArgumentParser(description='Convert dataset to BIDS format.') + parser.add_argument("-i", "--path-input", + help="Path to folder containing the dataset to convert to BIDS", + required=True) + parser.add_argument("-o", "--path-output", + help="Path to the output BIDS folder", + required=True, + ) + arguments = parser.parse_args() + return arguments + + +def main(path_input, path_output): + # Check if input path is valid + if not os.path.isdir(path_input): + print(f'ERROR - {path_input} does not exist.') + sys.exit() + # Remove output folder if already exists and create an empty one again + if os.path.isdir(path_output): + shutil.rmtree(path_output) + os.makedirs(path_output, exist_ok=True) + + # Loop across centers (01, 02) + for centre_in, centre_out in centres_conv_dict.items(): + # Loop across pathologies (hc, csm, sci) + for pathology_in, pathology_out in pathologies_conv_dict.items(): + # Loop across subjects (001, ...) + for sub_index, subject_in in enumerate(glob.glob(os.path.join(path_input, centre_in, pathology_in, '*')), + start=1): + # If the input subject folder is .tar.gz, extract it + if subject_in.endswith('.tar.gz'): + os.system("tar -xf " + subject_in + " --directory " + os.path.join(path_input, centre_in, pathology_in)) + # TODO - consider what to do with extracted folders once the BIDS conversion is done. + # Delete them and keep only original .tar.gz files? 
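Aside: the `tar -xf` shell call above could equivalently use Python's standard tarfile module, avoiding the dependency on an external binary; a minimal sketch (the function name is illustrative). The cleanup question raised in the TODO is resolved later in this series by deleting the extracted folder once the subject has been converted.

import tarfile

def extract_subject_archive(path_tar_gz, path_destination):
    """Extract a subject's .tar.gz archive into the given directory."""
    with tarfile.open(path_tar_gz, 'r:gz') as archive:
        archive.extractall(path=path_destination)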
+ # Remove '.tar.gz' from the subject_in variable + subject_in = subject_in.replace('.tar.gz', '') + # Loop across regions (brain or cord) + for region in ['brain', 'cord']: + if region == 'cord': + # Loop across files + for image_in, image_out in images_spine_conv_dict.items(): + # Construct path to the input file + path_file_in = os.path.join(path_input, centre_in, pathology_in, subject_in, 'bl', region, image_in) + # Construct output subjectID containing centre name and pathology, e.g., 'sub-torontoDCM001' + subject_out = prefix + centre_out + pathology_out + f"{sub_index:03d}" + # Construct output filename, e.g., 'sub-torontoDCM001_bp-cspine_dir-AP_dwi.nii.gz' + file_out = subject_out + '_' + image_out + # Construct path to the output BIDS compliant directory + if 'dwi' in image_in: + path_dir_out = os.path.join(path_output, subject_out, 'dwi') + else: + path_dir_out = os.path.join(path_output, subject_out, 'anat') + + # Copy file and create a dummy json sidecar if does not exist + copy_file(path_file_in, path_dir_out, file_out) + elif region == 'brain': + # Loop across files + for image_in, image_out in images_brain_conv_dict.items(): + # Construct path to the input file + path_file_in = os.path.join(path_input, centre_in, pathology_in, subject_in, 'bl', region, image_in) + # Construct output subjectID containing centre name and pathology, e.g., 'sub-torontoDCM001' + subject_out = prefix + centre_out + pathology_out + f"{sub_index:03d}" + # Construct output filename, e.g., 'sub-torontoDCM001_dir-AP_dwi.nii.gz' + file_out = subject_out + '_' + image_out + # Construct path to the output BIDS compliant directory + if 'dwi' in image_in: + path_dir_out = os.path.join(path_output, subject_out, 'dwi') + + # Copy file and create a dummy json sidecar if does not exist + copy_file(path_file_in, path_dir_out, file_out) + + # Convert raw MPM images + mpm_raw_folder_path = os.path.join(path_input, centre_in, pathology_in, subject_in, 'bl', + region, 'mpm_raw') + if os.path.isdir(mpm_raw_folder_path): + # Loop across individual MPM files + for mpm_file in glob.glob(os.path.join(mpm_raw_folder_path, '*.nii*')): + series_description, flip_angle, echo_time = read_json_file(mpm_file.replace('.nii', '.json')) + # TODO - finish MPM files conversion + + +if __name__ == "__main__": + args = get_parameters() + main(args.path_input, args.path_output) From 37038dc030dac96c2b0fd9f942441deb55f4bd91 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 29 Nov 2022 14:07:55 -0500 Subject: [PATCH 02/38] Implement MPM image BIDSification --- scripts/curate_data_inspired.py | 69 ++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 22fafc0..6938bba 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -13,6 +13,7 @@ # Authors: Jan Valosek import os +import re import sys import shutil import json @@ -74,7 +75,9 @@ def copy_file(path_file_in, path_dir_out, file_out): path_file_out = os.path.join(path_dir_out, file_out) print(f'Copying {path_file_in} to {path_file_out}') shutil.copyfile(path_file_in, path_file_out) - create_dummy_json_sidecar_if_does_not_exist(path_file_out) + # Create a dummy json sidecar for all files except of MPM (for MPM we copy original .json sidecars) + if 'MPM' not in file_out: + create_dummy_json_sidecar_if_does_not_exist(path_file_out) # TODO - do we want to create just a empty json sidecar? Or do we want to include some params there? 
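For context on the hunk below: read_json_file() assumes each MPM sidecar stores its parameters as the first element of an 'acqpar' list. A sketch of that assumed shape, with invented values:

# Illustrative only - the field values are made up, not taken from the dataset.
example_sidecar = {
    'acqpar': [
        {'SeriesDescription': 'mpm_..._t1_...', 'FlipAngle': 21, 'EchoTime': 2.46}
    ]
}
series_description = example_sidecar['acqpar'][0]['SeriesDescription']
flip_angle = example_sidecar['acqpar'][0]['FlipAngle']   # the fix below reads this key instead of SeriesDescription
echo_time = example_sidecar['acqpar'][0]['EchoTime']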
@@ -95,11 +98,54 @@ def read_json_file(path_to_file): with open(path_to_file) as p: loaded_json = json.load(p) series_description = loaded_json['acqpar'][0]['SeriesDescription'] - flip_angle = loaded_json['acqpar'][0]['SeriesDescription'] + flip_angle = loaded_json['acqpar'][0]['FlipAngle'] echo_time = loaded_json['acqpar'][0]['EchoTime'] return series_description, flip_angle, echo_time +def construct_mpm_bids_filename(mpm_files_dict, path_output, subject_out): + """ + Construct BIDS compliant filename for MPM images, e.g. 'acq-T1w_echo-1_flip-1_mt-off_MPM' + Then, call function to copy the input non-BIDS MPM file to BIDS compliant MPM file + :param mpm_files_dict: dict with SeriesDescription, FlipAngle, and EchoTime across all MPM files + :param path_output: path to output BIDS folder + :param subject_out: output BIDS subjectID containing centre name and pathology, e.g., 'sub-torontoDCM001' + :return: + """ + mpm_to_bids = dict() + # Get unique values + unique_SeriesDescription = set([key[0] for key in mpm_files_dict.keys()]) + unique_FlipAngle = set([key[1] for key in mpm_files_dict.keys()]) + unique_EchoTime = set([key[2] for key in mpm_files_dict.keys()]) + # Construct BIDS compliant filename, e.g. 'acq-T1w_echo-1_flip-1_mt-off_MPM' + for echo_idx, echo_time in enumerate(unique_EchoTime, start=1): + for flip_idx, flip_angle in enumerate(unique_FlipAngle, start=1): + for series_description in unique_SeriesDescription: + if '_mt_' in series_description: + BIDS_label = 'acq-MTw' + '_echo-' + str(echo_idx) + '_flip-' + str(flip_idx) + '_mt-on_MPM' + elif '_pd_' in series_description: + BIDS_label = 'acq-PDw' + '_echo-' + str(echo_idx) + '_flip-' + str(flip_idx) + '_mt-off_MPM' + elif '_t1_' in series_description: + BIDS_label = 'acq-T1w' + '_echo-' + str(echo_idx) + '_flip-' + str(flip_idx) + '_mt-off_MPM' + # Find original MPM filename + if (series_description, flip_angle, echo_time) in mpm_files_dict.keys(): + mpm_file = mpm_files_dict[(series_description, flip_angle, echo_time)] + mpm_to_bids[mpm_file] = BIDS_label + + # Construct path to the output BIDS compliant directory + path_dir_out = os.path.join(path_output, subject_out, 'anat') + # Copy individual mpm raw files to the output BIDS directory + for sequence_params, path_file_in in mpm_files_dict.items(): + label_out = mpm_to_bids[path_file_in] # e.g., 'acq-MTw_echo-1_flip-1_mt-on_MPM' + # Construct output filename, e.g., 'sub-torontoDCM001_acq-MTw_echo-1_flip-1_mt-on_MPM.nii.gz' + file_out = subject_out + '_' + label_out + '.nii.gz' + # Copy MPM nii file + copy_file(path_file_in, path_dir_out, file_out) + # Copy MPM json sidecar + # Note: re.sub has to be used instead of .replace to match both '.nii' and '.nii.gz' + copy_file(re.sub('.nii(.gz)*', '.json', path_file_in), path_dir_out, file_out.replace('.nii.gz', '.json')) + + def get_parameters(): parser = argparse.ArgumentParser(description='Convert dataset to BIDS format.') parser.add_argument("-i", "--path-input", @@ -139,13 +185,13 @@ def main(path_input, path_output): subject_in = subject_in.replace('.tar.gz', '') # Loop across regions (brain or cord) for region in ['brain', 'cord']: + # Construct output subjectID containing centre name and pathology, e.g., 'sub-torontoDCM001' + subject_out = prefix + centre_out + pathology_out + f"{sub_index:03d}" if region == 'cord': # Loop across files for image_in, image_out in images_spine_conv_dict.items(): # Construct path to the input file path_file_in = os.path.join(path_input, centre_in, pathology_in, subject_in, 'bl', region, 
image_in) - # Construct output subjectID containing centre name and pathology, e.g., 'sub-torontoDCM001' - subject_out = prefix + centre_out + pathology_out + f"{sub_index:03d}" # Construct output filename, e.g., 'sub-torontoDCM001_bp-cspine_dir-AP_dwi.nii.gz' file_out = subject_out + '_' + image_out # Construct path to the output BIDS compliant directory @@ -161,8 +207,6 @@ def main(path_input, path_output): for image_in, image_out in images_brain_conv_dict.items(): # Construct path to the input file path_file_in = os.path.join(path_input, centre_in, pathology_in, subject_in, 'bl', region, image_in) - # Construct output subjectID containing centre name and pathology, e.g., 'sub-torontoDCM001' - subject_out = prefix + centre_out + pathology_out + f"{sub_index:03d}" # Construct output filename, e.g., 'sub-torontoDCM001_dir-AP_dwi.nii.gz' file_out = subject_out + '_' + image_out # Construct path to the output BIDS compliant directory @@ -172,14 +216,21 @@ def main(path_input, path_output): # Copy file and create a dummy json sidecar if does not exist copy_file(path_file_in, path_dir_out, file_out) - # Convert raw MPM images + # Process raw MPM images mpm_raw_folder_path = os.path.join(path_input, centre_in, pathology_in, subject_in, 'bl', region, 'mpm_raw') if os.path.isdir(mpm_raw_folder_path): + mpm_files_dict = dict() # Loop across individual MPM files for mpm_file in glob.glob(os.path.join(mpm_raw_folder_path, '*.nii*')): - series_description, flip_angle, echo_time = read_json_file(mpm_file.replace('.nii', '.json')) - # TODO - finish MPM files conversion + # Get SeriesDescription, FlipAngle, and EchoTime for each MPM file + # Note: re.sub has to be used instead of .replace to match both '.nii' and '.nii.gz' + series_description, flip_angle, echo_time = read_json_file(re.sub('.nii(.gz)*', '.json', mpm_file)) + # Collect SeriesDescription, FlipAngle, and EchoTime across all MPM files + mpm_files_dict[series_description, flip_angle, echo_time] = mpm_file + + # Construct BIDS compliant filename for MPM images + construct_mpm_bids_filename(mpm_files_dict, path_output, subject_out) if __name__ == "__main__": From 336ad1082e5eb32878b5adbc5c9a72a3ba7acb9b Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 29 Nov 2022 16:45:32 -0500 Subject: [PATCH 03/38] Make sure that input args have abs path --- scripts/curate_data_inspired.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 6938bba..51f4af6 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -160,6 +160,12 @@ def get_parameters(): def main(path_input, path_output): + # Make sure that input args are absolute paths + if os.path.isdir(path_input): + path_input = os.path.abspath(path_input) + if os.path.isdir(path_output): + path_output = os.path.abspath(path_output) + # Check if input path is valid if not os.path.isdir(path_input): print(f'ERROR - {path_input} does not exist.') From 3e231d30d30a4e861378335ab049ff8a8010bbf0 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 29 Nov 2022 16:46:11 -0500 Subject: [PATCH 04/38] Match also upper case in MPM json sidecars --- scripts/curate_data_inspired.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 51f4af6..cbd6205 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -121,11 +121,11 @@ def construct_mpm_bids_filename(mpm_files_dict, path_output, subject_out): 
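The hunk below matches both lower- and upper-case tags explicitly; lower-casing the SeriesDescription once would cover both spellings. A sketch with invented values:

# Illustrative values only.
for series_description in ['mpm_mt_w', 'MPM_MT_W', 'mpm_pd_w']:
    if '_mt_' in series_description.lower():
        label = 'acq-MTw'
    elif '_pd_' in series_description.lower():
        label = 'acq-PDw'
    else:
        label = 'acq-T1w'
    print(series_description, '->', label)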
for echo_idx, echo_time in enumerate(unique_EchoTime, start=1): for flip_idx, flip_angle in enumerate(unique_FlipAngle, start=1): for series_description in unique_SeriesDescription: - if '_mt_' in series_description: + if '_mt_' in series_description or '_MT_' in series_description: BIDS_label = 'acq-MTw' + '_echo-' + str(echo_idx) + '_flip-' + str(flip_idx) + '_mt-on_MPM' - elif '_pd_' in series_description: + elif '_pd_' in series_description or '_PD_' in series_description: BIDS_label = 'acq-PDw' + '_echo-' + str(echo_idx) + '_flip-' + str(flip_idx) + '_mt-off_MPM' - elif '_t1_' in series_description: + elif '_t1_' in series_description or '_T1_' in series_description: BIDS_label = 'acq-T1w' + '_echo-' + str(echo_idx) + '_flip-' + str(flip_idx) + '_mt-off_MPM' # Find original MPM filename if (series_description, flip_angle, echo_time) in mpm_files_dict.keys(): From 0df6da572d6711351a0f881a701d8e22dd413aaa Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 29 Nov 2022 16:57:39 -0500 Subject: [PATCH 05/38] Fix condition when there are no json sidecars for MPM images --- scripts/curate_data_inspired.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index cbd6205..3ad375a 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -231,12 +231,20 @@ def main(path_input, path_output): for mpm_file in glob.glob(os.path.join(mpm_raw_folder_path, '*.nii*')): # Get SeriesDescription, FlipAngle, and EchoTime for each MPM file # Note: re.sub has to be used instead of .replace to match both '.nii' and '.nii.gz' - series_description, flip_angle, echo_time = read_json_file(re.sub('.nii(.gz)*', '.json', mpm_file)) - # Collect SeriesDescription, FlipAngle, and EchoTime across all MPM files - mpm_files_dict[series_description, flip_angle, echo_time] = mpm_file + json_file = re.sub('.nii(.gz)*', '.json', mpm_file) + # Make sure json sidecar exist + if os.path.isfile(json_file): + series_description, flip_angle, echo_time = read_json_file(json_file) + # Collect SeriesDescription, FlipAngle, and EchoTime across all MPM files + mpm_files_dict[series_description, flip_angle, echo_time] = mpm_file # Construct BIDS compliant filename for MPM images - construct_mpm_bids_filename(mpm_files_dict, path_output, subject_out) + if bool(mpm_files_dict): + construct_mpm_bids_filename(mpm_files_dict, path_output, subject_out) + # In some cases, there are no json sidecars for MPM images, thus mpm_files_dict is empty + else: + print(f'WARNING: There are no json sidecars in {mpm_raw_folder_path}. 
' + f'Skipping this subject.') if __name__ == "__main__": From 0c17d8c9d79f09022e04ffcb18f20d338a583795 Mon Sep 17 00:00:00 2001 From: valosekj Date: Tue, 29 Nov 2022 19:14:50 -0500 Subject: [PATCH 06/38] Save conversion progress to log --- scripts/curate_data_inspired.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 3ad375a..61c9e90 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -19,6 +19,14 @@ import json import glob import argparse +import logging +import datetime + +# Initialize logging +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) # default: logging.DEBUG, logging.INFO +hdlr = logging.StreamHandler(sys.stdout) +logging.root.addHandler(hdlr) prefix = 'sub-' @@ -73,7 +81,7 @@ def copy_file(path_file_in, path_dir_out, file_out): os.makedirs(path_dir_out, exist_ok=True) # Construct path to the output file path_file_out = os.path.join(path_dir_out, file_out) - print(f'Copying {path_file_in} to {path_file_out}') + logger.info(f'Copying {path_file_in} to {path_file_out}') shutil.copyfile(path_file_in, path_file_out) # Create a dummy json sidecar for all files except of MPM (for MPM we copy original .json sidecars) if 'MPM' not in file_out: @@ -175,6 +183,17 @@ def main(path_input, path_output): shutil.rmtree(path_output) os.makedirs(path_output, exist_ok=True) + FNAME_LOG = os.path.join(path_output, 'bids_conversion.log') + # Dump log file there + if os.path.exists(FNAME_LOG): + os.remove(FNAME_LOG) + fh = logging.FileHandler(os.path.join(os.path.abspath(os.curdir), FNAME_LOG)) + logging.root.addHandler(fh) + print("INFO: log file will be saved to {}".format(FNAME_LOG)) + + # Print current time and date to log file + logger.info('\nAnalysis started at {}'.format(datetime.datetime.now())) + # Loop across centers (01, 02) for centre_in, centre_out in centres_conv_dict.items(): # Loop across pathologies (hc, csm, sci) @@ -184,6 +203,7 @@ def main(path_input, path_output): start=1): # If the input subject folder is .tar.gz, extract it if subject_in.endswith('.tar.gz'): + logger.info(f'Unpacking tar archive for {subject_in}...') os.system("tar -xf " + subject_in + " --directory " + os.path.join(path_input, centre_in, pathology_in)) # TODO - consider what to do with extracted folders once the BIDS conversion is done. # Delete them and keep only original .tar.gz files? @@ -243,8 +263,8 @@ def main(path_input, path_output): construct_mpm_bids_filename(mpm_files_dict, path_output, subject_out) # In some cases, there are no json sidecars for MPM images, thus mpm_files_dict is empty else: - print(f'WARNING: There are no json sidecars in {mpm_raw_folder_path}. ' - f'Skipping this subject.') + logger.warning(f'WARNING: There are no json sidecars in {mpm_raw_folder_path}. 
' + f'Skipping this subject.') if __name__ == "__main__": From 1814879d2913438bdcfc03246d75616c0fa06c8d Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 30 Nov 2022 13:36:52 -0500 Subject: [PATCH 07/38] Write participants.tsv file --- scripts/curate_data_inspired.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 61c9e90..acf7f90 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -15,6 +15,7 @@ import os import re import sys +import csv import shutil import json import glob @@ -154,6 +155,22 @@ def construct_mpm_bids_filename(mpm_files_dict, path_output, subject_out): copy_file(re.sub('.nii(.gz)*', '.json', path_file_in), path_dir_out, file_out.replace('.nii.gz', '.json')) +def write_participants_tsv(participants_tsv_list, path_output): + """ + Write participants.tsv file + :param participants_tsv_list: list containing [subject_out, pathology_out, subject_in, centre_in, centre_out], + example:[sub-torontoDCM001, DCM, 001, 01, toronto] + :param path_output: path to the output BIDS folder + :return: + """ + with open(os.path.join(path_output + '/participants.tsv'), 'w') as tsv_file: + tsv_writer = csv.writer(tsv_file, delimiter='\t', lineterminator='\n') + tsv_writer.writerow(['participant_id', 'pathology', 'data_id', 'institution_id', 'institution']) + for item in participants_tsv_list: + tsv_writer.writerow(item) + logger.info(f'participants.tsv created in {path_output}') + + def get_parameters(): parser = argparse.ArgumentParser(description='Convert dataset to BIDS format.') parser.add_argument("-i", "--path-input", @@ -194,6 +211,9 @@ def main(path_input, path_output): # Print current time and date to log file logger.info('\nAnalysis started at {}'.format(datetime.datetime.now())) + # Initialize list for participants.tsv + participants_tsv_list = list() + # Loop across centers (01, 02) for centre_in, centre_out in centres_conv_dict.items(): # Loop across pathologies (hc, csm, sci) @@ -266,6 +286,10 @@ def main(path_input, path_output): logger.warning(f'WARNING: There are no json sidecars in {mpm_raw_folder_path}. ' f'Skipping this subject.') + participants_tsv_list.append([subject_out, pathology_out, subject_in.split(sep='/')[-1], centre_in, centre_out]) + + write_participants_tsv(participants_tsv_list, path_output) + if __name__ == "__main__": args = get_parameters() From 36a5b8956424c88ee302a000cce2192b91a48aeb Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 30 Nov 2022 13:37:18 -0500 Subject: [PATCH 08/38] Comments clarification --- scripts/curate_data_inspired.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index acf7f90..34ac010 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -117,7 +117,7 @@ def construct_mpm_bids_filename(mpm_files_dict, path_output, subject_out): Construct BIDS compliant filename for MPM images, e.g. 
'acq-T1w_echo-1_flip-1_mt-off_MPM' Then, call function to copy the input non-BIDS MPM file to BIDS compliant MPM file :param mpm_files_dict: dict with SeriesDescription, FlipAngle, and EchoTime across all MPM files - :param path_output: path to output BIDS folder + :param path_output: path to the output BIDS folder :param subject_out: output BIDS subjectID containing centre name and pathology, e.g., 'sub-torontoDCM001' :return: """ @@ -234,6 +234,7 @@ def main(path_input, path_output): # Construct output subjectID containing centre name and pathology, e.g., 'sub-torontoDCM001' subject_out = prefix + centre_out + pathology_out + f"{sub_index:03d}" if region == 'cord': + # Process spinal cord anatomical and DWI data # Loop across files for image_in, image_out in images_spine_conv_dict.items(): # Construct path to the input file @@ -249,6 +250,7 @@ def main(path_input, path_output): # Copy file and create a dummy json sidecar if does not exist copy_file(path_file_in, path_dir_out, file_out) elif region == 'brain': + # Process DWI brain files # Loop across files for image_in, image_out in images_brain_conv_dict.items(): # Construct path to the input file @@ -262,7 +264,7 @@ def main(path_input, path_output): # Copy file and create a dummy json sidecar if does not exist copy_file(path_file_in, path_dir_out, file_out) - # Process raw MPM images + # Process raw MPM images (located in brain/mpm_raw folder) mpm_raw_folder_path = os.path.join(path_input, centre_in, pathology_in, subject_in, 'bl', region, 'mpm_raw') if os.path.isdir(mpm_raw_folder_path): From 6a073739a40f5a00c0a748cc891811e28f82ace4 Mon Sep 17 00:00:00 2001 From: valosekj Date: Fri, 2 Dec 2022 15:44:53 -0500 Subject: [PATCH 09/38] Create participants.json, dataset_description.json, and README files. Copy the script itself to the path_output/code folder. 
--- scripts/curate_data_inspired.py | 90 +++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 3 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 34ac010..6f84b60 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -155,7 +155,7 @@ def construct_mpm_bids_filename(mpm_files_dict, path_output, subject_out): copy_file(re.sub('.nii(.gz)*', '.json', path_file_in), path_dir_out, file_out.replace('.nii.gz', '.json')) -def write_participants_tsv(participants_tsv_list, path_output): +def create_participants_tsv(participants_tsv_list, path_output): """ Write participants.tsv file :param participants_tsv_list: list containing [subject_out, pathology_out, subject_in, centre_in, centre_out], @@ -163,7 +163,7 @@ def write_participants_tsv(participants_tsv_list, path_output): :param path_output: path to the output BIDS folder :return: """ - with open(os.path.join(path_output + '/participants.tsv'), 'w') as tsv_file: + with open(os.path.join(path_output, 'participants.tsv'), 'w') as tsv_file: tsv_writer = csv.writer(tsv_file, delimiter='\t', lineterminator='\n') tsv_writer.writerow(['participant_id', 'pathology', 'data_id', 'institution_id', 'institution']) for item in participants_tsv_list: @@ -171,6 +171,86 @@ def write_participants_tsv(participants_tsv_list, path_output): logger.info(f'participants.tsv created in {path_output}') +def create_participants_json(path_output): + """ + Create participants.json file + :param path_output: path to the output BIDS folder + :return: + """ + # Create participants.json + data_json = { + "participant_id": { + "Description": "Unique Participant ID", + "LongName": "Participant ID" + }, + "pathology": { + "Description": "Pathology", + "LongName": "Pathology name" + }, + "data_id": { + "Description": "Subject ID as under duke/mri/", + "LongName": "Subject ID" + }, + "institution_id": { + "Description": "Institution ID as under duke/mri/", + "LongName": "Institution ID" + }, + "institution_": { + "Description": "Institution ID after conversion to BIDS", + "LongName": "BIDS Institution ID" + } + } + with open(os.path.join(path_output, 'participants.json'), 'w') as json_participants: + json.dump(data_json, json_participants, indent=4) + logger.info(f'participants.json created in {path_output}') + + +def create_dataset_description(path_output): + """ + Create dataset_description.json file + :param path_output: path to the output BIDS folder + :return: + """ + dataset_description = {"BIDSVersion": "BIDS 1.8.0", + "Name": "inspired" + } + with open(os.path.join(path_output, 'dataset_description.json'), 'w') as json_dataset_description: + json.dump(dataset_description, json_dataset_description, indent=4) + logger.info(f'dataset_description.json created in {path_output}') + + +def create_README(path_output): + """ + Create README file + :param path_output: path to the output BIDS folder + :return: + """ + with open(os.path.join(path_output, 'README'), 'w') as readme_file: + readme_file.write(f'# INSPIRED\n\nThis is an MRI dataset for the INSPIRED project.\n\n## dataset structure\n\n' + f'The dataset contains data from two centers (Toronto, Zurich) across three pathologies (' + f'DCM, SCI, HC). 
The following images are included:\n\nSpinal cord MRI data:\n- DWI (A-P and ' + f'P-A phase encoding)\n- T1w sag\n- T1w sag\n- T1w sag\n- T2star tra\nBrain MRI data:\n- DWI ' + f'(A-P and P-A phase encoding)\n- MPM (multi-parameter mapping)\n\n## naming convention\n\n' + f'sub-XXX\n\nexample:\nsub-zurichDCM001\n\nNote: the label `bp-cspine` is ' + f'used to differentiate spine images from brain.') + logger.info(f'README created in {path_output}') + + +def copy_script(path_output): + """ + Copy the script itself to the path_output/code folder + :param path_output: path to the output BIDS folder + :return: + """ + path_script_in = sys.argv[0] + path_code = os.path.join(path_output, 'code') + if not os.path.isdir(path_code): + os.makedirs(path_code, exist_ok=True) + path_script_out = os.path.join(path_code, sys.argv[0].split(sep='/')[-1]) + logger.info(f'Copying {path_script_in} to {path_script_out}') + shutil.copyfile(path_script_in, path_script_out) + + def get_parameters(): parser = argparse.ArgumentParser(description='Convert dataset to BIDS format.') parser.add_argument("-i", "--path-input", @@ -290,7 +370,11 @@ def main(path_input, path_output): participants_tsv_list.append([subject_out, pathology_out, subject_in.split(sep='/')[-1], centre_in, centre_out]) - write_participants_tsv(participants_tsv_list, path_output) + create_participants_tsv(participants_tsv_list, path_output) + create_participants_json(path_output) + create_dataset_description(path_output) + create_README(path_output) + copy_script(path_output) if __name__ == "__main__": From 1c2d836b8801baa7bf4d23d728e8da2e3391194f Mon Sep 17 00:00:00 2001 From: valosekj Date: Sun, 4 Dec 2022 14:50:19 -0500 Subject: [PATCH 10/38] Use ISO-8859-1 encoding to open json files --- scripts/curate_data_inspired.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 6f84b60..eb3498b 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -104,7 +104,10 @@ def read_json_file(path_to_file): :param path_to_file: path to input json file :return: """ - with open(path_to_file) as p: + # Note: open command returned error for the `02/hc/008/bl/brain/mpm_raw/s837313-0004-00001-000880-05.json` file: + # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe4 in position 1340: invalid continuation byte + # Thus, `encoding="ISO-8859-1"` has to be used. 
Source: https://stackoverflow.com/a/19706723 + with open(path_to_file, encoding="ISO-8859-1") as p: loaded_json = json.load(p) series_description = loaded_json['acqpar'][0]['SeriesDescription'] flip_angle = loaded_json['acqpar'][0]['FlipAngle'] From 71c6eb0cb5a8fb33ba2f0077514f243a518b0b0e Mon Sep 17 00:00:00 2001 From: valosekj Date: Sun, 4 Dec 2022 15:08:38 -0500 Subject: [PATCH 11/38] Sort subjects in participants.tsv; fix typo in participants.json --- scripts/curate_data_inspired.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index eb3498b..ce1acd5 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -198,7 +198,7 @@ def create_participants_json(path_output): "Description": "Institution ID as under duke/mri/", "LongName": "Institution ID" }, - "institution_": { + "institution": { "Description": "Institution ID after conversion to BIDS", "LongName": "BIDS Institution ID" } @@ -302,7 +302,7 @@ def main(path_input, path_output): # Loop across pathologies (hc, csm, sci) for pathology_in, pathology_out in pathologies_conv_dict.items(): # Loop across subjects (001, ...) - for sub_index, subject_in in enumerate(glob.glob(os.path.join(path_input, centre_in, pathology_in, '*')), + for sub_index, subject_in in enumerate(sorted(glob.glob(os.path.join(path_input, centre_in, pathology_in, '*'))), start=1): # If the input subject folder is .tar.gz, extract it if subject_in.endswith('.tar.gz'): From 13ecab182bc484d25e60aa3e031953d406d478c5 Mon Sep 17 00:00:00 2001 From: valosekj Date: Sun, 4 Dec 2022 15:32:36 -0500 Subject: [PATCH 12/38] Keep only .tar.gz subjects in the input dataset --- scripts/curate_data_inspired.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index ce1acd5..75dfc23 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -372,6 +372,10 @@ def main(path_input, path_output): f'Skipping this subject.') participants_tsv_list.append([subject_out, pathology_out, subject_in.split(sep='/')[-1], centre_in, centre_out]) + # Remove uncompressed subject dir (i.e., keep only .tar.gz). + # (but first, make sure that .tar.gz subject exists) + if os.path.isfile(subject_in + '.tar.gz'): + shutil.rmtree(subject_in) create_participants_tsv(participants_tsv_list, path_output) create_participants_json(path_output) From ad2e6585060098b12c6ef23eebd257e584a3c694 Mon Sep 17 00:00:00 2001 From: valosekj Date: Mon, 5 Dec 2022 09:02:49 -0500 Subject: [PATCH 13/38] Remove solved TODO --- scripts/curate_data_inspired.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 75dfc23..cfc02a1 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -306,10 +306,8 @@ def main(path_input, path_output): start=1): # If the input subject folder is .tar.gz, extract it if subject_in.endswith('.tar.gz'): - logger.info(f'Unpacking tar archive for {subject_in}...') + logger.info(f'Unpacking tar archive {subject_in}...') os.system("tar -xf " + subject_in + " --directory " + os.path.join(path_input, centre_in, pathology_in)) - # TODO - consider what to do with extracted folders once the BIDS conversion is done. - # Delete them and keep only original .tar.gz files? 
# Remove '.tar.gz' from the subject_in variable subject_in = subject_in.replace('.tar.gz', '') # Loop across regions (brain or cord) From 31df28ef8fe2c45059aee2fc1d8cde4e5166b53d Mon Sep 17 00:00:00 2001 From: valosekj Date: Mon, 5 Dec 2022 09:23:07 -0500 Subject: [PATCH 14/38] Add usage example --- scripts/curate_data_inspired.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index cfc02a1..8731b16 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -10,6 +10,9 @@ # - DWI (A-P and P-A phase encoding) # - MPM (multi-parameter mapping) # +# USAGE: +# python3 /curate_data_inspired.py -i /INSPIRED -o /INSPIRED_bids +# # Authors: Jan Valosek import os From 581edc0294f3a9916ce7405e25af90e5d5f1d1bc Mon Sep 17 00:00:00 2001 From: valosekj Date: Mon, 12 Dec 2022 13:49:08 -0500 Subject: [PATCH 15/38] Remove function for automatic README creation. --- scripts/curate_data_inspired.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 8731b16..e502ddf 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -225,23 +225,6 @@ def create_dataset_description(path_output): logger.info(f'dataset_description.json created in {path_output}') -def create_README(path_output): - """ - Create README file - :param path_output: path to the output BIDS folder - :return: - """ - with open(os.path.join(path_output, 'README'), 'w') as readme_file: - readme_file.write(f'# INSPIRED\n\nThis is an MRI dataset for the INSPIRED project.\n\n## dataset structure\n\n' - f'The dataset contains data from two centers (Toronto, Zurich) across three pathologies (' - f'DCM, SCI, HC). The following images are included:\n\nSpinal cord MRI data:\n- DWI (A-P and ' - f'P-A phase encoding)\n- T1w sag\n- T1w sag\n- T1w sag\n- T2star tra\nBrain MRI data:\n- DWI ' - f'(A-P and P-A phase encoding)\n- MPM (multi-parameter mapping)\n\n## naming convention\n\n' - f'sub-XXX\n\nexample:\nsub-zurichDCM001\n\nNote: the label `bp-cspine` is ' - f'used to differentiate spine images from brain.') - logger.info(f'README created in {path_output}') - - def copy_script(path_output): """ Copy the script itself to the path_output/code folder @@ -381,7 +364,6 @@ def main(path_input, path_output): create_participants_tsv(participants_tsv_list, path_output) create_participants_json(path_output) create_dataset_description(path_output) - create_README(path_output) copy_script(path_output) From 09e12eddafe213b5a3bf1c7dd1ea6abdbcba49e5 Mon Sep 17 00:00:00 2001 From: valosekj Date: Mon, 12 Dec 2022 13:57:28 -0500 Subject: [PATCH 16/38] Keep the original subjectID --- scripts/curate_data_inspired.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index e502ddf..871aba8 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -288,8 +288,9 @@ def main(path_input, path_output): # Loop across pathologies (hc, csm, sci) for pathology_in, pathology_out in pathologies_conv_dict.items(): # Loop across subjects (001, ...) 
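In the change below, the subject ID is taken from the path with split(sep='/'); os.path.basename gives the same result for the folder paths returned by glob and does not depend on the path separator. A sketch with a hypothetical path:

import os

subject_in = '/data/inspired/01/csm/001'   # hypothetical input path
subject_id = os.path.basename(subject_in)  # '001', same as subject_in.split(sep='/')[-1]
print(subject_id)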
- for sub_index, subject_in in enumerate(sorted(glob.glob(os.path.join(path_input, centre_in, pathology_in, '*'))), - start=1): + for subject_in in sorted(glob.glob(os.path.join(path_input, centre_in, pathology_in, '*'))): + # Get subjectID (e.g., 001) + subject_id = subject_in.split(sep='/')[-1] # If the input subject folder is .tar.gz, extract it if subject_in.endswith('.tar.gz'): logger.info(f'Unpacking tar archive {subject_in}...') @@ -299,7 +300,7 @@ def main(path_input, path_output): # Loop across regions (brain or cord) for region in ['brain', 'cord']: # Construct output subjectID containing centre name and pathology, e.g., 'sub-torontoDCM001' - subject_out = prefix + centre_out + pathology_out + f"{sub_index:03d}" + subject_out = prefix + centre_out + pathology_out + subject_id if region == 'cord': # Process spinal cord anatomical and DWI data # Loop across files @@ -355,7 +356,7 @@ def main(path_input, path_output): logger.warning(f'WARNING: There are no json sidecars in {mpm_raw_folder_path}. ' f'Skipping this subject.') - participants_tsv_list.append([subject_out, pathology_out, subject_in.split(sep='/')[-1], centre_in, centre_out]) + participants_tsv_list.append([subject_out, pathology_out, subject_id, centre_in, centre_out]) # Remove uncompressed subject dir (i.e., keep only .tar.gz). # (but first, make sure that .tar.gz subject exists) if os.path.isfile(subject_in + '.tar.gz'): From 2ce0afdb54a5bfea31b0af7927ec8f141dfdbf31 Mon Sep 17 00:00:00 2001 From: valosekj Date: Mon, 12 Dec 2022 15:03:08 -0500 Subject: [PATCH 17/38] Create .bidsignore to exclude `bp-cspine` tag from bids-validator --- scripts/curate_data_inspired.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 871aba8..8d6cc97 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -225,6 +225,21 @@ def create_dataset_description(path_output): logger.info(f'dataset_description.json created in {path_output}') +def create_bidsignore_file(path_output): + """ + Create .bidsignore file defining files that should be ignored by the bids-validator. 
+ We want to exclude files with `bp-cspine` tag since BEP025 + (https://docs.google.com/document/d/1chZv7vAPE-ebPDxMktfI9i1OkLNR2FELIfpVYsaZPr4/edit#heading=h.4k1noo90gelw) + is not merged to BIDS yet + :param path_output: + :return: + """ + bidsignore = "*/*/*_bp-cspine*" + with open(os.path.join(path_output, '.bidsignore'), 'w') as bidsignore_file: + bidsignore_file.write(f'{bidsignore}\n') + logger.info(f'.bidsignore created in {path_output}') + + def copy_script(path_output): """ Copy the script itself to the path_output/code folder @@ -365,6 +380,7 @@ def main(path_input, path_output): create_participants_tsv(participants_tsv_list, path_output) create_participants_json(path_output) create_dataset_description(path_output) + create_bidsignore_file(path_output) copy_script(path_output) From a2c5bbab87f155b6ba928d5ee120adf87339ec80 Mon Sep 17 00:00:00 2001 From: valosekj Date: Mon, 12 Dec 2022 15:17:56 -0500 Subject: [PATCH 18/38] Fix bug in subject_id variable construction --- scripts/curate_data_inspired.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 8d6cc97..465d362 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -304,14 +304,14 @@ def main(path_input, path_output): for pathology_in, pathology_out in pathologies_conv_dict.items(): # Loop across subjects (001, ...) for subject_in in sorted(glob.glob(os.path.join(path_input, centre_in, pathology_in, '*'))): - # Get subjectID (e.g., 001) - subject_id = subject_in.split(sep='/')[-1] # If the input subject folder is .tar.gz, extract it if subject_in.endswith('.tar.gz'): logger.info(f'Unpacking tar archive {subject_in}...') os.system("tar -xf " + subject_in + " --directory " + os.path.join(path_input, centre_in, pathology_in)) # Remove '.tar.gz' from the subject_in variable subject_in = subject_in.replace('.tar.gz', '') + # Get subjectID (e.g., 001) + subject_id = subject_in.split(sep='/')[-1] # Loop across regions (brain or cord) for region in ['brain', 'cord']: # Construct output subjectID containing centre name and pathology, e.g., 'sub-torontoDCM001' From 8556af9bacf8e137ec2cc2675ebef9634df25fe3 Mon Sep 17 00:00:00 2001 From: valosekj Date: Mon, 12 Dec 2022 15:27:04 -0500 Subject: [PATCH 19/38] Add README for inspired dataset --- scripts/README | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 scripts/README diff --git a/scripts/README b/scripts/README new file mode 100644 index 0000000..ffcfe43 --- /dev/null +++ b/scripts/README @@ -0,0 +1,33 @@ +# INSPIRED + +This is an MRI dataset acquired in the context of the INSPIRED project. + +https://www.spinalsurgerynews.com/2016/10/inspired-spinal-cord-neuro-imaging-project/14594 + +## contact person + +Dataset shared by: +Email communication: + +## dataset structure + +The dataset contains data from two centers (Toronto, Zurich) across three pathologies (DCM, SCI, HC). The following images are included: + +Spinal cord MRI data: +- DWI (A-P and P-A phase encoding) +- T1w sag +- T1w sag +- T1w sag +- T2star tra +Brain MRI data: +- DWI (A-P and P-A phase encoding) +- MPM (multi-parameter mapping) + +## naming convention + +sub-XXX + +example: +sub-zurichDCM001 + +Note: the label `bp-cspine` is used to differentiate spine images from brain. 
\ No newline at end of file From 30ae9979329ec2fc261abf8be9c5857636af063a Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 21 Dec 2022 15:35:48 -0500 Subject: [PATCH 20/38] Switch from `bp-cspine` to `acq-cspine` --- scripts/curate_data_inspired.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 465d362..43b3a56 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -48,17 +48,19 @@ } # Dictionary for image filename conversion -# Note: we use label `bp-cspine` to differentiate spine imaging from brain -# Details: BIDS BEP025 Proposal (https://docs.google.com/document/d/1chZv7vAPE-ebPDxMktfI9i1OkLNR2FELIfpVYsaZPr4) +# Note: we use label `acq-cspine` to differentiate spine imaging from brain +# Note: we use label `acq-cspine` over `bp-cspine` since BIDS BEP025 is not merged yet (and thus does not pass bids-validator) +# BIDS BEP025 Proposal: https://docs.google.com/document/d/1chZv7vAPE-ebPDxMktfI9i1OkLNR2FELIfpVYsaZPr4 +# Discussion `acq-cspine` vs `bp-cspine`: https://github.com/neuropoly/data-management/pull/185#issuecomment-1347421696 images_spine_conv_dict = { - 'dwi.nii.gz': 'dir-AP_bp-cspine_dwi.nii.gz', - 'dwi.bval': 'dir-AP_bp-cspine_dwi.bval', - 'dwi.bvec': 'dir-AP_bp-cspine_dwi.bvec', - 'dwi_reversed_blip.nii.gz': 'dir-PA_bp-cspine_dwi.nii.gz', - 't1_sag.nii.gz': 'bp-cspine_T1w.nii.gz', - 't2_sag.nii.gz': 'acq-coronal_bp-cspine_T2w.nii.gz', - 't2_tra.nii.gz': 'acq-axial_bp-cspine_T2w.nii.gz', - 'pd_medic.nii.gz': 'bp-cspine_T2star.nii.gz' + 'dwi.nii.gz': 'dir-AP_acq-cspine_dwi.nii.gz', + 'dwi.bval': 'dir-AP_acq-cspine_dwi.bval', + 'dwi.bvec': 'dir-AP_acq-cspine_dwi.bvec', + 'dwi_reversed_blip.nii.gz': 'dir-PA_acq-cspine_dwi.nii.gz', + 't1_sag.nii.gz': 'acq-cspine_T1w.nii.gz', + 't2_sag.nii.gz': 'acq-cspineSagittal_T2w.nii.gz', + 't2_tra.nii.gz': 'acq-cspineAxial_T2w.nii.gz', + 'pd_medic.nii.gz': 'acq-cspine_T2star.nii.gz' } # Dictionary for brain image filename conversion @@ -75,7 +77,7 @@ def copy_file(path_file_in, path_dir_out, file_out): Copy file from input non-BIDS dataset to BIDS compliant dataset :param path_file_in: path of the input non-BIDS file which will be copied :param path_dir_out: path of the output BIDS directory; for example sub-torontoDCM001/dwi - :param file_out: filename of the output BIDS file; for example 'sub-torontoDCM001_bp-cspine_dir-AP_dwi.nii.gz' + :param file_out: filename of the output BIDS file; for example 'sub-torontoDCM001_dir-AP_acq-cspine_dwi.nii.gz' :return: """ # Make sure that the input file exists, if so, copy it @@ -228,7 +230,7 @@ def create_dataset_description(path_output): def create_bidsignore_file(path_output): """ Create .bidsignore file defining files that should be ignored by the bids-validator. 
- We want to exclude files with `bp-cspine` tag since BEP025 + We want to exclude files with `acq-cspine` tag since BEP025 (https://docs.google.com/document/d/1chZv7vAPE-ebPDxMktfI9i1OkLNR2FELIfpVYsaZPr4/edit#heading=h.4k1noo90gelw) is not merged to BIDS yet :param path_output: @@ -322,7 +324,7 @@ def main(path_input, path_output): for image_in, image_out in images_spine_conv_dict.items(): # Construct path to the input file path_file_in = os.path.join(path_input, centre_in, pathology_in, subject_in, 'bl', region, image_in) - # Construct output filename, e.g., 'sub-torontoDCM001_bp-cspine_dir-AP_dwi.nii.gz' + # Construct output filename, e.g., 'sub-torontoDCM001_dir-AP_acq-cspine_dwi.nii.gz' file_out = subject_out + '_' + image_out # Construct path to the output BIDS compliant directory if 'dwi' in image_in: @@ -380,7 +382,7 @@ def main(path_input, path_output): create_participants_tsv(participants_tsv_list, path_output) create_participants_json(path_output) create_dataset_description(path_output) - create_bidsignore_file(path_output) + #create_bidsignore_file(path_output) copy_script(path_output) From b364bf51acf960157f2a62cbe03ad9819ca46804 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 21 Dec 2022 15:36:30 -0500 Subject: [PATCH 21/38] Deal with SC derivatives --- scripts/curate_data_inspired.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 43b3a56..6a51b32 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -63,6 +63,14 @@ 'pd_medic.nii.gz': 'acq-cspine_T2star.nii.gz' } +# TODO - include also DWI derivatives +derivatives_spine_conv_dict = { + 't2_seg.nii.gz': 'acq-cspineAxial_T2w_seg.nii.gz', + # TODO - check which suffix is used for BIDS gm-seg and wm-seg derivatives + 'gm_seg.nii.gz': 'acq-cspine_T2star_gm-seg.nii.gz', + 'wm_seg.nii.gz': 'acq-cspine_T2star_wm-seg.nii.gz' +} + # Dictionary for brain image filename conversion images_brain_conv_dict = { 'dwi.nii.gz': 'dir-AP_dwi.nii.gz', @@ -286,6 +294,10 @@ def main(path_input, path_output): shutil.rmtree(path_output) os.makedirs(path_output, exist_ok=True) + # Construct path to derivatives/labels + path_derivatives = os.path.join(path_output, 'derivatives', 'labels') + os.makedirs(path_derivatives, exist_ok=True) + FNAME_LOG = os.path.join(path_output, 'bids_conversion.log') # Dump log file there if os.path.exists(FNAME_LOG): @@ -334,6 +346,22 @@ def main(path_input, path_output): # Copy file and create a dummy json sidecar if does not exist copy_file(path_file_in, path_dir_out, file_out) + # Deal with derivatives (i.e., spinal cord segmentation) located in `sct_processing` folder + path_sct_processing = os.path.join(path_input, centre_in, pathology_in, subject_in, 'bl', + region, 'sct_processing') + if os.path.isdir(path_sct_processing): + for image_in, image_out in derivatives_spine_conv_dict.items(): + if 't2' in image_in: + contrast = 't2' + else: + contrast = 't2s' + # Construct path to the input file + path_file_in = os.path.join(path_sct_processing, contrast, image_in) + # Construct output filename, e.g., 'sub-torontoDCM001_acq-cspineAxial_T2w_seg.nii.gz' + file_out = subject_out + '_' + image_out + # Construct path to the output BIDS compliant derivatives directory + path_dir_out = os.path.join(path_derivatives, subject_out, 'anat') + copy_file(path_file_in, path_dir_out, file_out) elif region == 'brain': # Process DWI brain files # Loop across files From 
cf6079007ec3c83260cd03a91d3707ce596dc17d Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 4 Jan 2023 08:33:16 -0500 Subject: [PATCH 22/38] Use label-SC_mask, label-WM_mask, and label-GM_mask --- scripts/curate_data_inspired.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 6a51b32..b8d5210 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -65,10 +65,9 @@ # TODO - include also DWI derivatives derivatives_spine_conv_dict = { - 't2_seg.nii.gz': 'acq-cspineAxial_T2w_seg.nii.gz', - # TODO - check which suffix is used for BIDS gm-seg and wm-seg derivatives - 'gm_seg.nii.gz': 'acq-cspine_T2star_gm-seg.nii.gz', - 'wm_seg.nii.gz': 'acq-cspine_T2star_wm-seg.nii.gz' + 't2_seg.nii.gz': 'acq-cspineAxial_T2w_label-SC_mask.nii.gz', + 'gm_seg.nii.gz': 'acq-cspine_T2star_label-GM_mask.nii.gz', + 'wm_seg.nii.gz': 'acq-cspine_T2star_label-WM_mask.nii.gz' } # Dictionary for brain image filename conversion @@ -357,7 +356,7 @@ def main(path_input, path_output): contrast = 't2s' # Construct path to the input file path_file_in = os.path.join(path_sct_processing, contrast, image_in) - # Construct output filename, e.g., 'sub-torontoDCM001_acq-cspineAxial_T2w_seg.nii.gz' + # Construct output filename, e.g., 'sub-torontoDCM001_acq-cspineAxial_T2w_label-SC_mask.nii.gz' file_out = subject_out + '_' + image_out # Construct path to the output BIDS compliant derivatives directory path_dir_out = os.path.join(path_derivatives, subject_out, 'anat') From 0d135cc4e60dff40f374165da255a8ac4397e7cb Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 4 Jan 2023 08:35:26 -0500 Subject: [PATCH 23/38] Include DatasetType (raw or derivative) to dataset_description.json. Add write_json function. 
--- scripts/curate_data_inspired.py | 52 ++++++++++++++++----------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index b8d5210..19612c5 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -170,6 +170,20 @@ def construct_mpm_bids_filename(mpm_files_dict, path_output, subject_out): copy_file(re.sub('.nii(.gz)*', '.json', path_file_in), path_dir_out, file_out.replace('.nii.gz', '.json')) +def write_json(path_output, json_filename, data_json): + """ + :param path_output: path to the output BIDS folder + :param json_filename: json filename, for example: participants.json + :param data_json: JSON formatted content + :return: + """ + with open(os.path.join(path_output, json_filename), 'w') as json_file: + json.dump(data_json, json_file, indent=4) + # Add last newline + json_file.write("\n") + logger.info(f'{json_filename} created in {path_output}') + + def create_participants_tsv(participants_tsv_list, path_output): """ Write participants.tsv file @@ -215,38 +229,22 @@ def create_participants_json(path_output): "LongName": "BIDS Institution ID" } } - with open(os.path.join(path_output, 'participants.json'), 'w') as json_participants: - json.dump(data_json, json_participants, indent=4) - logger.info(f'participants.json created in {path_output}') + write_json(path_output, 'participants.json', data_json) -def create_dataset_description(path_output): +def create_dataset_description(path_output, datasettype): """ Create dataset_description.json file :param path_output: path to the output BIDS folder + :param datasettype: raw or derivative (https://bids-specification.readthedocs.io/en/stable/glossary.html#datasettype-metadata) :return: """ - dataset_description = {"BIDSVersion": "BIDS 1.8.0", - "Name": "inspired" - } - with open(os.path.join(path_output, 'dataset_description.json'), 'w') as json_dataset_description: - json.dump(dataset_description, json_dataset_description, indent=4) - logger.info(f'dataset_description.json created in {path_output}') - - -def create_bidsignore_file(path_output): - """ - Create .bidsignore file defining files that should be ignored by the bids-validator. 
- We want to exclude files with `acq-cspine` tag since BEP025 - (https://docs.google.com/document/d/1chZv7vAPE-ebPDxMktfI9i1OkLNR2FELIfpVYsaZPr4/edit#heading=h.4k1noo90gelw) - is not merged to BIDS yet - :param path_output: - :return: - """ - bidsignore = "*/*/*_bp-cspine*" - with open(os.path.join(path_output, '.bidsignore'), 'w') as bidsignore_file: - bidsignore_file.write(f'{bidsignore}\n') - logger.info(f'.bidsignore created in {path_output}') + data_json = { + "BIDSVersion": "BIDS 1.8.0", + "Name": "inspired", + "DatasetType": datasettype, + } + write_json(path_output, 'dataset_description.json', data_json) def copy_script(path_output): @@ -408,8 +406,8 @@ def main(path_input, path_output): create_participants_tsv(participants_tsv_list, path_output) create_participants_json(path_output) - create_dataset_description(path_output) - #create_bidsignore_file(path_output) + create_dataset_description(path_output, 'raw') + create_dataset_description(os.path.join(path_output, 'derivatives'), 'derivative') copy_script(path_output) From 1b528670fc99504263a1191eecc74336a1bd5416 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 4 Jan 2023 08:36:05 -0500 Subject: [PATCH 24/38] Update participants.tsv and participants.json --- scripts/curate_data_inspired.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 19612c5..11adcac 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -194,7 +194,7 @@ def create_participants_tsv(participants_tsv_list, path_output): """ with open(os.path.join(path_output, 'participants.tsv'), 'w') as tsv_file: tsv_writer = csv.writer(tsv_file, delimiter='\t', lineterminator='\n') - tsv_writer.writerow(['participant_id', 'pathology', 'data_id', 'institution_id', 'institution']) + tsv_writer.writerow(['participant_id', 'source_id', 'pathology', 'institution_id', 'institution']) for item in participants_tsv_list: tsv_writer.writerow(item) logger.info(f'participants.tsv created in {path_output}') @@ -212,20 +212,20 @@ def create_participants_json(path_output): "Description": "Unique Participant ID", "LongName": "Participant ID" }, + "source_id": { + "Description": "Subject ID in the unprocessed data", + "LongName": "Subject ID in the unprocessed data" + }, "pathology": { "Description": "Pathology", "LongName": "Pathology name" }, - "data_id": { - "Description": "Subject ID as under duke/mri/", - "LongName": "Subject ID" - }, "institution_id": { - "Description": "Institution ID as under duke/mri/", - "LongName": "Institution ID" + "Description": "Institution ID in the unprocessed data", + "LongName": "Institution ID in the unprocessed data" }, "institution": { - "Description": "Institution ID after conversion to BIDS", + "Description": "Human-friendly institution name", "LongName": "BIDS Institution ID" } } @@ -398,7 +398,7 @@ def main(path_input, path_output): logger.warning(f'WARNING: There are no json sidecars in {mpm_raw_folder_path}. ' f'Skipping this subject.') - participants_tsv_list.append([subject_out, pathology_out, subject_id, centre_in, centre_out]) + participants_tsv_list.append([subject_out, subject_id, pathology_out, centre_in, centre_out]) # Remove uncompressed subject dir (i.e., keep only .tar.gz). 
# (but first, make sure that .tar.gz subject exists) if os.path.isfile(subject_in + '.tar.gz'): From 65e56e1d9a7022b56c3cdff2fe7573c88e2e1160 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 4 Jan 2023 10:58:00 -0500 Subject: [PATCH 25/38] Add script for merge of participants.tsv and table with clinical DCM data --- ...erge_clinical_table_to_participants_tsv.py | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 scripts/inspired_merge_clinical_table_to_participants_tsv.py diff --git a/scripts/inspired_merge_clinical_table_to_participants_tsv.py b/scripts/inspired_merge_clinical_table_to_participants_tsv.py new file mode 100644 index 0000000..ae841b5 --- /dev/null +++ b/scripts/inspired_merge_clinical_table_to_participants_tsv.py @@ -0,0 +1,127 @@ +# +# Merge table with clinical data for DCM patients (provided Patrick Freund, Balgrist) with INSPIRED participants.tsv +# +# Note - pandas.read_excel requires openpyxl library (pip install openpyxl or conda install openpyxl) +# +# Authors: Jan Valosek +# + +import os +import argparse +import shutil + +import pandas as pd + +# For zurich site, subjectIDs match between clinical table and participants.tsv (i.e., ID 1 in clinical table +# corresponds to sub-zurichDCM001 in participants.tsv, ID 2 corresponds to sub-zurichDCM002, etc.) +# But for toronto site, subjectIDs do not match (i.e., ID 25 in clinical table corresponds to sub-torontoDCM001 in +# participants.tsv, ID 26 corresponds to sub-torontoDCM005, etc.) +# This dict thus allows the merge of both tables. +# - keys are subject ID from clinical table +# - values are subject ID for DCM patients from participants.tsv +subject_ID_dict = { + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 11: 11, + 12: 12, + 13: 13, + 14: 14, + 15: 15, + 16: 16, + 17: 17, + 18: 18, + 19: 19, + 20: 20, + 21: 21, + 22: 22, + 23: 23, + 24: 24, + 25: 1, + 26: 5, + 27: 6, + 28: 7, + 29: 8, + 30: 9, + 31: 10, + 32: 15, + 33: 17, + 34: 18, + 35: 19, + 36: 21, + 37: 22, + 38: 23, + } + + +def get_parser(): + parser = argparse.ArgumentParser(description='Convert dataset to BIDS format.') + parser.add_argument("-participants-file", + help="INSPIRED participants.tsv file", + required=True) + parser.add_argument("-clinical-file", + help="Excel table (.xlsx) with clinical data for DCM patients.", + required=True, + ) + return parser + + +def main(): + # Parse the command line arguments + parser = get_parser() + args = parser.parse_args() + + # Read input .tsv and .xlsx tables as Pandas DataFrames + if os.path.isfile(args.participants_file): + print('Reading: {}'.format(args.participants_file)) + participants_df = pd.read_csv(args.participants_file, sep='\t', header=0) + else: + raise FileNotFoundError(f'{args.participants_file} not found') + if os.path.isfile(args.clinical_file): + print('Reading: {}'.format(args.clinical_file)) + # skip first two rows because they contain legend + clinical_df = pd.read_excel(args.clinical_file, skiprows=[0, 1]) + else: + raise FileNotFoundError(f'{args.clinical_file} not found') + + # Insert list of DCM subjectIDs from participants.tsv into clinical table to allow merge + # Note: 'data_id' is used to match with column in participants.tsv + clinical_df.insert(1, 'data_id', list(subject_ID_dict.values())) + # Insert a new column with institutions in lower case (toronto, zurich) into clinical table to allow merge + # Note: 'institution' is used to match with column in participants.tsv + clinical_df['institution'] = [x.lower() for x 
in clinical_df['Site']] + + # First, merge clinical table to participants.tsv + # Note: we work here only with DCM since there is overlap in 'data_id' with other pathologies + temp_df = pd.merge(participants_df[participants_df['pathology'] == 'DCM'], clinical_df, + on=['institution', 'data_id']) + # Drop columns from 'temp_df' (they are already included in the participants.tsv) + temp_df = temp_df.drop(['pathology', 'data_id', 'institution_id', 'institution', 'ID', 'Site'], axis=1) + # Now, merge 'temp_df' back to the participants.tsv + final_df = pd.merge(participants_df, temp_df, on='participant_id', how='outer') + + # Convert pd column names to lowercase + final_df = final_df.rename(columns=str.lower) + # Replace spaces in pd column names by '_' + final_df.columns = final_df.columns.str.replace(' ', '_') + + # Rename female to F and male to M + final_df = final_df.replace({"sex": {'female': 'F', 'male': 'M'}}) + + # Backup original participants.tsv + shutil.move(args.participants_file, args.participants_file.replace('.tsv', '_backup.tsv')) + + # Save merged pd as .tsv + print('Saving: {}'.format(args.participants_file)) + final_df.to_csv(args.participants_file, sep="\t") + + +if __name__ == "__main__": + main() From 5a39c77766a3b4eb135612aac923642244273687 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 4 Jan 2023 11:22:10 -0500 Subject: [PATCH 26/38] Rename data_id to source_id to be compatible with participants.tsv --- ...nspired_merge_clinical_table_to_participants_tsv.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/inspired_merge_clinical_table_to_participants_tsv.py b/scripts/inspired_merge_clinical_table_to_participants_tsv.py index ae841b5..4a7fe39 100644 --- a/scripts/inspired_merge_clinical_table_to_participants_tsv.py +++ b/scripts/inspired_merge_clinical_table_to_participants_tsv.py @@ -92,18 +92,18 @@ def main(): raise FileNotFoundError(f'{args.clinical_file} not found') # Insert list of DCM subjectIDs from participants.tsv into clinical table to allow merge - # Note: 'data_id' is used to match with column in participants.tsv - clinical_df.insert(1, 'data_id', list(subject_ID_dict.values())) + # Note: 'source_id' is used to match with column in participants.tsv + clinical_df.insert(1, 'source_id', list(subject_ID_dict.values())) # Insert a new column with institutions in lower case (toronto, zurich) into clinical table to allow merge # Note: 'institution' is used to match with column in participants.tsv clinical_df['institution'] = [x.lower() for x in clinical_df['Site']] # First, merge clinical table to participants.tsv - # Note: we work here only with DCM since there is overlap in 'data_id' with other pathologies + # Note: we work here only with DCM since there is overlap in 'source_id' with other pathologies temp_df = pd.merge(participants_df[participants_df['pathology'] == 'DCM'], clinical_df, - on=['institution', 'data_id']) + on=['institution', 'source_id']) # Drop columns from 'temp_df' (they are already included in the participants.tsv) - temp_df = temp_df.drop(['pathology', 'data_id', 'institution_id', 'institution', 'ID', 'Site'], axis=1) + temp_df = temp_df.drop(['pathology', 'source_id', 'institution_id', 'institution', 'ID', 'Site'], axis=1) # Now, merge 'temp_df' back to the participants.tsv final_df = pd.merge(participants_df, temp_df, on='participant_id', how='outer') From 1151061c47a63aa35365b918658cdf48a827c5c9 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 4 Jan 2023 11:25:12 -0500 Subject: [PATCH 27/38] Use 
n/a for empty cells --- scripts/inspired_merge_clinical_table_to_participants_tsv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/inspired_merge_clinical_table_to_participants_tsv.py b/scripts/inspired_merge_clinical_table_to_participants_tsv.py index 4a7fe39..662cab8 100644 --- a/scripts/inspired_merge_clinical_table_to_participants_tsv.py +++ b/scripts/inspired_merge_clinical_table_to_participants_tsv.py @@ -120,7 +120,7 @@ def main(): # Save merged pd as .tsv print('Saving: {}'.format(args.participants_file)) - final_df.to_csv(args.participants_file, sep="\t") + final_df.to_csv(args.participants_file, sep='\t', na_rep='n/a') if __name__ == "__main__": From 374876a753cbedddc95ec7cda8685fdfde388bb5 Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 4 Jan 2023 11:34:32 -0500 Subject: [PATCH 28/38] Add leading zeros to source_id and institution_id (to match with the original participants.tsv) --- scripts/inspired_merge_clinical_table_to_participants_tsv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/inspired_merge_clinical_table_to_participants_tsv.py b/scripts/inspired_merge_clinical_table_to_participants_tsv.py index 662cab8..204700b 100644 --- a/scripts/inspired_merge_clinical_table_to_participants_tsv.py +++ b/scripts/inspired_merge_clinical_table_to_participants_tsv.py @@ -114,6 +114,9 @@ def main(): # Rename female to F and male to M final_df = final_df.replace({"sex": {'female': 'F', 'male': 'M'}}) + # Add leading zeros to source_id and institution_id (to match with the original participants.tsv) + final_df['source_id'] = final_df['source_id'].astype(str).str.zfill(3) + final_df['institution_id'] = final_df['institution_id'].astype(str).str.zfill(2) # Backup original participants.tsv shutil.move(args.participants_file, args.participants_file.replace('.tsv', '_backup.tsv')) From 8959d2c7a68643aed45744a72a0ee5dc3f08d99b Mon Sep 17 00:00:00 2001 From: valosekj Date: Wed, 4 Jan 2023 11:46:15 -0500 Subject: [PATCH 29/38] Clarify script description --- ...pired_merge_clinical_table_to_participants_tsv.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/inspired_merge_clinical_table_to_participants_tsv.py b/scripts/inspired_merge_clinical_table_to_participants_tsv.py index 204700b..ee56375 100644 --- a/scripts/inspired_merge_clinical_table_to_participants_tsv.py +++ b/scripts/inspired_merge_clinical_table_to_participants_tsv.py @@ -1,7 +1,7 @@ # -# Merge table with clinical data for DCM patients (provided Patrick Freund, Balgrist) with INSPIRED participants.tsv +# Merge the .xlsx table with DCM clinical data (provided by Patrick Freund, Balgrist) with the INSPIRED participants.tsv # -# Note - pandas.read_excel requires openpyxl library (pip install openpyxl or conda install openpyxl) +# Note: pandas.read_excel requires openpyxl library (pip install openpyxl or conda install openpyxl) # # Authors: Jan Valosek # @@ -13,12 +13,12 @@ import pandas as pd # For zurich site, subjectIDs match between clinical table and participants.tsv (i.e., ID 1 in clinical table -# corresponds to sub-zurichDCM001 in participants.tsv, ID 2 corresponds to sub-zurichDCM002, etc.) +# corresponds to sub-zurichDCM001 in participants.tsv, ID 2 corresponds to sub-zurichDCM002, etc.). # But for toronto site, subjectIDs do not match (i.e., ID 25 in clinical table corresponds to sub-torontoDCM001 in -# participants.tsv, ID 26 corresponds to sub-torontoDCM005, etc.) +# participants.tsv, ID 26 corresponds to sub-torontoDCM005, etc.). 
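A toy pandas sketch of what the two tweaks above amount to, on made-up values (note that DataFrame.to_csv() also writes the index column by default, so index=False is what one would typically add when regenerating a participants.tsv):

    import pandas as pd

    df = pd.DataFrame({'source_id': [1, 25], 'age': [34, None]})
    # zero-pad IDs to three digits, as done for source_id in the patch above
    df['source_id'] = df['source_id'].astype(str).str.zfill(3)
    # tab-separated output with 'n/a' for missing cells (the BIDS convention)
    df.to_csv('participants_example.tsv', sep='\t', na_rep='n/a', index=False)
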
# This dict thus allows the merge of both tables. -# - keys are subject ID from clinical table -# - values are subject ID for DCM patients from participants.tsv +# - keys are subjectIDs from the clinical table +# - values are subjectIDs for DCM patients from the participants.tsv subject_ID_dict = { 1: 1, 2: 2, From 6d3a956b22f82e8ce53423d638b8ebb260889dd1 Mon Sep 17 00:00:00 2001 From: valosekj Date: Fri, 6 Jan 2023 12:35:10 -0500 Subject: [PATCH 30/38] Rename derivatives/labels to derivatives/manual_labels --- scripts/curate_data_inspired.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 11adcac..e9adc0a 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -292,7 +292,7 @@ def main(path_input, path_output): os.makedirs(path_output, exist_ok=True) # Construct path to derivatives/labels - path_derivatives = os.path.join(path_output, 'derivatives', 'labels') + path_derivatives = os.path.join(path_output, 'derivatives', 'manual_labels') os.makedirs(path_derivatives, exist_ok=True) FNAME_LOG = os.path.join(path_output, 'bids_conversion.log') From a81a8efd8afa8a54ce36d629b2c3032ecbc8e958 Mon Sep 17 00:00:00 2001 From: valosekj Date: Fri, 20 Jan 2023 18:20:52 -0500 Subject: [PATCH 31/38] Copy qform to sform To address https://github.com/spinalcordtoolbox/spinalcordtoolbox/issues/3991#issuecomment-1378765661 --- scripts/curate_data_inspired.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index e9adc0a..29b824b 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -101,6 +101,19 @@ def copy_file(path_file_in, path_dir_out, file_out): create_dummy_json_sidecar_if_does_not_exist(path_file_out) +def copy_qform_to_sform(path_dir_out, file_out): + """ + Copy qform to sform for to address https://github.com/spinalcordtoolbox/spinalcordtoolbox/issues/3991#issuecomment-1378765661 + :param path_dir_out: + :param file_out: + :return: + """ + path_file = os.path.join(path_dir_out, file_out) + # Make sure that the input file exists, if so, copy it + if os.path.isfile(path_file): + os.system('sct_image -i ' + path_file + ' -set-qform-to-sform') + + # TODO - do we want to create just a empty json sidecar? Or do we want to include some params there? 
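Where Spinal Cord Toolbox is not available, an equivalent qform-to-sform copy can be sketched with nibabel; this is an illustrative alternative only, not what the patch runs (the patch shells out to sct_image):

    import nibabel as nib

    def copy_qform_to_sform_nibabel(path_file):
        """Set the sform affine equal to the qform affine and save the image in place."""
        img = nib.load(path_file)
        img.set_sform(img.get_qform(), code=1)  # code 1 = scanner-based coordinates
        nib.save(img, path_file)
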
def create_dummy_json_sidecar_if_does_not_exist(path_file_out): # Work only with .nii.gz (i.e., ignore .bval and .bvec files) @@ -343,6 +356,9 @@ def main(path_input, path_output): # Copy file and create a dummy json sidecar if does not exist copy_file(path_file_in, path_dir_out, file_out) + # Copy qform to sform to address https://github.com/spinalcordtoolbox/spinalcordtoolbox/issues/3991#issuecomment-1378765661 + if file_out.endswith('.nii.gz'): + copy_qform_to_sform(path_dir_out, file_out) # Deal with derivatives (i.e., spinal cord segmentation) located in `sct_processing` folder path_sct_processing = os.path.join(path_input, centre_in, pathology_in, subject_in, 'bl', region, 'sct_processing') From 4fbc4bfbb92f88c7973cfe57ced72d6f5861a5e0 Mon Sep 17 00:00:00 2001 From: valosekj Date: Sat, 21 Jan 2023 08:44:27 -0500 Subject: [PATCH 32/38] Swap 'dir-AP_acq-cspine' to 'acq-cspine_dir-AP' to pass bids-validator --- scripts/curate_data_inspired.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 29b824b..ca74413 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -53,10 +53,10 @@ # BIDS BEP025 Proposal: https://docs.google.com/document/d/1chZv7vAPE-ebPDxMktfI9i1OkLNR2FELIfpVYsaZPr4 # Discussion `acq-cspine` vs `bp-cspine`: https://github.com/neuropoly/data-management/pull/185#issuecomment-1347421696 images_spine_conv_dict = { - 'dwi.nii.gz': 'dir-AP_acq-cspine_dwi.nii.gz', - 'dwi.bval': 'dir-AP_acq-cspine_dwi.bval', - 'dwi.bvec': 'dir-AP_acq-cspine_dwi.bvec', - 'dwi_reversed_blip.nii.gz': 'dir-PA_acq-cspine_dwi.nii.gz', + 'dwi.nii.gz': 'acq-cspine_dir-AP_dwi.nii.gz', + 'dwi.bval': 'acq-cspine_dir-AP_dwi.bval', + 'dwi.bvec': 'acq-cspine_dir-AP_dwi.bvec', + 'dwi_reversed_blip.nii.gz': 'acq-cspine_dir-PA_dwi.nii.gz', 't1_sag.nii.gz': 'acq-cspine_T1w.nii.gz', 't2_sag.nii.gz': 'acq-cspineSagittal_T2w.nii.gz', 't2_tra.nii.gz': 'acq-cspineAxial_T2w.nii.gz', From f529249a0c1877c7b954cdb85649c27f76ce016d Mon Sep 17 00:00:00 2001 From: valosekj Date: Sat, 21 Jan 2023 08:45:14 -0500 Subject: [PATCH 33/38] Change '_mask' to '_seg' --- scripts/curate_data_inspired.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index ca74413..7401fd7 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -65,9 +65,9 @@ # TODO - include also DWI derivatives derivatives_spine_conv_dict = { - 't2_seg.nii.gz': 'acq-cspineAxial_T2w_label-SC_mask.nii.gz', - 'gm_seg.nii.gz': 'acq-cspine_T2star_label-GM_mask.nii.gz', - 'wm_seg.nii.gz': 'acq-cspine_T2star_label-WM_mask.nii.gz' + 't2_seg.nii.gz': 'acq-cspineAxial_T2w_label-SC_seg.nii.gz', + 'gm_seg.nii.gz': 'acq-cspine_T2star_label-GM_seg.nii.gz', + 'wm_seg.nii.gz': 'acq-cspine_T2star_label-WM_seg.nii.gz' } # Dictionary for brain image filename conversion From b5a7b5fa75b34e802f1fab22afa77bf99c2144d0 Mon Sep 17 00:00:00 2001 From: valosekj Date: Sat, 21 Jan 2023 10:01:25 -0500 Subject: [PATCH 34/38] Rename README to README.md and update it. 
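The entity swap above follows the ordering of the BIDS entity table, in which acq precedes dir. A small illustrative helper (not part of the script) that enforces that ordering for the entities used in this dataset:

    # Subset of the BIDS entity table, in the prescribed order
    ENTITY_ORDER = ['sub', 'acq', 'dir', 'label']

    def order_entities(stem):
        """Reorder the key-value entities of a BIDS filename stem, e.g. 'dir-AP_acq-cspine_dwi'."""
        *entities, suffix = stem.split('_')
        entities.sort(key=lambda e: ENTITY_ORDER.index(e.split('-')[0]))
        return '_'.join(entities + [suffix])

    # order_entities('dir-AP_acq-cspine_dwi') -> 'acq-cspine_dir-AP_dwi'
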
--- scripts/README | 33 --------------------------------- scripts/README.md | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 33 deletions(-) delete mode 100644 scripts/README create mode 100644 scripts/README.md diff --git a/scripts/README b/scripts/README deleted file mode 100644 index ffcfe43..0000000 --- a/scripts/README +++ /dev/null @@ -1,33 +0,0 @@ -# INSPIRED - -This is an MRI dataset acquired in the context of the INSPIRED project. - -https://www.spinalsurgerynews.com/2016/10/inspired-spinal-cord-neuro-imaging-project/14594 - -## contact person - -Dataset shared by: -Email communication: - -## dataset structure - -The dataset contains data from two centers (Toronto, Zurich) across three pathologies (DCM, SCI, HC). The following images are included: - -Spinal cord MRI data: -- DWI (A-P and P-A phase encoding) -- T1w sag -- T1w sag -- T1w sag -- T2star tra -Brain MRI data: -- DWI (A-P and P-A phase encoding) -- MPM (multi-parameter mapping) - -## naming convention - -sub-XXX - -example: -sub-zurichDCM001 - -Note: the label `bp-cspine` is used to differentiate spine images from brain. \ No newline at end of file diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..b38e4bf --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,34 @@ +# INSPIRED + +This is an MRI dataset acquired in the context of the INSPIRED project. + +It contains brain and spinal cord data acquired at two sites centers (Toronto, Zurich) from healthy controls (HC) and two pathologies: +- degenerative cervical myelopathy (DCM) +- spinal cord injury (SCI) + +It also contains spinal cord, gray matter and white matter segmentations for T2w axial images. + +## Dataset structure + +Spinal cord MRI data: +- T1w +- T2w axial +- T2w sagittal +- T2star +- DWI (A-P and P-A phase encoding) + +Brain MRI data: +- MPM (multi-parameter mapping) +- DWI (A-P and P-A phase encoding) + +## Naming convention + +sub-XXX + +Note: BIDS label `acq-cspine` is used to differentiate spine images from brain. For details, see https://github.com/neuropoly/data-management/pull/185#issuecomment-1362069079. + +Note: 01 corresponds to Toronto site, 02 to Zurich site. For details, see https://github.com/neuropoly/data-management/issues/184#issuecomment-1329250514. 
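For a single subject, the spinal cord filenames produced by the conversion dictionaries above would look roughly as follows (illustrative subject ID; the exact set depends on which source files exist, and each image is accompanied by a .json sidecar, plus .bval/.bvec for the DWI):

    sub-torontoDCM001/
        anat/
            sub-torontoDCM001_acq-cspine_T1w.nii.gz
            sub-torontoDCM001_acq-cspineSagittal_T2w.nii.gz
            sub-torontoDCM001_acq-cspineAxial_T2w.nii.gz
        dwi/
            sub-torontoDCM001_acq-cspine_dir-AP_dwi.nii.gz
            sub-torontoDCM001_acq-cspine_dir-PA_dwi.nii.gz
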
+ +## Details + +https://www.spinalsurgerynews.com/2016/10/inspired-spinal-cord-neuro-imaging-project/14594 \ No newline at end of file From 33f440a17047d77665e0c0206e598b9499099860 Mon Sep 17 00:00:00 2001 From: valosekj Date: Sat, 21 Jan 2023 10:05:33 -0500 Subject: [PATCH 35/38] Update 'participants.json' according to https://intranet.neuro.polymtl.ca/data/dataset-curation.html#participants-json --- scripts/curate_data_inspired.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/curate_data_inspired.py b/scripts/curate_data_inspired.py index 7401fd7..a2789bd 100644 --- a/scripts/curate_data_inspired.py +++ b/scripts/curate_data_inspired.py @@ -230,8 +230,13 @@ def create_participants_json(path_output): "LongName": "Subject ID in the unprocessed data" }, "pathology": { - "Description": "Pathology", - "LongName": "Pathology name" + "Description": "The diagnosis of pathology of the participant", + "LongName": "Pathology name", + "Levels": { + "HC": "Healthy Control", + "DCM": "Degenerative Cervical Myelopathy", + "SCI": "Traumatic Spinal Cord Injury" + } }, "institution_id": { "Description": "Institution ID in the unprocessed data", From e17ed2c72fdf127664ef2dba0e7eefb0ca28478d Mon Sep 17 00:00:00 2001 From: valosekj Date: Sat, 21 Jan 2023 10:13:53 -0500 Subject: [PATCH 36/38] README.md update --- scripts/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/README.md b/scripts/README.md index b38e4bf..8c97410 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -6,7 +6,7 @@ It contains brain and spinal cord data acquired at two sites centers (Toronto, Z - degenerative cervical myelopathy (DCM) - spinal cord injury (SCI) -It also contains spinal cord, gray matter and white matter segmentations for T2w axial images. +It also contains spinal cord segmentation for T2w axial images and gray matter and white matter segmentations for T2star images. ## Dataset structure From 9941af1811b4bf2553d9f3821ac46c4cc4a0fd1b Mon Sep 17 00:00:00 2001 From: Jan Valosek <39456460+valosekj@users.noreply.github.com> Date: Tue, 24 Jan 2023 11:22:15 -0500 Subject: [PATCH 37/38] Move infolink Co-authored-by: Julien Cohen-Adad --- scripts/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/README.md b/scripts/README.md index 8c97410..e3345e9 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,6 +1,6 @@ # INSPIRED -This is an MRI dataset acquired in the context of the INSPIRED project. +This is an MRI dataset acquired in the context of the INSPIRED project (https://www.spinalsurgerynews.com/2016/10/inspired-spinal-cord-neuro-imaging-project/14594). It contains brain and spinal cord data acquired at two sites centers (Toronto, Zurich) from healthy controls (HC) and two pathologies: - degenerative cervical myelopathy (DCM) From 7d9783667086b12dacfebd524a130d91ec483030 Mon Sep 17 00:00:00 2001 From: Jan Valosek <39456460+valosekj@users.noreply.github.com> Date: Tue, 24 Jan 2023 11:22:34 -0500 Subject: [PATCH 38/38] Add Contact Person section Co-authored-by: Julien Cohen-Adad --- scripts/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index e3345e9..2246e30 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -29,6 +29,7 @@ Note: BIDS label `acq-cspine` is used to differentiate spine images from brain. Note: 01 corresponds to Toronto site, 02 to Zurich site. 
For details, see https://github.com/neuropoly/data-management/issues/184#issuecomment-1329250514. -## Details +## Contact Person -https://www.spinalsurgerynews.com/2016/10/inspired-spinal-cord-neuro-imaging-project/14594 \ No newline at end of file +Main PI of the project: Claudia Wheeler Kingshott +Contact person: Julien Cohen-Adad \ No newline at end of file
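
Once the BIDS conversion has produced participants.tsv, the clinical-table merge script would be invoked along these lines (paths are placeholders):

    python scripts/inspired_merge_clinical_table_to_participants_tsv.py \
        -participants-file /path/to/bids/participants.tsv \
        -clinical-file /path/to/dcm_clinical_table.xlsx

The script backs up the original file as participants_backup.tsv and then overwrites participants.tsv with the merged table; pandas.read_excel requires openpyxl for the .xlsx input.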