# run_CONN.py

"""
Script that applies the CONN pipeline to a set of subjects

Uses the same structure as run_pipeline_prime.py

COPY script/Base_batch.m to the CONN base directory for this to work!

For each subject:
 - Creates new output directories
 - Copies the necessary data to the new directories
 - Converts the label generated by fastsurfer to MNI 1mm
 - Generates a .mat file with all the selected subjects
 - Runs it in parallel
 - Extracts the timeseries per region and the FC, and saves them in the
        same format as the other pipeline
"""

import argparse
import pandas as pd
import os
from lib.data_loading import load_data
from joblib import Parallel, delayed
import scipy.io as sio
import subprocess
import numpy as np


def _copy_file(src, dst):
    """
    Copy ``src`` to ``dst`` with ``cp`` without going through a shell.

    Passing the arguments as a list keeps paths that contain spaces or shell
    metacharacters intact. A failed copy is reported but never raises, so the
    pipeline stays best-effort (a missing input surfaces later as missing
    CONN results).
    """
    # assumes src is a single file path (not a glob pattern) -- TODO confirm
    # against load_data
    result = subprocess.run(["cp", str(src), str(dst)], check=False)
    if result.returncode != 0:
        print(f"Warning: could not copy {src} to {dst}")


def run_pipeline(row, out_dir, base_data_dir, batch):
    """
    Run the CONN pipeline for a single subject.

    Parameters:
     - row: record exposing ``SubjID``, ``CENTER`` and ``QC`` attributes
       (e.g. one item of ``DataFrame.itertuples()``)
     - out_dir: general output directory; results are written under
       ``{out_dir}/{CENTER}_Post/{SubjID}/CONN``
     - base_data_dir: general input directory; the subject data is loaded
       from ``{base_data_dir}/{CENTER}/{SubjID}``
     - batch: not used inside this function; kept so callers do not change

    Returns:
     - None when the subject is skipped (``QC == "N"``) or when processing
       finished and the result files were written
     - 0 when the CONN result .mat files could not be found
    """
    # NOTE: all subjects in a single CONN project need to share the same TR,
    # so every subject is processed as its own project.
    subID = row.SubjID
    center_to_process = row.CENTER

    # Subjects flagged "N" in the QC column are not processed.
    if row.QC == "N":
        return None

    # load data using the same function as the other pipeline
    d = load_data(
        f"{base_data_dir}/{center_to_process}/{subID}", subID, center_to_process
    )

    # Directory of the subject (also holds recon_all) and its CONN
    # sub-directory.
    recon_all_path = f"{out_dir}/{center_to_process}_Post/{subID}"
    out_dir_subject = f"{recon_all_path}/CONN"

    # exist_ok avoids the check-then-create race; creating the sub-directories
    # also creates out_dir_subject itself.
    for sub_dir in ("results", "fmri"):
        os.makedirs(f"{out_dir_subject}/{sub_dir}/", exist_ok=True)

    # copy:
    # - fmri (nii and json, if it exists)
    # - label text file of the segmentation

    # Only these centers provide a JSON sidecar for the fMRI.
    if center_to_process in ["MILAN", "MAINZ", "NAPLES", "OSLO"]:
        _copy_file(d["fMRI_json"], f"{out_dir_subject}/fmri")

    fmri = d["fMRI"]
    _copy_file(fmri, f"{out_dir_subject}/fmri")
    # presumably load_data reports TR in milliseconds -- TODO confirm
    TR = d["TR"] / 1000

    # Copy the .txt file with the freesurfer labels next to the ROIs
    file_newseg = f"{out_dir}/aparc.DKTatlas+aseg_newSeg.txt"
    _copy_file(file_newseg, f"{recon_all_path}/recon_all")

    # Parameters of the MATLAB batch.
    # NOTE(review): both paths are empty placeholders ('' once interpolated
    # in the shell command) and presumably must be filled in per machine.
    matlab_path = "''"
    conn_path = "''"
    func_path = f"{out_dir_subject}/fmri/{os.path.basename(fmri)}"
    sliceorder = "'BIDS'"  # DEPEND ON CENTER, TODO
    scans_to_remove = 5

    mat_timeseries_path = f"{out_dir_subject}/conn_FC/results/preprocessing/ROI_Subject001_Condition000.mat"
    mat_FC_path = f"{out_dir_subject}/conn_FC/results/firstlevel/FC1/resultsROI_Subject001_Condition001.mat"

    # Only launch MATLAB when the preprocessing result does not exist yet.
    if not os.path.exists(mat_timeseries_path):
        function_call = (
            f"Base_batch('{func_path}', '{recon_all_path}', '{out_dir_subject}', "
            f"str2num('{TR!s}'), {sliceorder}, {scans_to_remove});"
        )
        # NOTE(review): the printed command has -nodisplay and no trailing
        # "exit;", unlike the command that is actually executed below.
        print(
            f'export MATLABPATH={conn_path}; {matlab_path} -nosplash -nodisplay -nodesktop -r "cd {out_dir_subject}; {function_call}"'
        )
        # shell=True is kept because the command relies on "export ...;".
        # The call blocks until MATLAB exits; its exit status is not checked,
        # a failure is detected below through the missing result files.
        subprocess.run(
            f'export MATLABPATH={conn_path}; {matlab_path} -nosplash -nodesktop -r "cd {out_dir_subject}; {function_call}exit;"',
            shell=True,
            check=False,
        )

    # load the .mat files with the ROI timeseries and the CONN FC
    try:
        mat_timeseries = sio.loadmat(mat_timeseries_path, squeeze_me=True)
        mat_FC = sio.loadmat(mat_FC_path, squeeze_me=True)
    except FileNotFoundError:
        print("Results not found! Something went wrong with processing")
        return 0

    # Keep the ROI timeseries: the first three entries of "data" are skipped
    # (presumably non-atlas signals, leaving the 76 atlas regions -- TODO
    # confirm), then transpose the stacked array.
    corrlabel_ts = np.array(list(mat_timeseries["data"][3:])).T

    # FC computed by CONN
    FC_CONN = mat_FC["Z"]

    # Own FC: correlation between the rows of corrlabel_ts.T, NaNs set to 0
    fMRI_syn = np.nan_to_num(np.corrcoef(corrlabel_ts.T))

    # Fisher z-transform. arctanh(+-1) is +-inf (e.g. on the diagonal), which
    # is expected here, so the divide-by-zero warning is silenced locally and
    # the infinities are replaced with 0.
    with np.errstate(divide="ignore"):
        z_fmri_syn = np.arctanh(fMRI_syn)
    z_fmri_syn[np.isinf(z_fmri_syn)] = 0
    np.fill_diagonal(z_fmri_syn, 0)

    # Save all versions (z-transformed and normal)
    np.savetxt(f"{out_dir_subject}/results/r_matrix.csv", fMRI_syn, delimiter=",")
    np.savetxt(f"{out_dir_subject}/results/zr_matrix.csv", z_fmri_syn, delimiter=",")
    np.savetxt(f"{out_dir_subject}/results/conn_matrix.csv", FC_CONN, delimiter=",")
    np.savetxt(f"{out_dir_subject}/results/corrlabel_ts.txt", corrlabel_ts)

    # TODO: copy QC results to a shared directory


def build_parser():
    """
    Build the command-line parser of the script.

    All options are required except ``--njobs``, which falls back to 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--in_dir",
        type=str,
        required=True,
        help="input dir with subject data, general input dir (MAGNIMS2021",
    )
    parser.add_argument(
        "--in_csv",
        type=str,
        required=True,
        help="csv with the subject info (general csv)",
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        required=True,
        help="A string argument (also general directory)",
    )
    parser.add_argument(
        "--center", type=str, required=True, help="Select the centers to use"
    )
    parser.add_argument(
        "--batch",
        type=str,
        required=True,
        help="location of the .mat file with the base batch information",
    )
    # Not required: a required option can never fall back to its default,
    # so the declared default of 1 was dead before.
    parser.add_argument("--njobs", type=int, default=1, help="Number of jobs to use")
    return parser


def main(argv=None):
    """
    Run the CONN pipeline for every subject of the selected center.

    Parameters:
     - argv: list of command-line arguments; None means ``sys.argv[1:]``

    Returns:
     - list with the return value of ``run_pipeline`` for each subject
    """
    args = build_parser().parse_args(argv)

    # read the csv with the subject information
    # (original note, translated: "it is in the base dir, copy")
    df_connect = pd.read_csv(args.in_csv)

    # keep only the subjects of the requested center
    df_connect_todo = df_connect[df_connect.CENTER == args.center]

    # One task per subject, using the threading backend as in the original
    # script.
    return Parallel(n_jobs=args.njobs, backend="threading")(
        delayed(run_pipeline)(row, args.out_dir, args.in_dir, args.batch)
        for row in df_connect_todo.itertuples()
    )


# Guarded so that importing this module does not parse sys.argv or start
# the pipeline.
if __name__ == "__main__":
    main()