diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py
index ae3b43e5b..26c8ae2c4 100644
--- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py
+++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py
@@ -9,6 +9,7 @@ def convert_images(
     bids_dir: PathLike,
     path_to_clinical: Optional[PathLike],
     gif: bool,
+    path_to_clinical_tsv: Optional[PathLike] = None,
 ) -> None:
     """Convert the entire dataset to BIDS.
 
@@ -30,6 +31,10 @@ def convert_images(
 
     gif: bool
         If True, indicates the user wants to have the values of the gif parcellation
+
+    path_to_clinical_tsv: PathLike, optional
+        Path to a TSV file listing additional clinical data to add to the BIDS.
+        If None, no additional data will be added.
     """
 
     import os
@@ -43,29 +48,39 @@ def convert_images(
         merge_imaging_data,
         read_imaging_data,
         write_bids,
+        check_clinical_path,
     )
 
+    # a clinical tsv can only be used together with the clinical data directory
+    if path_to_clinical_tsv:
+        check_clinical_path(path_to_clinical)
     # read the clinical data files
     if path_to_clinical:
-        df_demographics, df_imaging, df_clinical = find_clinical_data(path_to_clinical)
+        (
+            df_demographics,
+            df_imaging,
+            df_clinical,
+            df_biosamples,
+            df_neuropsych,
+        ) = find_clinical_data(path_to_clinical)
     # makes a df of the imaging data
     imaging_data = read_imaging_data(path_to_dataset)
 
     # complete the data extracted
     imaging_data = merge_imaging_data(imaging_data)
     # complete clinical data
     if path_to_clinical:
         df_clinical_complete = complete_clinical_data(
-            df_demographics, df_imaging, df_clinical
+            df_demographics, df_imaging, df_clinical, df_biosamples, df_neuropsych
         )
 
     # intersect the data
     if path_to_clinical:
         df_complete = intersect_data(imaging_data, df_clinical_complete)
     else:
         df_complete = imaging_data
     # build the tsv
-    results = dataset_to_bids(df_complete, gif)
+    results = dataset_to_bids(df_complete, gif, path_to_clinical_tsv)
     write_bids(
         to=bids_dir,
         participants=results["participants"],
diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py
index b498afc51..edb1590e0 100644
--- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py
+++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py
@@ -14,17 +14,27 @@
 )
 
 gif = click.option("-gif", is_flag=True, help="Add values from gif to session.tsv")
 
+clinical_data_tsv = click.option(
+    "-cdt",
+    "--clinical-data-tsv",
+    "clinical_data_tsv",
+    type=click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True),
+    help="Path to a tsv containing additionnal clinical data you want to have in the BIDS",
+)
+
 
 @click.command(name="genfi-to-bids")
 @cli_param.dataset_directory
 @cli_param.bids_directory
 @clinical_data_directory
 @gif
+@clinical_data_tsv
 def cli(
     dataset_directory: PathLike,
     bids_directory: PathLike,
     clinical_data_directory: Optional[PathLike] = None,
+    clinical_data_tsv: Optional[PathLike] = None,
     gif: bool = False,
 ) -> None:
     """GENFI to BIDS converter.
@@ -39,7 +49,13 @@ def cli(
 
     check_dcm2niix()
 
-    convert_images(dataset_directory, bids_directory, clinical_data_directory, gif)
+    convert_images(
+        dataset_directory,
+        bids_directory,
+        clinical_data_directory,
+        gif,
+        clinical_data_tsv,
+    )
     _write_bidsignore(str(bids_directory))
     cprint("Conversion to BIDS succeeded.")
 
diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py
index cb527bc5a..04a0ef8c4 100644
--- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py
+++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py
@@ -7,7 +7,24 @@
 import pydicom as pdcm
 from pandas import DataFrame
 
 
+def check_clinical_path(path_to_clinical_data: PathLike) -> None:
+    """Ensure that a path to the clinical data was provided.
+
+    Parameters
+    ----------
+    path_to_clinical_data: PathLike
+        Path to the clinical data directory, or None if it was not given.
+
+    Raises
+    ------
+    ValueError
+        If no path to the clinical data was given.
+    """
+    if not path_to_clinical_data:
+        raise ValueError("Missing a clinical_data_path.")
+
+
 def find_dicoms(path_to_source_data: PathLike) -> Iterable[Tuple[PathLike, PathLike]]:
     """Find the dicoms in the given directory.
 
@@ -150,6 +167,8 @@ def find_clinical_data(
             "FINAL*DEMOGRAPHICS*.xlsx",
             "FINAL*IMAGING*.xlsx",
             "FINAL*CLINICAL*.xlsx",
+            "FINAL*BIOSAMPLES*.xlsx",
+            "FINAL*NEUROPSYCH*.xlsx",
         )
     )
 
@@ -168,7 +187,11 @@ def _read_file(data_file: PathLike) -> pd.DataFrame:
 
 
 def complete_clinical_data(
-    df_demographics: DataFrame, df_imaging: DataFrame, df_clinical: DataFrame
+    df_demographics: DataFrame,
+    df_imaging: DataFrame,
+    df_clinical: DataFrame,
+    df_biosamples: DataFrame,
+    df_neuropsych: DataFrame,
 ) -> DataFrame:
     """Merges the different clincal dataframes into one.
 
@@ -192,24 +215,19 @@ def complete_clinical_data(
     df_clinical_complete = df_imaging.merge(
         df_demographics, how="inner", on=merge_key
     ).drop(columns="diagnosis")
+    for df_extra in (df_biosamples, df_neuropsych):
+        df_clinical_complete = df_clinical_complete.merge(
+            df_extra, how="inner", on=merge_key
+        )
     df_clinical = df_clinical.dropna(subset=merge_key)
-    return df_clinical_complete.merge(
-        df_clinical[
-            [
-                "blinded_code",
-                "blinded_site",
-                "visit",
-                "diagnosis",
-                "ftld-cdr-global",
-                "cdr-sob",
-            ]
-        ],
-        how="inner",
-        on=merge_key,
-    )
+    return df_clinical_complete.merge(df_clinical, how="inner", on=merge_key)
 
 
-def dataset_to_bids(complete_data_df: DataFrame, gif: bool) -> Dict[str, DataFrame]:
+def dataset_to_bids(
+    complete_data_df: DataFrame,
+    gif: bool,
+    path_to_clinical_tsv: PathLike = None,
+) -> Dict[str, DataFrame]:
     """Selects the data needed to write the participants, sessions, and scans tsvs.
 
     Parameters
@@ -241,11 +259,36 @@ def dataset_to_bids(complete_data_df: DataFrame, gif: bool) -> Dict[str, DataFra
         "genfi_ref.csv",
     )
     df_ref = pd.read_csv(path_to_ref_csv, sep=";")
 
     if not gif:
         df_ref = df_ref.head(8)
+    df_to_write = df_ref
+    # Only read the user-provided TSV when one was given: pd.read_csv(None)
+    # raises, which would break every conversion run without the -cdt option.
+    if path_to_clinical_tsv:
+        from pathlib import Path
+
+        additional_data_df = pd.read_csv(path_to_clinical_tsv, sep="\t")
+        # The table mapping each additional field ("data") to its BIDS level
+        # ("dest") is resolved next to genfi_ref.csv rather than through a
+        # developer-specific absolute path.
+        # NOTE(review): clinical_data_dest.tsv must be shipped at that location.
+        map_to_level_df = pd.read_csv(
+            Path(path_to_ref_csv).with_name("clinical_data_dest.tsv"), sep="\t"
+        )
+        pre_addi_df = map_to_level_df.merge(additional_data_df, how="inner", on="data")
+        levels = ["participants", "sessions", "scans"]
+        # One row per level, transposed so that each level becomes a column.
+        addi_df = pd.DataFrame(
+            [
+                pre_addi_df.loc[pre_addi_df["dest"] == level, "data"].tolist()
+                for level in levels
+            ]
+        ).transpose()
+        addi_df.columns = levels
+        df_to_write = pd.concat([df_ref, addi_df])
     return {
-        col: complete_data_df.filter(items=list(df_ref[col]))
+        col: complete_data_df.filter(items=list(df_to_write[col]))
         for col in ["participants", "sessions", "scans"]
     }
 