Skip to content

Commit

Permalink
improvements of Genfi
Browse files Browse the repository at this point in the history
  • Loading branch information
JOULOT Matthieu committed Oct 23, 2023
1 parent 10ed566 commit c79fb80
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 26 deletions.
19 changes: 14 additions & 5 deletions clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def convert_images(
bids_dir: PathLike,
path_to_clinical: Optional[PathLike],
gif: bool,
path_to_clinical_tsv: Optional[PathLike],
) -> None:
"""Convert the entire dataset to BIDS.
Expand All @@ -30,6 +31,10 @@ def convert_images(
gif: bool
If True, indicates the user wants to have the values of the gif parcellation
path_to_clinical_tsv: PathLike, optional
Path to a tsv containing the additional data the user wants to have in the BIDS.
If None, no additional data will be added.
"""
import os

Expand All @@ -43,29 +48,33 @@ def convert_images(
merge_imaging_data,
read_imaging_data,
write_bids,
check_clinical_path,
)

#check that if a clinical tsv is given, a path to the clinical data is given as well
if path_to_clinical_tsv:
check_clinical_path(path_to_clinical)
# read the clinical data files
if path_to_clinical:
df_demographics, df_imaging, df_clinical = find_clinical_data(path_to_clinical)
df_demographics, df_imaging, df_clinical, df_biosamples, df_neuropsych = find_clinical_data(path_to_clinical)

# makes a df of the imaging data
imaging_data = read_imaging_data(path_to_dataset)

# complete the data extracted
imaging_data = merge_imaging_data(imaging_data)
# complete clinical data
if path_to_clinical:
df_clinical_complete = complete_clinical_data(
df_demographics, df_imaging, df_clinical
df_demographics, df_imaging, df_clinical, df_biosamples, df_neuropsych
)

# intersect the data
if path_to_clinical:
df_complete = intersect_data(imaging_data, df_clinical_complete)
else:
df_complete = imaging_data
# build the tsv
results = dataset_to_bids(df_complete, gif)
results = dataset_to_bids(df_complete, gif, path_to_clinical_tsv)
write_bids(
to=bids_dir,
participants=results["participants"],
Expand Down
12 changes: 10 additions & 2 deletions clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,25 @@
)

gif = click.option("-gif", is_flag=True, help="Add values from gif to session.tsv")

# Optional CLI option: path to a tsv describing additional clinical data
# the user wants to have in the BIDS output.
clinical_data_tsv = click.option(
    "-cdt",
    "--clinical-data-tsv",
    "clinical_data_tsv",
    # The value is later read as a tab-separated file, so reject directories
    # at argument-parsing time instead of failing deep inside the conversion.
    type=click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True),
    help="Path to a tsv containing additionnal clinical data you want to have in the BIDS",
)

@click.command(name="genfi-to-bids")
@cli_param.dataset_directory
@cli_param.bids_directory
@clinical_data_directory
@gif
@clinical_data_tsv
def cli(
dataset_directory: PathLike,
bids_directory: PathLike,
clinical_data_directory: Optional[PathLike] = None,
clinical_data_tsv: Optional[PathLike] = None,
gif: bool = False,
) -> None:
"""GENFI to BIDS converter.
Expand All @@ -39,7 +47,7 @@ def cli(

check_dcm2niix()

convert_images(dataset_directory, bids_directory, clinical_data_directory, gif)
convert_images(dataset_directory, bids_directory, clinical_data_directory, gif, clinical_data_tsv)
_write_bidsignore(str(bids_directory))

cprint("Conversion to BIDS succeeded.")
Expand Down
46 changes: 27 additions & 19 deletions clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
import pydicom as pdcm
from pandas import DataFrame


def check_clinical_path(path_to_clinical_data: PathLike) -> None:
    """Ensure that a path to the clinical data was provided.

    Parameters
    ----------
    path_to_clinical_data: PathLike
        Path to the clinical data. Any falsy value (e.g. ``None`` or an
        empty string) is treated as missing.

    Raises
    ------
    ValueError
        If `path_to_clinical_data` is falsy.
    """
    if not path_to_clinical_data:
        raise ValueError("Missing a clinical_data_path.")

def find_dicoms(path_to_source_data: PathLike) -> Iterable[Tuple[PathLike, PathLike]]:
"""Find the dicoms in the given directory.
Expand Down Expand Up @@ -150,6 +155,8 @@ def find_clinical_data(
"FINAL*DEMOGRAPHICS*.xlsx",
"FINAL*IMAGING*.xlsx",
"FINAL*CLINICAL*.xlsx",
"FINAL*BIOSAMPLES*.xlsx",
"FINAL*NEUROPSYCH*.xlsx",
)
)

Expand All @@ -168,7 +175,7 @@ def _read_file(data_file: PathLike) -> pd.DataFrame:


def complete_clinical_data(
df_demographics: DataFrame, df_imaging: DataFrame, df_clinical: DataFrame
df_demographics: DataFrame, df_imaging: DataFrame, df_clinical: DataFrame, df_biosamples: DataFrame, df_neuropsych: DataFrame
) -> DataFrame:
"""Merges the different clincal dataframes into one.
Expand All @@ -192,24 +199,13 @@ def complete_clinical_data(
df_clinical_complete = df_imaging.merge(
df_demographics, how="inner", on=merge_key
).drop(columns="diagnosis")
df_clinical_complete = df_clinical_complete.merge(df_biosamples, how="inner", on=merge_key)
df_clinical_complete = df_clinical_complete.merge(df_neuropsych, how="inner", on=merge_key)
df_clinical = df_clinical.dropna(subset=merge_key)
return df_clinical_complete.merge(
df_clinical[
[
"blinded_code",
"blinded_site",
"visit",
"diagnosis",
"ftld-cdr-global",
"cdr-sob",
]
],
how="inner",
on=merge_key,
)
return df_clinical_complete.merge(df_clinical, how="inner", on=merge_key)


def dataset_to_bids(complete_data_df: DataFrame, gif: bool) -> Dict[str, DataFrame]:
def dataset_to_bids(complete_data_df: DataFrame, gif: bool, path_to_clinical_tsv: PathLike) -> Dict[str, DataFrame]:
"""Selects the data needed to write the participants, sessions, and scans tsvs.
Parameters
Expand Down Expand Up @@ -241,11 +237,23 @@ def dataset_to_bids(complete_data_df: DataFrame, gif: bool) -> Dict[str, DataFra
"genfi_ref.csv",
)
df_ref = pd.read_csv(path_to_ref_csv, sep=";")

# add additional data through the tsv supplied by the user
additionnal_data_df = pd.read_csv(path_to_clinical_tsv, sep="\t")

# FIXME: hard-coded path, to be replaced soon
map_to_level_df = pd.read_csv("/Users/matthieu.joulot/Desktop/clinical_data_dest.tsv", sep="\t")
pre_addi_df = map_to_level_df.merge(additionnal_data_df, how="inner", on="data")
session_addi_list = pre_addi_df["data"][pre_addi_df["dest"]=="sessions"].values.tolist()
participants_addi_list = pre_addi_df["data"][pre_addi_df["dest"]=="participants"].values.tolist()
scan_addi_list = pre_addi_df["data"][pre_addi_df["dest"]=="scans"].values.tolist()

addi_df=pd.DataFrame([participants_addi_list,session_addi_list, scan_addi_list]).transpose()
addi_df.columns = ["participants", "sessions", "scans"]
if not gif:
df_ref = df_ref.head(8)
df_to_write = pd.concat([df_ref, addi_df])
return {
col: complete_data_df.filter(items=list(df_ref[col]))
col: complete_data_df.filter(items=list(df_to_write[col]))
for col in ["participants", "sessions", "scans"]
}

Expand Down

0 comments on commit c79fb80

Please sign in to comment.