Add dataset links to config object (#1266)
tsalo authored Sep 17, 2024
1 parent a8c402b commit 37dd906
Showing 5 changed files with 97 additions and 47 deletions.
10 changes: 8 additions & 2 deletions xcp_d/cli/run.py
@@ -21,7 +21,7 @@ def main():
     from xcp_d.cli.workflow import build_workflow
     from xcp_d.utils.bids import (
         write_atlas_dataset_description,
-        write_dataset_description,
+        write_derivative_description,
     )

     parse_args(args=sys.argv[1:])
@@ -160,7 +160,13 @@ def main():
     from xcp_d.reports.core import generate_reports

     # Write dataset description before generating reports
-    write_dataset_description(config.execution.fmri_dir, config.execution.output_dir)
+    write_derivative_description(
+        config.execution.fmri_dir,
+        config.execution.output_dir,
+        atlases=config.execution.atlases,
+        custom_confounds_folder=config.execution.custom_confounds,
+        dataset_links=config.execution.dataset_links,
+    )

     if config.execution.atlases:
         write_atlas_dataset_description(config.execution.output_dir / "atlases")
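For orientation, a sketch of the DatasetLinks block this call is expected to produce in the output dataset_description.json, assuming the base links assembled in config.execution (next file); all paths here are invented for illustration:

# Hypothetical resulting DatasetLinks entry (actual values depend on the run):
expected_links = {
    "preprocessed": "/data/fmriprep",  # from config.execution.fmri_dir
    "templateflow": "https://github.com/templateflow/templateflow",  # local path swapped for the URL
    "atlases": "/data/xcp_d/atlases",  # present only when atlases were requested
    "custom_confounds": "/data/confounds",  # present only when a folder was given
}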
15 changes: 15 additions & 0 deletions xcp_d/config.py
@@ -90,6 +90,8 @@
 import os
 from multiprocessing import set_start_method

+from templateflow.conf import TF_LAYOUT
+
 # Disable NiPype etelemetry always
 _disable_et = bool(os.getenv("NO_ET") is not None or os.getenv("NIPYPE_NO_ET") is not None)
 os.environ["NIPYPE_NO_ET"] = "1"
@@ -227,6 +229,8 @@ def load(cls, settings, init=True, ignore=None):
             if k in cls._paths:
                 if isinstance(v, (list, tuple)):
                     setattr(cls, k, [Path(val).absolute() for val in v])
+                elif isinstance(v, dict):
+                    setattr(cls, k, {key: Path(val).absolute() for key, val in v.items()})
                 else:
                     setattr(cls, k, Path(v).absolute())
             elif hasattr(cls, k):
@@ -252,6 +256,8 @@ def get(cls):
             if k in cls._paths:
                 if isinstance(v, (list, tuple)):
                     v = [str(val) for val in v]
+                elif isinstance(v, dict):
+                    v = {key: str(val) for key, val in v.items()}
                 else:
                     v = str(v)
                 if isinstance(v, SpatialReferences):
@@ -419,6 +425,8 @@ class execution(_Config):
"""Path to a working directory where intermediate results will be available."""
write_graph = None
"""Write out the computational graph corresponding to the planned preprocessing."""
dataset_links = {}
"""A dictionary of dataset links to be used to track Sources in sidecars."""

_layout = None

@@ -431,6 +439,7 @@
"output_dir",
"templateflow_home",
"work_dir",
"dataset_links",
)

@classmethod
@@ -501,6 +510,12 @@ def _process_value(value):
                 for k, v in filters.items():
                     cls.bids_filters[acq][k] = _process_value(v)

+        dataset_links = {
+            'preprocessed': cls.fmri_dir,
+            'templateflow': Path(TF_LAYOUT.root),
+        }
+        cls.dataset_links = dataset_links
+
         if "all" in cls.debug:
             cls.debug = list(DEBUG_MODES)

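The dict branches added to load() and get() mirror the existing list/tuple handling, so dataset_links survives a settings round-trip. A minimal standalone sketch of that symmetry (invented values, not xcp_d code):

from pathlib import Path

# load(): string values of a dict-typed path setting become absolute Paths
stored = {"preprocessed": "derivatives/fmriprep", "templateflow": "/opt/templateflow"}
loaded = {key: Path(val).absolute() for key, val in stored.items()}

# get(): Paths are converted back to strings so the settings can be dumped
dumped = {key: str(val) for key, val in loaded.items()}
assert all(isinstance(val, str) for val in dumped.values())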
8 changes: 6 additions & 2 deletions xcp_d/tests/test_cli.py
@@ -22,7 +22,7 @@
     get_test_data_path,
     list_files,
 )
-from xcp_d.utils.bids import write_atlas_dataset_description, write_dataset_description
+from xcp_d.utils.bids import write_atlas_dataset_description, write_derivative_description

 LOGGER = logging.getLogger("nipype.utils")

@@ -475,7 +475,11 @@ def _run_and_generate(test_name, parameters, input_type, test_main=False):
         retval = build_workflow(config_file, retval={})
         xcpd_wf = retval["workflow"]
         xcpd_wf.run(**config.nipype.get_plugin())
-        write_dataset_description(config.execution.fmri_dir, config.execution.output_dir)
+        write_derivative_description(
+            config.execution.fmri_dir,
+            config.execution.output_dir,
+            dataset_links=config.execution.dataset_links,
+        )
     if config.execution.atlases:
         write_atlas_dataset_description(config.execution.output_dir / "atlases")

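The test now forwards config.execution.dataset_links instead of constructing links by hand; assuming config.execution.init() ran earlier in the test setup, that attribute already holds the base links. A sketch of the expected shape, with hypothetical paths:

# Expected shape of config.execution.dataset_links at this point (illustrative):
links = {"preprocessed": "/data/fmriprep", "templateflow": "/opt/templateflow"}
assert set(links) == {"preprocessed", "templateflow"}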
41 changes: 31 additions & 10 deletions xcp_d/tests/test_utils_bids.py
@@ -253,23 +253,30 @@ def test_collect_morphometry_data(datasets, tmp_path_factory):
     assert morph_file_types == []


-def test_write_dataset_description(datasets, tmp_path_factory, caplog):
-    """Test write_dataset_description."""
-    tmpdir = tmp_path_factory.mktemp("test_write_dataset_description")
+def test_write_derivative_description(datasets, tmp_path_factory, caplog):
+    """Test write_derivative_description."""
+    tmpdir = tmp_path_factory.mktemp("test_write_derivative_description")
     dset_description = os.path.join(tmpdir, "dataset_description.json")

     # The function expects a description file in the fmri_dir.
     with pytest.raises(FileNotFoundError, match="Dataset description DNE"):
-        xbids.write_dataset_description(tmpdir, tmpdir, atlases=None, custom_confounds_folder=None)
+        xbids.write_derivative_description(
+            tmpdir,
+            tmpdir,
+            atlases=None,
+            custom_confounds_folder=None,
+            dataset_links={},
+        )
     assert not os.path.isfile(dset_description)

     # It will work when we give it a real fmri_dir.
     fmri_dir = datasets["ds001419"]
-    xbids.write_dataset_description(
+    xbids.write_derivative_description(
         fmri_dir,
         tmpdir,
         atlases=["Gordon"],
         custom_confounds_folder="/fake/path4",
+        dataset_links={"preprocessed": "/fake/path1"},
     )
     assert os.path.isfile(dset_description)

@@ -279,11 +286,12 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):

assert "'preprocessed' is already a dataset link" not in caplog.text
assert "'custom_confounds' is already a dataset link" not in caplog.text
xbids.write_dataset_description(
xbids.write_derivative_description(
tmpdir,
tmpdir,
atlases=["Gordon"],
custom_confounds_folder="/fake/path4",
custom_confounds_folder="/fake/path5",
dataset_links={"preprocessed": "/fake/path2"},
)
assert "'preprocessed' is already a dataset link" in caplog.text
assert "'custom_confounds' is already a dataset link" in caplog.text
@@ -294,7 +302,13 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)

     assert "Previous output generated by version" not in caplog.text
-    xbids.write_dataset_description(fmri_dir, tmpdir, atlases=None, custom_confounds_folder=None)
+    xbids.write_derivative_description(
+        fmri_dir,
+        tmpdir,
+        atlases=None,
+        custom_confounds_folder=None,
+        dataset_links={},
+    )
     assert "Previous output generated by version" in caplog.text

     # Should raise a warning if DatasetType is not in the description
@@ -303,7 +317,13 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)

     assert "DatasetType key not in" not in caplog.text
-    xbids.write_dataset_description(tmpdir, tmpdir, atlases=None, custom_confounds_folder=None)
+    xbids.write_derivative_description(
+        tmpdir,
+        tmpdir,
+        atlases=None,
+        custom_confounds_folder=None,
+        dataset_links={},
+    )
     assert "DatasetType key not in" in caplog.text

     # Should raise an error if DatasetType is present, but isn't "derivative"
@@ -312,11 +332,12 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)

     with pytest.raises(ValueError, match="XCP-D only works on derivative datasets."):
-        xbids.write_dataset_description(
+        xbids.write_derivative_description(
             tmpdir,
             tmpdir,
             atlases=None,
             custom_confounds_folder=None,
+            dataset_links={},
         )


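The reworked test exercises the new overwrite warning, which (per the loop added in xcp_d/utils/bids.py below) fires only when a link key already exists with a different value; hence the swap from /fake/path1 to /fake/path2 between the two calls. A standalone sketch of that check, with made-up values:

existing = {"preprocessed": "/fake/path1"}
incoming = {"preprocessed": "/fake/path2"}
for key, val in incoming.items():
    if key in existing and existing[key] != str(val):
        print(f"'{key}' is already a dataset link. Overwriting.")  # the warned path
    existing[key] = str(val)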
70 changes: 37 additions & 33 deletions xcp_d/utils/bids.py
@@ -655,7 +655,13 @@ def collect_run_data(layout, bold_file, file_format, target_space):
     return run_data


-def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confounds_folder=None):
+def write_derivative_description(
+    fmri_dir,
+    output_dir,
+    atlases=None,
+    custom_confounds_folder=None,
+    dataset_links={},
+):
     """Write dataset_description.json file for derivatives.

     Parameters
@@ -668,6 +674,8 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confoun
         Names of requested XCP-D atlases.
     custom_confounds_folder : :obj:`str`, optional
         Path to the folder containing custom confounds files.
+    dataset_links : :obj:`dict`, optional
+        Dictionary of dataset links to include in the dataset description.
     """
     import json
     import os
@@ -679,22 +687,22 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confoun
         raise FileNotFoundError(f"Dataset description DNE: {orig_dset_description}")

     with open(orig_dset_description, "r") as fo:
-        dset_desc = json.load(fo)
+        desc = json.load(fo)

     # Check if the dataset type is derivative
-    if "DatasetType" not in dset_desc.keys():
+    if "DatasetType" not in desc.keys():
         LOGGER.warning(f"DatasetType key not in {orig_dset_description}. Assuming 'derivative'.")
-        dset_desc["DatasetType"] = "derivative"
+        desc["DatasetType"] = "derivative"

-    if dset_desc.get("DatasetType", "derivative") != "derivative":
+    if desc.get("DatasetType", "derivative") != "derivative":
         raise ValueError(
             f"DatasetType key in {orig_dset_description} is not 'derivative'. "
             "XCP-D only works on derivative datasets."
         )

     # Update dataset description
-    dset_desc["Name"] = "XCP-D: A Robust Postprocessing Pipeline of fMRI data"
-    generated_by = dset_desc.get("GeneratedBy", [])
+    desc["Name"] = "XCP-D: A Robust Postprocessing Pipeline of fMRI data"
+    generated_by = desc.get("GeneratedBy", [])
     generated_by.insert(
         0,
         {
@@ -703,42 +711,38 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confoun
"CodeURL": DOWNLOAD_URL,
},
)
dset_desc["GeneratedBy"] = generated_by
dset_desc["HowToAcknowledge"] = "Include the generated boilerplate in the methods section."

# Add DatasetLinks
if "DatasetLinks" not in dset_desc.keys():
dset_desc["DatasetLinks"] = {}
desc["GeneratedBy"] = generated_by
desc["HowToAcknowledge"] = "Include the generated boilerplate in the methods section."

if "preprocessed" in dset_desc["DatasetLinks"].keys():
LOGGER.warning("'preprocessed' is already a dataset link. Overwriting.")
dataset_links = dataset_links.copy()

dset_desc["DatasetLinks"]["preprocessed"] = str(fmri_dir)
# Replace local templateflow path with URL
dataset_links["templateflow"] = "https://github.com/templateflow/templateflow"

if atlases:
if "atlases" in dset_desc["DatasetLinks"].keys():
LOGGER.warning("'atlases' is already a dataset link. Overwriting.")

dset_desc["DatasetLinks"]["atlases"] = os.path.join(output_dir, "atlases")
dataset_links["atlases"] = os.path.join(output_dir, "atlases")

if custom_confounds_folder:
if "custom_confounds" in dset_desc["DatasetLinks"].keys():
LOGGER.warning("'custom_confounds' is already a dataset link. Overwriting.")
dataset_links["custom_confounds"] = str(custom_confounds_folder)

dset_desc["DatasetLinks"]["custom_confounds"] = str(custom_confounds_folder)
# Add DatasetLinks
if "DatasetLinks" not in desc.keys():
desc["DatasetLinks"] = {}

xcpd_dset_description = os.path.join(output_dir, "dataset_description.json")
if os.path.isfile(xcpd_dset_description):
with open(xcpd_dset_description, "r") as fo:
old_dset_desc = json.load(fo)
for k, v in dataset_links.items():
if k in desc["DatasetLinks"].keys() and desc["DatasetLinks"][k] != str(v):
LOGGER.warning(f"'{k}' is already a dataset link. Overwriting.")

old_version = old_dset_desc["GeneratedBy"][0]["Version"]
desc["DatasetLinks"][k] = str(v)

xcpd_dset_description = Path(output_dir / "dataset_description.json")
if xcpd_dset_description.is_file():
old_desc = json.loads(xcpd_dset_description.read_text())
old_version = old_desc["GeneratedBy"][0]["Version"]
if Version(__version__).public != Version(old_version).public:
LOGGER.warning(f"Previous output generated by version {old_version} found.")

else:
with open(xcpd_dset_description, "w") as fo:
json.dump(dset_desc, fo, indent=4, sort_keys=True)
xcpd_dset_description.write_text(json.dumps(desc, indent=4))


def write_atlas_dataset_description(atlas_dir):
@@ -754,7 +758,7 @@ def write_atlas_dataset_description(atlas_dir):

     from xcp_d.__about__ import DOWNLOAD_URL, __version__

-    dset_desc = {
+    desc = {
         "Name": "XCP-D Atlases",
         "DatasetType": "atlas",
         "GeneratedBy": [
@@ -779,7 +783,7 @@ def write_atlas_dataset_description(atlas_dir):

     else:
         with open(atlas_dset_description, "w") as fo:
-            json.dump(dset_desc, fo, indent=4, sort_keys=True)
+            json.dump(desc, fo, indent=4, sort_keys=True)


 def get_preproc_pipeline_info(input_type, fmri_dir):
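A hypothetical call using the new signature (paths invented for illustration). Two details worth noting: the function copies dataset_links before mutating it, which guards the shared mutable {} default, and output_dir is joined with the / operator internally, so passing a Path is the safe choice:

from pathlib import Path

from xcp_d.utils.bids import write_derivative_description

write_derivative_description(
    Path("/data/fmriprep"),  # must contain a dataset_description.json
    Path("/data/xcp_d"),
    atlases=["Gordon"],
    custom_confounds_folder=None,
    dataset_links={"preprocessed": Path("/data/fmriprep")},
)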
