Add dataset links to config object #1266

Merged · 8 commits · Sep 17, 2024
10 changes: 8 additions & 2 deletions xcp_d/cli/run.py
@@ -21,7 +21,7 @@ def main():
     from xcp_d.cli.workflow import build_workflow
     from xcp_d.utils.bids import (
         write_atlas_dataset_description,
-        write_dataset_description,
+        write_derivative_description,
     )

     parse_args(args=sys.argv[1:])
@@ -160,7 +160,13 @@ def main():
    from xcp_d.reports.core import generate_reports

    # Write dataset description before generating reports
-    write_dataset_description(config.execution.fmri_dir, config.execution.output_dir)
+    write_derivative_description(
+        config.execution.fmri_dir,
+        config.execution.output_dir,
+        atlases=config.execution.atlases,
+        custom_confounds_folder=config.execution.custom_confounds,
+        dataset_links=config.execution.dataset_links,
+    )

    if config.execution.atlases:
        write_atlas_dataset_description(config.execution.output_dir / "atlases")
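For context, the net effect of the new call is that the derivative `dataset_description.json` gains a `DatasetLinks` block assembled from the config, plus a few overrides applied inside `write_derivative_description` (see the `xcp_d/utils/bids.py` diff below). A rough sketch of the result, with illustrative paths rather than values from a real run:

```python
# Hypothetical DatasetLinks block written to <output_dir>/dataset_description.json.
# Keys mirror config.execution.dataset_links plus the overrides applied in
# write_derivative_description(); every path below is made up.
expected_links = {
    "preprocessed": "/data/derivatives/fmriprep",  # config.execution.fmri_dir
    "templateflow": "https://github.com/templateflow/templateflow",  # local path swapped for URL
    "atlases": "/data/derivatives/xcp_d/atlases",  # only when atlases were requested
    "custom_confounds": "/data/custom_confounds",  # only when a custom confounds folder was given
}
```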
15 changes: 15 additions & 0 deletions xcp_d/config.py
@@ -90,6 +90,8 @@
 import os
 from multiprocessing import set_start_method

+from templateflow.conf import TF_LAYOUT
+
 # Disable NiPype etelemetry always
 _disable_et = bool(os.getenv("NO_ET") is not None or os.getenv("NIPYPE_NO_ET") is not None)
 os.environ["NIPYPE_NO_ET"] = "1"
@@ -227,6 +229,8 @@ def load(cls, settings, init=True, ignore=None):
             if k in cls._paths:
                 if isinstance(v, (list, tuple)):
                     setattr(cls, k, [Path(val).absolute() for val in v])
+                elif isinstance(v, dict):
+                    setattr(cls, k, {key: Path(val).absolute() for key, val in v.items()})
                 else:
                     setattr(cls, k, Path(v).absolute())
             elif hasattr(cls, k):
@@ -252,6 +256,8 @@ def get(cls):
             if k in cls._paths:
                 if isinstance(v, (list, tuple)):
                     v = [str(val) for val in v]
+                elif isinstance(v, dict):
+                    v = {key: str(val) for key, val in v.items()}
                 else:
                     v = str(v)
             if isinstance(v, SpatialReferences):
@@ -419,6 +425,8 @@ class execution(_Config):
     """Path to a working directory where intermediate results will be available."""
     write_graph = None
     """Write out the computational graph corresponding to the planned preprocessing."""
+    dataset_links = {}
+    """A dictionary of dataset links to be used to track Sources in sidecars."""

     _layout = None

@@ -431,6 +439,7 @@
         "output_dir",
         "templateflow_home",
         "work_dir",
+        "dataset_links",
     )

     @classmethod
@@ -501,6 +510,12 @@ def _process_value(value):
                 for k, v in filters.items():
                     cls.bids_filters[acq][k] = _process_value(v)

+        dataset_links = {
+            'preprocessed': cls.fmri_dir,
+            'templateflow': Path(TF_LAYOUT.root),
+        }
+        cls.dataset_links = dataset_links
+
         if "all" in cls.debug:
             cls.debug = list(DEBUG_MODES)

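The two new `dict` branches in `_Config.load`/`_Config.get` mean a path-valued dictionary like `dataset_links` now survives the dump/reload cycle the config file goes through. A minimal round-trip sketch of that behavior (standalone illustration, not xcp_d code):

```python
from pathlib import Path

# What get() emits for a dict-valued _paths entry: every value stringified.
links = {"preprocessed": Path("/data/fmriprep"), "templateflow": Path("/opt/templateflow")}
serialized = {key: str(val) for key, val in links.items()}

# What load() does on the way back in: every value restored to an absolute Path.
restored = {key: Path(val).absolute() for key, val in serialized.items()}
assert restored == links
```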
8 changes: 6 additions & 2 deletions xcp_d/tests/test_cli.py
@@ -22,7 +22,7 @@
     get_test_data_path,
     list_files,
 )
-from xcp_d.utils.bids import write_atlas_dataset_description, write_dataset_description
+from xcp_d.utils.bids import write_atlas_dataset_description, write_derivative_description

 LOGGER = logging.getLogger("nipype.utils")

@@ -475,7 +475,11 @@ def _run_and_generate(test_name, parameters, input_type, test_main=False):
         retval = build_workflow(config_file, retval={})
         xcpd_wf = retval["workflow"]
         xcpd_wf.run(**config.nipype.get_plugin())
-        write_dataset_description(config.execution.fmri_dir, config.execution.output_dir)
+        write_derivative_description(
+            config.execution.fmri_dir,
+            config.execution.output_dir,
+            dataset_links=config.execution.dataset_links,
+        )
         if config.execution.atlases:
             write_atlas_dataset_description(config.execution.output_dir / "atlases")

41 changes: 31 additions & 10 deletions xcp_d/tests/test_utils_bids.py
@@ -253,23 +253,30 @@ def test_collect_morphometry_data(datasets, tmp_path_factory):
     assert morph_file_types == []


-def test_write_dataset_description(datasets, tmp_path_factory, caplog):
-    """Test write_dataset_description."""
-    tmpdir = tmp_path_factory.mktemp("test_write_dataset_description")
+def test_write_derivative_description(datasets, tmp_path_factory, caplog):
+    """Test write_derivative_description."""
+    tmpdir = tmp_path_factory.mktemp("test_write_derivative_description")
     dset_description = os.path.join(tmpdir, "dataset_description.json")

     # The function expects a description file in the fmri_dir.
     with pytest.raises(FileNotFoundError, match="Dataset description DNE"):
-        xbids.write_dataset_description(tmpdir, tmpdir, atlases=None, custom_confounds_folder=None)
+        xbids.write_derivative_description(
+            tmpdir,
+            tmpdir,
+            atlases=None,
+            custom_confounds_folder=None,
+            dataset_links={},
+        )
     assert not os.path.isfile(dset_description)

     # It will work when we give it a real fmri_dir.
     fmri_dir = datasets["ds001419"]
-    xbids.write_dataset_description(
+    xbids.write_derivative_description(
         fmri_dir,
         tmpdir,
         atlases=["Gordon"],
         custom_confounds_folder="/fake/path4",
+        dataset_links={"preprocessed": "/fake/path1"},
     )
     assert os.path.isfile(dset_description)

@@ -279,11 +286,12 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):

     assert "'preprocessed' is already a dataset link" not in caplog.text
     assert "'custom_confounds' is already a dataset link" not in caplog.text
-    xbids.write_dataset_description(
+    xbids.write_derivative_description(
         tmpdir,
         tmpdir,
         atlases=["Gordon"],
-        custom_confounds_folder="/fake/path4",
+        custom_confounds_folder="/fake/path5",
+        dataset_links={"preprocessed": "/fake/path2"},
     )
     assert "'preprocessed' is already a dataset link" in caplog.text
     assert "'custom_confounds' is already a dataset link" in caplog.text
@@ -294,7 +302,13 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)

     assert "Previous output generated by version" not in caplog.text
-    xbids.write_dataset_description(fmri_dir, tmpdir, atlases=None, custom_confounds_folder=None)
+    xbids.write_derivative_description(
+        fmri_dir,
+        tmpdir,
+        atlases=None,
+        custom_confounds_folder=None,
+        dataset_links={},
+    )
     assert "Previous output generated by version" in caplog.text

     # Should raise a warning if DatasetType is not in the description
@@ -303,7 +317,13 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)

     assert "DatasetType key not in" not in caplog.text
-    xbids.write_dataset_description(tmpdir, tmpdir, atlases=None, custom_confounds_folder=None)
+    xbids.write_derivative_description(
+        tmpdir,
+        tmpdir,
+        atlases=None,
+        custom_confounds_folder=None,
+        dataset_links={},
+    )
     assert "DatasetType key not in" in caplog.text

     # Should raise an error if DatasetType is present, but isn't "derivative"
@@ -312,11 +332,12 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)

     with pytest.raises(ValueError, match="XCP-D only works on derivative datasets."):
-        xbids.write_dataset_description(
+        xbids.write_derivative_description(
             tmpdir,
             tmpdir,
             atlases=None,
             custom_confounds_folder=None,
+            dataset_links={},
         )

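The paired `write_derivative_description` calls with different `dataset_links` values exercise the new overwrite warning, which only fires when a link name already exists with a *different* value. A simplified sketch of that check, mirroring the loop in the `xcp_d/utils/bids.py` diff below (paths are the test's fake values):

```python
desc = {"DatasetLinks": {"preprocessed": "/fake/path1"}}  # written by the first call
dataset_links = {"preprocessed": "/fake/path2"}           # passed on the second call

for k, v in dataset_links.items():
    # Warn only when the name is taken AND the value actually changes.
    if k in desc["DatasetLinks"] and desc["DatasetLinks"][k] != str(v):
        print(f"'{k}' is already a dataset link. Overwriting.")  # LOGGER.warning in xcp_d
    desc["DatasetLinks"][k] = str(v)
```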
70 changes: 37 additions & 33 deletions xcp_d/utils/bids.py
@@ -655,7 +655,13 @@ def collect_run_data(layout, bold_file, file_format, target_space):
     return run_data


-def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confounds_folder=None):
+def write_derivative_description(
+    fmri_dir,
+    output_dir,
+    atlases=None,
+    custom_confounds_folder=None,
+    dataset_links={},
+):
     """Write dataset_description.json file for derivatives.

     Parameters
@@ -668,6 +674,8 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confoun
         Names of requested XCP-D atlases.
     custom_confounds_folder : :obj:`str`, optional
         Path to the folder containing custom confounds files.
+    dataset_links : :obj:`dict`, optional
+        Dictionary of dataset links to include in the dataset description.
     """
     import json
     import os
@@ -679,22 +687,22 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confoun
         raise FileNotFoundError(f"Dataset description DNE: {orig_dset_description}")

     with open(orig_dset_description, "r") as fo:
-        dset_desc = json.load(fo)
+        desc = json.load(fo)

     # Check if the dataset type is derivative
-    if "DatasetType" not in dset_desc.keys():
+    if "DatasetType" not in desc.keys():
         LOGGER.warning(f"DatasetType key not in {orig_dset_description}. Assuming 'derivative'.")
-        dset_desc["DatasetType"] = "derivative"
+        desc["DatasetType"] = "derivative"

-    if dset_desc.get("DatasetType", "derivative") != "derivative":
+    if desc.get("DatasetType", "derivative") != "derivative":
         raise ValueError(
             f"DatasetType key in {orig_dset_description} is not 'derivative'. "
             "XCP-D only works on derivative datasets."
         )

     # Update dataset description
-    dset_desc["Name"] = "XCP-D: A Robust Postprocessing Pipeline of fMRI data"
-    generated_by = dset_desc.get("GeneratedBy", [])
+    desc["Name"] = "XCP-D: A Robust Postprocessing Pipeline of fMRI data"
+    generated_by = desc.get("GeneratedBy", [])
     generated_by.insert(
         0,
         {
@@ -703,42 +711,38 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confoun
             "CodeURL": DOWNLOAD_URL,
         },
     )
-    dset_desc["GeneratedBy"] = generated_by
-    dset_desc["HowToAcknowledge"] = "Include the generated boilerplate in the methods section."
+    desc["GeneratedBy"] = generated_by
+    desc["HowToAcknowledge"] = "Include the generated boilerplate in the methods section."

-    # Add DatasetLinks
-    if "DatasetLinks" not in dset_desc.keys():
-        dset_desc["DatasetLinks"] = {}
+    dataset_links = dataset_links.copy()

-    if "preprocessed" in dset_desc["DatasetLinks"].keys():
-        LOGGER.warning("'preprocessed' is already a dataset link. Overwriting.")
-
-    dset_desc["DatasetLinks"]["preprocessed"] = str(fmri_dir)
+    # Replace local templateflow path with URL
+    dataset_links["templateflow"] = "https://github.com/templateflow/templateflow"

     if atlases:
-        if "atlases" in dset_desc["DatasetLinks"].keys():
-            LOGGER.warning("'atlases' is already a dataset link. Overwriting.")
-
-        dset_desc["DatasetLinks"]["atlases"] = os.path.join(output_dir, "atlases")
+        dataset_links["atlases"] = os.path.join(output_dir, "atlases")

     if custom_confounds_folder:
-        if "custom_confounds" in dset_desc["DatasetLinks"].keys():
-            LOGGER.warning("'custom_confounds' is already a dataset link. Overwriting.")
-
-        dset_desc["DatasetLinks"]["custom_confounds"] = str(custom_confounds_folder)
+        dataset_links["custom_confounds"] = str(custom_confounds_folder)
+
+    # Add DatasetLinks
+    if "DatasetLinks" not in desc.keys():
+        desc["DatasetLinks"] = {}
+
+    for k, v in dataset_links.items():
+        if k in desc["DatasetLinks"].keys() and desc["DatasetLinks"][k] != str(v):
+            LOGGER.warning(f"'{k}' is already a dataset link. Overwriting.")
+
+        desc["DatasetLinks"][k] = str(v)

-    xcpd_dset_description = os.path.join(output_dir, "dataset_description.json")
-    if os.path.isfile(xcpd_dset_description):
-        with open(xcpd_dset_description, "r") as fo:
-            old_dset_desc = json.load(fo)
-
-        old_version = old_dset_desc["GeneratedBy"][0]["Version"]
+    xcpd_dset_description = Path(output_dir / "dataset_description.json")
+    if xcpd_dset_description.is_file():
+        old_desc = json.loads(xcpd_dset_description.read_text())
+        old_version = old_desc["GeneratedBy"][0]["Version"]
         if Version(__version__).public != Version(old_version).public:
             LOGGER.warning(f"Previous output generated by version {old_version} found.")

     else:
-        with open(xcpd_dset_description, "w") as fo:
-            json.dump(dset_desc, fo, indent=4, sort_keys=True)
+        xcpd_dset_description.write_text(json.dumps(desc, indent=4))


 def write_atlas_dataset_description(atlas_dir):
@@ -754,7 +758,7 @@ def write_atlas_dataset_description(atlas_dir):

     from xcp_d.__about__ import DOWNLOAD_URL, __version__

-    dset_desc = {
+    desc = {
         "Name": "XCP-D Atlases",
         "DatasetType": "atlas",
         "GeneratedBy": [
@@ -779,7 +783,7 @@ def write_atlas_dataset_description(atlas_dir):

     else:
         with open(atlas_dset_description, "w") as fo:
-            json.dump(dset_desc, fo, indent=4, sort_keys=True)
+            json.dump(desc, fo, indent=4, sort_keys=True)


 def get_preproc_pipeline_info(input_type, fmri_dir):
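As the new `dataset_links` docstring notes, these links exist so that sidecar `Sources` entries can reference files in other datasets via BIDS URIs (`bids:<link-name>:<relative-path>`) rather than absolute paths. An illustrative sketch of that mechanism, with a hypothetical filename:

```python
# Illustrative only: how a DatasetLinks entry turns an absolute source path into
# a portable BIDS URI for a sidecar's "Sources" field. The filename is made up.
dataset_links = {"preprocessed": "/data/derivatives/fmriprep"}

source = "/data/derivatives/fmriprep/sub-01/func/sub-01_task-rest_desc-preproc_bold.nii.gz"
relative = source.removeprefix(dataset_links["preprocessed"]).lstrip("/")

sidecar = {"Sources": [f"bids:preprocessed:{relative}"]}
# -> {'Sources': ['bids:preprocessed:sub-01/func/sub-01_task-rest_desc-preproc_bold.nii.gz']}
```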