diff --git a/xcp_d/cli/run.py b/xcp_d/cli/run.py
index f81343382..3b9b4f5e1 100644
--- a/xcp_d/cli/run.py
+++ b/xcp_d/cli/run.py
@@ -21,7 +21,7 @@ def main():
     from xcp_d.cli.workflow import build_workflow
     from xcp_d.utils.bids import (
         write_atlas_dataset_description,
-        write_dataset_description,
+        write_derivative_description,
     )
 
     parse_args(args=sys.argv[1:])
@@ -160,7 +160,13 @@ def main():
         from xcp_d.reports.core import generate_reports
 
         # Write dataset description before generating reports
-        write_dataset_description(config.execution.fmri_dir, config.execution.output_dir)
+        write_derivative_description(
+            config.execution.fmri_dir,
+            config.execution.output_dir,
+            atlases=config.execution.atlases,
+            custom_confounds_folder=config.execution.custom_confounds,
+            dataset_links=config.execution.dataset_links,
+        )
 
         if config.execution.atlases:
             write_atlas_dataset_description(config.execution.output_dir / "atlases")
diff --git a/xcp_d/config.py b/xcp_d/config.py
index b31a380f2..7b1878104 100644
--- a/xcp_d/config.py
+++ b/xcp_d/config.py
@@ -90,6 +90,8 @@
 import os
 from multiprocessing import set_start_method
 
+from templateflow.conf import TF_LAYOUT
+
 # Disable NiPype etelemetry always
 _disable_et = bool(os.getenv("NO_ET") is not None or os.getenv("NIPYPE_NO_ET") is not None)
 os.environ["NIPYPE_NO_ET"] = "1"
@@ -227,6 +229,8 @@ def load(cls, settings, init=True, ignore=None):
             if k in cls._paths:
                 if isinstance(v, (list, tuple)):
                     setattr(cls, k, [Path(val).absolute() for val in v])
+                elif isinstance(v, dict):
+                    setattr(cls, k, {key: Path(val).absolute() for key, val in v.items()})
                 else:
                     setattr(cls, k, Path(v).absolute())
             elif hasattr(cls, k):
@@ -252,6 +256,8 @@ def get(cls):
             if k in cls._paths:
                 if isinstance(v, (list, tuple)):
                     v = [str(val) for val in v]
+                elif isinstance(v, dict):
+                    v = {key: str(val) for key, val in v.items()}
                 else:
                     v = str(v)
             if isinstance(v, SpatialReferences):
@@ -419,6 +425,8 @@ class execution(_Config):
     """Path to a working directory where intermediate results will be available."""
     write_graph = None
     """Write out the computational graph corresponding to the planned preprocessing."""
+    dataset_links = {}
+    """A dictionary of dataset links to be used to track Sources in sidecars."""
 
     _layout = None
 
@@ -431,6 +439,7 @@ class execution(_Config):
         "output_dir",
         "templateflow_home",
         "work_dir",
+        "dataset_links",
     )
 
     @classmethod
@@ -501,6 +510,12 @@ def _process_value(value):
                 for k, v in filters.items():
                     cls.bids_filters[acq][k] = _process_value(v)
 
+        dataset_links = {
+            "preprocessed": cls.fmri_dir,
+            "templateflow": Path(TF_LAYOUT.root),
+        }
+        cls.dataset_links = dataset_links
+
         if "all" in cls.debug:
             cls.debug = list(DEBUG_MODES)
 
diff --git a/xcp_d/tests/test_cli.py b/xcp_d/tests/test_cli.py
index 9cdadfe72..727ebb895 100644
--- a/xcp_d/tests/test_cli.py
+++ b/xcp_d/tests/test_cli.py
@@ -22,7 +22,7 @@
     get_test_data_path,
     list_files,
 )
-from xcp_d.utils.bids import write_atlas_dataset_description, write_dataset_description
+from xcp_d.utils.bids import write_atlas_dataset_description, write_derivative_description
 
 LOGGER = logging.getLogger("nipype.utils")
 
@@ -475,7 +475,11 @@ def _run_and_generate(test_name, parameters, input_type, test_main=False):
         retval = build_workflow(config_file, retval={})
         xcpd_wf = retval["workflow"]
         xcpd_wf.run(**config.nipype.get_plugin())
-        write_dataset_description(config.execution.fmri_dir, config.execution.output_dir)
+        write_derivative_description(
+            config.execution.fmri_dir,
+            config.execution.output_dir,
+            dataset_links=config.execution.dataset_links,
+        )
 
         if config.execution.atlases:
             write_atlas_dataset_description(config.execution.output_dir / "atlases")
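With the dict branches added to _Config.load() and execution.get(), the new dataset_links setting survives the round trip through the dumped TOML config file: get() stringifies the values on save, and load() re-inflates them as absolute Paths. A minimal standalone sketch of that round trip (the helper names are illustrative, not XCP-D's actual _Config API):

    from pathlib import Path

    def to_settings(links):
        # What execution.get() now does for dict-valued path settings.
        return {key: str(val) for key, val in links.items()}

    def from_settings(links):
        # What _Config.load() now does when a path setting arrives as a dict.
        return {key: Path(val).absolute() for key, val in links.items()}

    links = {"preprocessed": Path("/data/fmriprep")}
    assert from_settings(to_settings(links)) == links
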
diff --git a/xcp_d/tests/test_utils_bids.py b/xcp_d/tests/test_utils_bids.py
index 7b731a7cd..1f5e78e50 100644
--- a/xcp_d/tests/test_utils_bids.py
+++ b/xcp_d/tests/test_utils_bids.py
@@ -253,23 +253,30 @@ def test_collect_morphometry_data(datasets, tmp_path_factory):
     assert morph_file_types == []
 
 
-def test_write_dataset_description(datasets, tmp_path_factory, caplog):
-    """Test write_dataset_description."""
-    tmpdir = tmp_path_factory.mktemp("test_write_dataset_description")
+def test_write_derivative_description(datasets, tmp_path_factory, caplog):
+    """Test write_derivative_description."""
+    tmpdir = tmp_path_factory.mktemp("test_write_derivative_description")
     dset_description = os.path.join(tmpdir, "dataset_description.json")
 
     # The function expects a description file in the fmri_dir.
     with pytest.raises(FileNotFoundError, match="Dataset description DNE"):
-        xbids.write_dataset_description(tmpdir, tmpdir, atlases=None, custom_confounds_folder=None)
+        xbids.write_derivative_description(
+            tmpdir,
+            tmpdir,
+            atlases=None,
+            custom_confounds_folder=None,
+            dataset_links={},
+        )
 
     assert not os.path.isfile(dset_description)
 
     # It will work when we give it a real fmri_dir.
     fmri_dir = datasets["ds001419"]
-    xbids.write_dataset_description(
+    xbids.write_derivative_description(
         fmri_dir,
         tmpdir,
         atlases=["Gordon"],
         custom_confounds_folder="/fake/path4",
+        dataset_links={"preprocessed": "/fake/path1"},
     )
     assert os.path.isfile(dset_description)
@@ -279,11 +286,12 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
     assert "'preprocessed' is already a dataset link" not in caplog.text
     assert "'custom_confounds' is already a dataset link" not in caplog.text
 
-    xbids.write_dataset_description(
+    xbids.write_derivative_description(
         tmpdir,
         tmpdir,
         atlases=["Gordon"],
-        custom_confounds_folder="/fake/path4",
+        custom_confounds_folder="/fake/path5",
+        dataset_links={"preprocessed": "/fake/path2"},
     )
     assert "'preprocessed' is already a dataset link" in caplog.text
     assert "'custom_confounds' is already a dataset link" in caplog.text
@@ -294,7 +302,13 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)
 
     assert "Previous output generated by version" not in caplog.text
-    xbids.write_dataset_description(fmri_dir, tmpdir, atlases=None, custom_confounds_folder=None)
+    xbids.write_derivative_description(
+        fmri_dir,
+        tmpdir,
+        atlases=None,
+        custom_confounds_folder=None,
+        dataset_links={},
+    )
     assert "Previous output generated by version" in caplog.text
 
     # Should raise a warning if DatasetType is not in the description
@@ -303,7 +317,13 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)
 
     assert "DatasetType key not in" not in caplog.text
-    xbids.write_dataset_description(tmpdir, tmpdir, atlases=None, custom_confounds_folder=None)
+    xbids.write_derivative_description(
+        tmpdir,
+        tmpdir,
+        atlases=None,
+        custom_confounds_folder=None,
+        dataset_links={},
+    )
     assert "DatasetType key not in" in caplog.text
 
     # Should raise an error if DatasetType is present, but isn't "derivative"
@@ -312,11 +332,12 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)
 
     with pytest.raises(ValueError, match="XCP-D only works on derivative datasets."):
-        xbids.write_dataset_description(
+        xbids.write_derivative_description(
             tmpdir,
             tmpdir,
             atlases=None,
             custom_confounds_folder=None,
+            dataset_links={},
         )
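The warning assertions above rely on pytest's built-in caplog fixture, which captures log records emitted during the test. The pattern in isolation, with a stand-in for the overwrite branch of write_derivative_description (the logger name is taken from the test modules above; the function itself is hypothetical):

    import logging

    def relink(links, key, value):
        # Stand-in for the DatasetLinks overwrite branch: warn only on a real change.
        if key in links and links[key] != str(value):
            logging.getLogger("nipype.utils").warning(
                f"'{key}' is already a dataset link. Overwriting."
            )
        links[key] = str(value)

    def test_relink_warns(caplog):
        links = {"preprocessed": "/fake/path1"}
        with caplog.at_level(logging.WARNING, logger="nipype.utils"):
            relink(links, "preprocessed", "/fake/path2")
        assert "'preprocessed' is already a dataset link" in caplog.text
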
diff --git a/xcp_d/utils/bids.py b/xcp_d/utils/bids.py
index 1a2848d37..af08cb143 100644
--- a/xcp_d/utils/bids.py
+++ b/xcp_d/utils/bids.py
@@ -655,7 +655,13 @@ def collect_run_data(layout, bold_file, file_format, target_space):
     return run_data
 
 
-def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confounds_folder=None):
+def write_derivative_description(
+    fmri_dir,
+    output_dir,
+    atlases=None,
+    custom_confounds_folder=None,
+    dataset_links={},
+):
     """Write dataset_description.json file for derivatives.
 
     Parameters
@@ -668,6 +674,8 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confounds_folder=None):
         Names of requested XCP-D atlases.
     custom_confounds_folder : :obj:`str`, optional
         Path to the folder containing custom confounds files.
+    dataset_links : :obj:`dict`, optional
+        Dictionary of dataset links to include in the dataset description.
     """
     import json
     import os
@@ -679,22 +687,22 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confounds_folder=None):
         raise FileNotFoundError(f"Dataset description DNE: {orig_dset_description}")
 
     with open(orig_dset_description, "r") as fo:
-        dset_desc = json.load(fo)
+        desc = json.load(fo)
 
     # Check if the dataset type is derivative
-    if "DatasetType" not in dset_desc.keys():
+    if "DatasetType" not in desc.keys():
         LOGGER.warning(f"DatasetType key not in {orig_dset_description}. Assuming 'derivative'.")
-        dset_desc["DatasetType"] = "derivative"
+        desc["DatasetType"] = "derivative"
 
-    if dset_desc.get("DatasetType", "derivative") != "derivative":
+    if desc.get("DatasetType", "derivative") != "derivative":
         raise ValueError(
             f"DatasetType key in {orig_dset_description} is not 'derivative'. "
             "XCP-D only works on derivative datasets."
         )
 
     # Update dataset description
-    dset_desc["Name"] = "XCP-D: A Robust Postprocessing Pipeline of fMRI data"
-    generated_by = dset_desc.get("GeneratedBy", [])
+    desc["Name"] = "XCP-D: A Robust Postprocessing Pipeline of fMRI data"
+    generated_by = desc.get("GeneratedBy", [])
     generated_by.insert(
         0,
         {
@@ -703,42 +711,38 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confounds_folder=None):
             "CodeURL": DOWNLOAD_URL,
         },
     )
-    dset_desc["GeneratedBy"] = generated_by
-    dset_desc["HowToAcknowledge"] = "Include the generated boilerplate in the methods section."
-
-    # Add DatasetLinks
-    if "DatasetLinks" not in dset_desc.keys():
-        dset_desc["DatasetLinks"] = {}
+    desc["GeneratedBy"] = generated_by
+    desc["HowToAcknowledge"] = "Include the generated boilerplate in the methods section."
 
-    if "preprocessed" in dset_desc["DatasetLinks"].keys():
-        LOGGER.warning("'preprocessed' is already a dataset link. Overwriting.")
+    dataset_links = dataset_links.copy()
 
-    dset_desc["DatasetLinks"]["preprocessed"] = str(fmri_dir)
+    # Replace local templateflow path with URL
+    dataset_links["templateflow"] = "https://github.com/templateflow/templateflow"
 
     if atlases:
-        if "atlases" in dset_desc["DatasetLinks"].keys():
-            LOGGER.warning("'atlases' is already a dataset link. Overwriting.")
-
-        dset_desc["DatasetLinks"]["atlases"] = os.path.join(output_dir, "atlases")
+        dataset_links["atlases"] = os.path.join(output_dir, "atlases")
 
     if custom_confounds_folder:
-        if "custom_confounds" in dset_desc["DatasetLinks"].keys():
-            LOGGER.warning("'custom_confounds' is already a dataset link. Overwriting.")
+        dataset_links["custom_confounds"] = str(custom_confounds_folder)
 
-        dset_desc["DatasetLinks"]["custom_confounds"] = str(custom_confounds_folder)
+    # Add DatasetLinks
+    if "DatasetLinks" not in desc.keys():
+        desc["DatasetLinks"] = {}
 
-    xcpd_dset_description = os.path.join(output_dir, "dataset_description.json")
-    if os.path.isfile(xcpd_dset_description):
-        with open(xcpd_dset_description, "r") as fo:
-            old_dset_desc = json.load(fo)
+    for k, v in dataset_links.items():
+        if k in desc["DatasetLinks"].keys() and desc["DatasetLinks"][k] != str(v):
+            LOGGER.warning(f"'{k}' is already a dataset link. Overwriting.")
 
-        old_version = old_dset_desc["GeneratedBy"][0]["Version"]
+        desc["DatasetLinks"][k] = str(v)
+
+    xcpd_dset_description = Path(output_dir) / "dataset_description.json"
+    if xcpd_dset_description.is_file():
+        old_desc = json.loads(xcpd_dset_description.read_text())
+        old_version = old_desc["GeneratedBy"][0]["Version"]
 
         if Version(__version__).public != Version(old_version).public:
             LOGGER.warning(f"Previous output generated by version {old_version} found.")
 
-    else:
-        with open(xcpd_dset_description, "w") as fo:
-            json.dump(dset_desc, fo, indent=4, sort_keys=True)
+    xcpd_dset_description.write_text(json.dumps(desc, indent=4))
 
 
 def write_atlas_dataset_description(atlas_dir):
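Note that the merge loop above warns only when an existing link would actually change, which is why the tests pass different fake paths on the second call. For a run configured with atlases and a custom confounds folder, the resulting DatasetLinks block in the output dataset_description.json would look roughly like this (paths illustrative):

    # The templateflow entry is always rewritten from the local cache path to the URL.
    expected_links = {
        "preprocessed": "/data/fmriprep",
        "templateflow": "https://github.com/templateflow/templateflow",
        "atlases": "/out/xcp_d/atlases",
        "custom_confounds": "/data/custom_confounds",
    }
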
Overwriting.") + dataset_links["custom_confounds"] = str(custom_confounds_folder) - dset_desc["DatasetLinks"]["custom_confounds"] = str(custom_confounds_folder) + # Add DatasetLinks + if "DatasetLinks" not in desc.keys(): + desc["DatasetLinks"] = {} - xcpd_dset_description = os.path.join(output_dir, "dataset_description.json") - if os.path.isfile(xcpd_dset_description): - with open(xcpd_dset_description, "r") as fo: - old_dset_desc = json.load(fo) + for k, v in dataset_links.items(): + if k in desc["DatasetLinks"].keys() and desc["DatasetLinks"][k] != str(v): + LOGGER.warning(f"'{k}' is already a dataset link. Overwriting.") - old_version = old_dset_desc["GeneratedBy"][0]["Version"] + desc["DatasetLinks"][k] = str(v) + + xcpd_dset_description = Path(output_dir / "dataset_description.json") + if xcpd_dset_description.is_file(): + old_desc = json.loads(xcpd_dset_description.read_text()) + old_version = old_desc["GeneratedBy"][0]["Version"] if Version(__version__).public != Version(old_version).public: LOGGER.warning(f"Previous output generated by version {old_version} found.") - else: - with open(xcpd_dset_description, "w") as fo: - json.dump(dset_desc, fo, indent=4, sort_keys=True) + xcpd_dset_description.write_text(json.dumps(desc, indent=4)) def write_atlas_dataset_description(atlas_dir): @@ -754,7 +758,7 @@ def write_atlas_dataset_description(atlas_dir): from xcp_d.__about__ import DOWNLOAD_URL, __version__ - dset_desc = { + desc = { "Name": "XCP-D Atlases", "DatasetType": "atlas", "GeneratedBy": [ @@ -779,7 +783,7 @@ def write_atlas_dataset_description(atlas_dir): else: with open(atlas_dset_description, "w") as fo: - json.dump(dset_desc, fo, indent=4, sort_keys=True) + json.dump(desc, fo, indent=4, sort_keys=True) def get_preproc_pipeline_info(input_type, fmri_dir):