Add dataset links to config object #1266

Merged · 8 commits · Sep 17, 2024
10 changes: 8 additions & 2 deletions xcp_d/cli/run.py
@@ -21,7 +21,7 @@ def main():
     from xcp_d.cli.workflow import build_workflow
     from xcp_d.utils.bids import (
         write_atlas_dataset_description,
-        write_dataset_description,
+        write_derivative_description,
     )

     parse_args(args=sys.argv[1:])
@@ -160,7 +160,13 @@ def main():
    from xcp_d.reports.core import generate_reports

    # Write dataset description before generating reports
-    write_dataset_description(config.execution.fmri_dir, config.execution.output_dir)
+    write_derivative_description(
+        config.execution.fmri_dir,
+        config.execution.output_dir,
+        atlases=config.execution.atlases,
+        custom_confounds_folder=config.execution.custom_confounds,
+        dataset_links=config.execution.dataset_links,
+    )

    if config.execution.atlases:
        write_atlas_dataset_description(config.execution.output_dir / "atlases")
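For context, the net effect of the new call is that the derivative `dataset_description.json` gains a `DatasetLinks` block assembled from the config, plus a few overrides applied inside `write_derivative_description` (see the `xcp_d/utils/bids.py` diff below). A rough sketch of the result, with illustrative paths rather than values from a real run:

```python
# Hypothetical DatasetLinks block written to <output_dir>/dataset_description.json.
# Keys mirror config.execution.dataset_links plus the overrides applied in
# write_derivative_description(); every path below is made up.
expected_links = {
    "preprocessed": "/data/derivatives/fmriprep",  # config.execution.fmri_dir
    "templateflow": "https://github.com/templateflow/templateflow",  # local path swapped for URL
    "atlases": "/data/derivatives/xcp_d/atlases",  # only when atlases were requested
    "custom_confounds": "/data/custom_confounds",  # only when a custom confounds folder was given
}
```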
15 changes: 15 additions & 0 deletions xcp_d/config.py
@@ -90,6 +90,8 @@
 import os
 from multiprocessing import set_start_method

+from templateflow.conf import TF_LAYOUT
+
 # Disable NiPype etelemetry always
 _disable_et = bool(os.getenv("NO_ET") is not None or os.getenv("NIPYPE_NO_ET") is not None)
 os.environ["NIPYPE_NO_ET"] = "1"
@@ -227,6 +229,8 @@ def load(cls, settings, init=True, ignore=None):
             if k in cls._paths:
                 if isinstance(v, (list, tuple)):
                     setattr(cls, k, [Path(val).absolute() for val in v])
+                elif isinstance(v, dict):
+                    setattr(cls, k, {key: Path(val).absolute() for key, val in v.items()})
                 else:
                     setattr(cls, k, Path(v).absolute())
             elif hasattr(cls, k):
@@ -252,6 +256,8 @@ def get(cls):
             if k in cls._paths:
                 if isinstance(v, (list, tuple)):
                     v = [str(val) for val in v]
+                elif isinstance(v, dict):
+                    v = {key: str(val) for key, val in v.items()}
                 else:
                     v = str(v)
             if isinstance(v, SpatialReferences):
@@ -419,6 +425,8 @@ class execution(_Config):
     """Path to a working directory where intermediate results will be available."""
     write_graph = None
     """Write out the computational graph corresponding to the planned preprocessing."""
+    dataset_links = {}
+    """A dictionary of dataset links to be used to track Sources in sidecars."""

     _layout = None

@@ -431,6 +439,7 @@
         "output_dir",
         "templateflow_home",
         "work_dir",
+        "dataset_links",
     )

     @classmethod
@@ -501,6 +510,12 @@ def _process_value(value):
                 for k, v in filters.items():
                     cls.bids_filters[acq][k] = _process_value(v)

+        dataset_links = {
+            'preprocessed': cls.fmri_dir,
+            'templateflow': Path(TF_LAYOUT.root),
+        }
+        cls.dataset_links = dataset_links
+
         if "all" in cls.debug:
             cls.debug = list(DEBUG_MODES)

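The two new `dict` branches in `_Config.load`/`_Config.get` mean a path-valued dictionary like `dataset_links` now survives the dump/reload cycle the config file goes through. A minimal round-trip sketch of that behavior (standalone illustration, not xcp_d code):

```python
from pathlib import Path

# What get() emits for a dict-valued _paths entry: every value stringified.
links = {"preprocessed": Path("/data/fmriprep"), "templateflow": Path("/opt/templateflow")}
serialized = {key: str(val) for key, val in links.items()}

# What load() does on the way back in: every value restored to an absolute Path.
restored = {key: Path(val).absolute() for key, val in serialized.items()}
assert restored == links
```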
8 changes: 6 additions & 2 deletions xcp_d/tests/test_cli.py
@@ -22,7 +22,7 @@
     get_test_data_path,
     list_files,
 )
-from xcp_d.utils.bids import write_atlas_dataset_description, write_dataset_description
+from xcp_d.utils.bids import write_atlas_dataset_description, write_derivative_description

 LOGGER = logging.getLogger("nipype.utils")

@@ -475,7 +475,11 @@ def _run_and_generate(test_name, parameters, input_type, test_main=False):
         retval = build_workflow(config_file, retval={})
         xcpd_wf = retval["workflow"]
         xcpd_wf.run(**config.nipype.get_plugin())
-        write_dataset_description(config.execution.fmri_dir, config.execution.output_dir)
+        write_derivative_description(
+            config.execution.fmri_dir,
+            config.execution.output_dir,
+            dataset_links=config.execution.dataset_links,
+        )
         if config.execution.atlases:
             write_atlas_dataset_description(config.execution.output_dir / "atlases")

41 changes: 31 additions & 10 deletions xcp_d/tests/test_utils_bids.py
@@ -253,23 +253,30 @@ def test_collect_morphometry_data(datasets, tmp_path_factory):
     assert morph_file_types == []


-def test_write_dataset_description(datasets, tmp_path_factory, caplog):
-    """Test write_dataset_description."""
-    tmpdir = tmp_path_factory.mktemp("test_write_dataset_description")
+def test_write_derivative_description(datasets, tmp_path_factory, caplog):
+    """Test write_derivative_description."""
+    tmpdir = tmp_path_factory.mktemp("test_write_derivative_description")
     dset_description = os.path.join(tmpdir, "dataset_description.json")

     # The function expects a description file in the fmri_dir.
     with pytest.raises(FileNotFoundError, match="Dataset description DNE"):
-        xbids.write_dataset_description(tmpdir, tmpdir, atlases=None, custom_confounds_folder=None)
+        xbids.write_derivative_description(
+            tmpdir,
+            tmpdir,
+            atlases=None,
+            custom_confounds_folder=None,
+            dataset_links={},
+        )
     assert not os.path.isfile(dset_description)

     # It will work when we give it a real fmri_dir.
     fmri_dir = datasets["ds001419"]
-    xbids.write_dataset_description(
+    xbids.write_derivative_description(
         fmri_dir,
         tmpdir,
         atlases=["Gordon"],
         custom_confounds_folder="/fake/path4",
+        dataset_links={"preprocessed": "/fake/path1"},
     )
     assert os.path.isfile(dset_description)

@@ -279,11 +286,12 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):

     assert "'preprocessed' is already a dataset link" not in caplog.text
     assert "'custom_confounds' is already a dataset link" not in caplog.text
-    xbids.write_dataset_description(
+    xbids.write_derivative_description(
         tmpdir,
         tmpdir,
         atlases=["Gordon"],
-        custom_confounds_folder="/fake/path4",
+        custom_confounds_folder="/fake/path5",
+        dataset_links={"preprocessed": "/fake/path2"},
     )
     assert "'preprocessed' is already a dataset link" in caplog.text
     assert "'custom_confounds' is already a dataset link" in caplog.text
@@ -294,7 +302,13 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)

     assert "Previous output generated by version" not in caplog.text
-    xbids.write_dataset_description(fmri_dir, tmpdir, atlases=None, custom_confounds_folder=None)
+    xbids.write_derivative_description(
+        fmri_dir,
+        tmpdir,
+        atlases=None,
+        custom_confounds_folder=None,
+        dataset_links={},
+    )
     assert "Previous output generated by version" in caplog.text

     # Should raise a warning if DatasetType is not in the description
@@ -303,7 +317,13 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)

     assert "DatasetType key not in" not in caplog.text
-    xbids.write_dataset_description(tmpdir, tmpdir, atlases=None, custom_confounds_folder=None)
+    xbids.write_derivative_description(
+        tmpdir,
+        tmpdir,
+        atlases=None,
+        custom_confounds_folder=None,
+        dataset_links={},
+    )
     assert "DatasetType key not in" in caplog.text

     # Should raise an error if DatasetType is present, but isn't "derivative"
@@ -312,11 +332,12 @@ def test_write_dataset_description(datasets, tmp_path_factory, caplog):
         json.dump(desc, fo, indent=4)

     with pytest.raises(ValueError, match="XCP-D only works on derivative datasets."):
-        xbids.write_dataset_description(
+        xbids.write_derivative_description(
             tmpdir,
             tmpdir,
             atlases=None,
             custom_confounds_folder=None,
+            dataset_links={},
         )

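The paired `write_derivative_description` calls with different `dataset_links` values exercise the new overwrite warning, which only fires when a link name already exists with a *different* value. A simplified sketch of that check, mirroring the loop in the `xcp_d/utils/bids.py` diff below (paths are the test's fake values):

```python
desc = {"DatasetLinks": {"preprocessed": "/fake/path1"}}  # written by the first call
dataset_links = {"preprocessed": "/fake/path2"}           # passed on the second call

for k, v in dataset_links.items():
    # Warn only when the name is taken AND the value actually changes.
    if k in desc["DatasetLinks"] and desc["DatasetLinks"][k] != str(v):
        print(f"'{k}' is already a dataset link. Overwriting.")  # LOGGER.warning in xcp_d
    desc["DatasetLinks"][k] = str(v)
```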
70 changes: 37 additions & 33 deletions xcp_d/utils/bids.py
@@ -655,7 +655,13 @@ def collect_run_data(layout, bold_file, file_format, target_space):
     return run_data


-def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confounds_folder=None):
+def write_derivative_description(
+    fmri_dir,
+    output_dir,
+    atlases=None,
+    custom_confounds_folder=None,
+    dataset_links={},
+):
     """Write dataset_description.json file for derivatives.

     Parameters
@@ -668,6 +674,8 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confoun
         Names of requested XCP-D atlases.
     custom_confounds_folder : :obj:`str`, optional
         Path to the folder containing custom confounds files.
+    dataset_links : :obj:`dict`, optional
+        Dictionary of dataset links to include in the dataset description.
     """
     import json
     import os
@@ -679,22 +687,22 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confoun
         raise FileNotFoundError(f"Dataset description DNE: {orig_dset_description}")

     with open(orig_dset_description, "r") as fo:
-        dset_desc = json.load(fo)
+        desc = json.load(fo)

     # Check if the dataset type is derivative
-    if "DatasetType" not in dset_desc.keys():
+    if "DatasetType" not in desc.keys():
         LOGGER.warning(f"DatasetType key not in {orig_dset_description}. Assuming 'derivative'.")
-        dset_desc["DatasetType"] = "derivative"
+        desc["DatasetType"] = "derivative"

-    if dset_desc.get("DatasetType", "derivative") != "derivative":
+    if desc.get("DatasetType", "derivative") != "derivative":
         raise ValueError(
             f"DatasetType key in {orig_dset_description} is not 'derivative'. "
             "XCP-D only works on derivative datasets."
         )

     # Update dataset description
-    dset_desc["Name"] = "XCP-D: A Robust Postprocessing Pipeline of fMRI data"
-    generated_by = dset_desc.get("GeneratedBy", [])
+    desc["Name"] = "XCP-D: A Robust Postprocessing Pipeline of fMRI data"
+    generated_by = desc.get("GeneratedBy", [])
     generated_by.insert(
         0,
         {
@@ -703,42 +711,38 @@ def write_dataset_description(fmri_dir, output_dir, atlases=None, custom_confoun
             "CodeURL": DOWNLOAD_URL,
         },
     )
-    dset_desc["GeneratedBy"] = generated_by
-    dset_desc["HowToAcknowledge"] = "Include the generated boilerplate in the methods section."
+    desc["GeneratedBy"] = generated_by
+    desc["HowToAcknowledge"] = "Include the generated boilerplate in the methods section."

-    # Add DatasetLinks
-    if "DatasetLinks" not in dset_desc.keys():
-        dset_desc["DatasetLinks"] = {}
+    dataset_links = dataset_links.copy()

-    if "preprocessed" in dset_desc["DatasetLinks"].keys():
-        LOGGER.warning("'preprocessed' is already a dataset link. Overwriting.")
-
-    dset_desc["DatasetLinks"]["preprocessed"] = str(fmri_dir)
+    # Replace local templateflow path with URL
+    dataset_links["templateflow"] = "https://github.com/templateflow/templateflow"

     if atlases:
-        if "atlases" in dset_desc["DatasetLinks"].keys():
-            LOGGER.warning("'atlases' is already a dataset link. Overwriting.")
-
-        dset_desc["DatasetLinks"]["atlases"] = os.path.join(output_dir, "atlases")
+        dataset_links["atlases"] = os.path.join(output_dir, "atlases")

     if custom_confounds_folder:
-        if "custom_confounds" in dset_desc["DatasetLinks"].keys():
-            LOGGER.warning("'custom_confounds' is already a dataset link. Overwriting.")
-
-        dset_desc["DatasetLinks"]["custom_confounds"] = str(custom_confounds_folder)
+        dataset_links["custom_confounds"] = str(custom_confounds_folder)
+
+    # Add DatasetLinks
+    if "DatasetLinks" not in desc.keys():
+        desc["DatasetLinks"] = {}
+
+    for k, v in dataset_links.items():
+        if k in desc["DatasetLinks"].keys() and desc["DatasetLinks"][k] != str(v):
+            LOGGER.warning(f"'{k}' is already a dataset link. Overwriting.")
+
+        desc["DatasetLinks"][k] = str(v)

-    xcpd_dset_description = os.path.join(output_dir, "dataset_description.json")
-    if os.path.isfile(xcpd_dset_description):
-        with open(xcpd_dset_description, "r") as fo:
-            old_dset_desc = json.load(fo)
-
-        old_version = old_dset_desc["GeneratedBy"][0]["Version"]
+    xcpd_dset_description = Path(output_dir / "dataset_description.json")
+    if xcpd_dset_description.is_file():
+        old_desc = json.loads(xcpd_dset_description.read_text())
+        old_version = old_desc["GeneratedBy"][0]["Version"]
         if Version(__version__).public != Version(old_version).public:
             LOGGER.warning(f"Previous output generated by version {old_version} found.")

     else:
-        with open(xcpd_dset_description, "w") as fo:
-            json.dump(dset_desc, fo, indent=4, sort_keys=True)
+        xcpd_dset_description.write_text(json.dumps(desc, indent=4))


 def write_atlas_dataset_description(atlas_dir):
@@ -754,7 +758,7 @@ def write_atlas_dataset_description(atlas_dir):

     from xcp_d.__about__ import DOWNLOAD_URL, __version__

-    dset_desc = {
+    desc = {
         "Name": "XCP-D Atlases",
         "DatasetType": "atlas",
         "GeneratedBy": [
@@ -779,7 +783,7 @@ def write_atlas_dataset_description(atlas_dir):

     else:
         with open(atlas_dset_description, "w") as fo:
-            json.dump(dset_desc, fo, indent=4, sort_keys=True)
+            json.dump(desc, fo, indent=4, sort_keys=True)


 def get_preproc_pipeline_info(input_type, fmri_dir):
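As the new `dataset_links` docstring notes, these links exist so that sidecar `Sources` entries can reference files in other datasets via BIDS URIs (`bids:<link-name>:<relative-path>`) rather than absolute paths. An illustrative sketch of that mechanism, with a hypothetical filename:

```python
# Illustrative only: how a DatasetLinks entry turns an absolute source path into
# a portable BIDS URI for a sidecar's "Sources" field. The filename is made up.
dataset_links = {"preprocessed": "/data/derivatives/fmriprep"}

source = "/data/derivatives/fmriprep/sub-01/func/sub-01_task-rest_desc-preproc_bold.nii.gz"
relative = source.removeprefix(dataset_links["preprocessed"]).lstrip("/")

sidecar = {"Sources": [f"bids:preprocessed:{relative}"]}
# -> {'Sources': ['bids:preprocessed:sub-01/func/sub-01_task-rest_desc-preproc_bold.nii.gz']}
```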