Skip to content

Commit

Permalink
fixed recursive globbing and added manifest handling to upload
Browse files Browse the repository at this point in the history
  • Loading branch information
tclose committed Feb 14, 2024
1 parent 71901c1 commit 34f8fe7
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 12 deletions.
11 changes: 10 additions & 1 deletion xnat_ingest/cli/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,14 @@
envvar="XNAT_INGEST_WORKDIR",
help="The directory to use for temporary downloads (i.e. from s3)",
)
@click.option(
"--use-manifest/--dont-use-manifest",
default=None,
envvar="XNAT_INGEST_REQUIRE_MANIFEST",
help=("Whether to use the manifest file in the staged sessions to load the "
"directory structure. By default it is used if present and ignore if not there"),
type=bool,
)
def upload(
staged: str,
server: str,
Expand All @@ -139,6 +147,7 @@ def upload(
raise_errors: bool,
store_credentials: ty.Tuple[str, str],
work_dir: ty.Optional[Path],
use_manifest: bool,
):

set_logger_handling(log_level, log_file, log_emails, mail_server)
Expand Down Expand Up @@ -235,7 +244,7 @@ def iter_staged_sessions():
total=num_sessions,
desc=f"Processing staged sessions found in '{staged}'",
):
session = ImagingSession.load(session_staging_dir)
session = ImagingSession.load(session_staging_dir, use_manifest=use_manifest)
try:
if "MR" in session.modalities:
SessionClass = xnat_repo.connection.classes.MrSessionData
Expand Down
29 changes: 19 additions & 10 deletions xnat_ingest/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging
import os.path
import subprocess as sp
from pprint import pprint
from functools import cached_property
import shutil
from copy import deepcopy
Expand Down Expand Up @@ -233,7 +232,7 @@ def from_dicoms(
else:
dicom_fspaths = [dicoms_path]
else:
dicom_fspaths = [Path(p) for p in glob(dicoms_path)]
dicom_fspaths = [Path(p) for p in glob(dicoms_path, recursive=True)]

# Sort loaded series by StudyInstanceUID (imaging session)
logger.info("Loading DICOM series from %s", str(dicoms_path))
Expand Down Expand Up @@ -303,20 +302,23 @@ def get_id(field):
return sessions

@classmethod
def load(cls, session_dir: Path, ignore_manifest: bool = False) -> "ImagingSession":
def load(cls, session_dir: Path, use_manifest: ty.Optional[bool] = None) -> "ImagingSession":
"""Loads a session from a directory. Assumes that the name of the directory is
the name of the session dir and the parent directory is the subject ID and the
grandparent directory is the project ID. The scan information is loaded from a YAML
along with the scan type, resources and fileformats. If the YAML file is not found
or `ignore_manifest` is set to True, the session is loaded based on the directory
or `use_manifest` is set to False, the session is loaded based on the directory
structure.
Parameters
----------
session_dir : Path
the path to the directory where the session is saved
ignore_manifest: bool
load the session based on the directory structure instead of the YAML file
use_manifest: bool, optional
controls whether the session is loaded from the YAML manifest or inferred from
the directory structure. If True, the manifest is required and an error is raised
if it isn't present. If False, the manifest is ignored. If None (the default), the
manifest is used if present, otherwise the directory structure is used.
Returns
-------
Expand All @@ -327,7 +329,7 @@ def load(cls, session_dir: Path, ignore_manifest: bool = False) -> "ImagingSessi
subject_id = session_dir.parent.name
session_id = session_dir.name
yaml_file = session_dir / cls.MANIFEST_FILENAME
if yaml_file.exists() and not ignore_manifest:
if yaml_file.exists() and use_manifest is not False:
# Load session from YAML file metadata
try:
with open(yaml_file) as f:
Expand Down Expand Up @@ -361,7 +363,7 @@ def load(cls, session_dir: Path, ignore_manifest: bool = False) -> "ImagingSessi
session_id=session_id,
**dct,
)
else:
elif use_manifest is not True:
# Load session based on directory structure
scans = []
for scan_dir in session_dir.iterdir():
Expand All @@ -384,6 +386,12 @@ def load(cls, session_dir: Path, ignore_manifest: bool = False) -> "ImagingSessi
subject_id=subject_id,
session_id=session_id,
)
else:
raise FileNotFoundError(
f"Did not find manifest file '{yaml_file}' in session directory "
f"{session_dir}. If you want to fallback to load the session based on "
"the directory structure instead, set `use_manifest` to None."
)
return session

def save(self, save_dir: Path, just_manifest: bool = False) -> "ImagingSession":
Expand Down Expand Up @@ -537,7 +545,7 @@ def stage(
for dicom_dir in self.dicom_dirs:
assoc_glob = dicom_dir / associated_files.glob.format(**self.metadata)
# Select files using the constructed glob pattern
associated_fspaths.update(Path(p) for p in glob(str(assoc_glob)))
associated_fspaths.update(Path(p) for p in glob(str(assoc_glob), recursive=True))

logger.info(
"Found %s associated file paths matching '%s'",
Expand Down Expand Up @@ -568,8 +576,10 @@ def stage(
old, dest_path, remove_original=remove_original
)
elif remove_original:
logger.debug("Moving %s to %s", old, dest_path)
old.rename(dest_path)
else:
logger.debug("Copying %s to %s", old, dest_path)
shutil.copyfile(old, dest_path)
staged_associated_fspaths.append(dest_path)
else:
Expand Down Expand Up @@ -604,7 +614,6 @@ def stage(
f"'{prev_scan_type}' for scan ID '{scan_id}'"
)
assoc_resources[resource].append(fspath)
pprint(assoc_scans)
for scan_id, (scan_type, scan_resources_dict) in tqdm(
assoc_scans.items(), "moving associated files to staging directory"
):
Expand Down
2 changes: 1 addition & 1 deletion xnat_ingest/tests/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def test_session_save_roundtrip(tmp_path: Path, imaging_session: ImagingSession)

# Load from saved directory, this time only using directory structure instead of
# manifest. Should be the same with the exception of the detected fileformats
loaded_no_manifest = ImagingSession.load(session_dir, ignore_manifest=True)
loaded_no_manifest = ImagingSession.load(session_dir, use_manifest=False)
for scan in loaded_no_manifest.scans.values():
for key, resource in list(scan.resources.items()):
if key == "DICOM":
Expand Down

0 comments on commit 34f8fe7

Please sign in to comment.