Skip to content

Commit

Permalink
Merge pull request #15 from Australian-Imaging-Service/develop
Browse files Browse the repository at this point in the history
added spaces-to-underscores option for associated file globs
  • Loading branch information
tclose authored Jul 22, 2024
2 parents f4dca1e + b16d887 commit 2a42035
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 9 deletions.
11 changes: 10 additions & 1 deletion xnat_ingest/cli/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
'The "id-pattern" arg is a regular expression that is used to extract the scan ID & '
"type/resource from the associated filename. Should be a regular-expression "
"(Python syntax) with named groups called 'id' and 'type', e.g. "
r"--assoc-id-pattern '[^\.]+\.[^\.]+\.(?P<id>\d+)\.(?P<type>\w+)\..*'"
r"'[^\.]+\.[^\.]+\.(?P<id>\d+)\.(?P<type>\w+)\..*'"
),
)
@click.option(
Expand Down Expand Up @@ -173,6 +173,13 @@
help="The XNAT server to upload to plus the user and password to use",
envvar="XNAT_INGEST_TRANSFER_XNAT_LOGIN",
)
@click.option(
"--spaces-to-underscores/--no-spaces-to-underscores",
default=False,
help="Whether to replace spaces with underscores in the filenames of associated files",
envvar="XNAT_INGEST_STAGE_SPACES_TO_UNDERSCORES",
type=bool,
)
def stage(
dicoms_path: str,
staging_dir: Path,
Expand All @@ -190,6 +197,7 @@ def stage(
raise_errors: bool,
deidentify: bool,
xnat_login: XnatLogin,
spaces_to_underscores: bool,
):
set_logger_handling(
log_level=log_level,
Expand Down Expand Up @@ -247,6 +255,7 @@ def stage(
remove_original=delete,
deidentify=deidentify,
project_list=project_list,
spaces_to_underscores=spaces_to_underscores,
)
except Exception as e:
if not raise_errors:
Expand Down
13 changes: 11 additions & 2 deletions xnat_ingest/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,7 @@ def stage(
remove_original: bool = False,
deidentify: bool = True,
project_list: ty.Optional[ty.List[str]] = None,
spaces_to_underscores: bool = False,
) -> "ImagingSession":
r"""Stages and deidentifies files by removing the fields listed `FIELDS_TO_ANONYMISE` and
replacing birth date with 01/01/<BIRTH-YEAR> and returning new imaging session
Expand Down Expand Up @@ -540,6 +541,9 @@ def stage(
project_list : list[str], optional
list of available projects in the store, used to check whether the project ID
is valid
spaces_to_underscores : bool, optional
when building associated file globs, convert spaces to underscores in fields
extracted from source file metadata, false by default
Returns
-------
Expand Down Expand Up @@ -599,10 +603,14 @@ def stage(
# with current session
associated_fspaths: ty.Set[Path] = set()
for dicom_dir in self.dicom_dirs:
assoc_glob = dicom_dir / associated_files.glob.format(**self.metadata)
assoc_glob = str(
dicom_dir / associated_files.glob.format(**self.metadata)
)
if spaces_to_underscores:
assoc_glob = assoc_glob.replace(" ", "_")
# Select files using the constructed glob pattern
associated_fspaths.update(
Path(p) for p in glob(str(assoc_glob), recursive=True)
Path(p) for p in glob(assoc_glob, recursive=True)
)

logger.info(
Expand Down Expand Up @@ -630,6 +638,7 @@ def stage(
assoc_glob_pattern,
self.metadata,
staged_metadata,
spaces_to_underscores=spaces_to_underscores,
)
staged_associated_fspaths = []

Expand Down
37 changes: 31 additions & 6 deletions xnat_ingest/tests/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,22 @@
)
from arcana.core.data.set import Dataset
from arcana.common import DirTree
from medimages4tests.dummy.dicom.base import default_dicom_dir
from medimages4tests.dummy.dicom.pet.wholebody.siemens.biograph_vision.vr20b import (
get_image as get_pet_image,
__file__ as pet_src_file,
)
from medimages4tests.dummy.dicom.ct.ac.siemens.biograph_vision.vr20b import (
get_image as get_ac_image,
__file__ as ac_src_file,
)
from medimages4tests.dummy.dicom.pet.topogram.siemens.biograph_vision.vr20b import (
get_image as get_topogram_image,
__file__ as topogram_src_file,
)
from medimages4tests.dummy.dicom.pet.statistics.siemens.biograph_vision.vr20b import (
get_image as get_statistics_image,
__file__ as statistics_src_file,
)
from medimages4tests.dummy.raw.pet.siemens.biograph_vision.vr20b import (
get_files as get_raw_data_files,
Expand All @@ -29,20 +34,37 @@
from xnat_ingest.utils import AssociatedFiles


FIRST_NAME = "GivenName"
FIRST_NAME = "Given Name"
LAST_NAME = "FamilyName"


@pytest.fixture
def imaging_session() -> ImagingSession:
PatientName = f"{FIRST_NAME}^{LAST_NAME}"
default_dicom_dir
dicoms = [
DicomSeries(d.iterdir())
for d in (
get_pet_image(PatientName=PatientName),
get_ac_image(PatientName=PatientName),
get_topogram_image(PatientName=PatientName),
get_statistics_image(PatientName=PatientName),
get_pet_image(
out_dir=default_dicom_dir(pet_src_file).with_suffix(".with-spaces"),
PatientName=PatientName,
),
get_ac_image(
out_dir=default_dicom_dir(ac_src_file).with_suffix(".with-spaces"),
PatientName=PatientName,
),
get_topogram_image(
out_dir=default_dicom_dir(topogram_src_file).with_suffix(
".with-spaces"
),
PatientName=PatientName,
),
get_statistics_image(
out_dir=default_dicom_dir(statistics_src_file).with_suffix(
".with-spaces"
),
PatientName=PatientName,
),
)
]
scans = [
Expand Down Expand Up @@ -123,7 +145,9 @@ def test_session_select_resources(
assoc_dir = tmp_path / "assoc"
assoc_dir.mkdir()

for fspath in get_raw_data_files(first_name=FIRST_NAME, last_name=LAST_NAME):
for fspath in get_raw_data_files(
first_name=FIRST_NAME.replace(" ", "_"), last_name=LAST_NAME
):
fspath.rename(assoc_dir / fspath.name)

staging_dir = tmp_path / "staging"
Expand All @@ -135,6 +159,7 @@ def test_session_select_resources(
str(assoc_dir) + "/{PatientName.given_name}_{PatientName.family_name}*.ptd",
r".*/[^\.]+.[^\.]+.[^\.]+.(?P<id>\d+)\.[A-Z]+_(?P<resource>[^\.]+).*",
),
spaces_to_underscores=True,
)

resources = list(staged_session.select_resources(dataset))
Expand Down
5 changes: 5 additions & 0 deletions xnat_ingest/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def transform_paths(
glob_pattern: str,
old_values: dict[str, str],
new_values: dict[str, str],
spaces_to_underscores: bool = False,
) -> list[Path]:
"""Applys the transforms FS paths matching `glob_pattern` by replacing the template values
found in the `old_values` dict to the values in `new_values`. Used to strip any identifying
Expand All @@ -319,6 +320,8 @@ def transform_paths(
the values used to parameterise the existing file paths
new_values : dict[str, str]
the new values to parameterise the transformed file paths
spaces_to_underscores : bool, optional
whether to replace spaces with underscores in the old values before they are matched against the existing paths
Returns
-------
Expand Down Expand Up @@ -347,6 +350,8 @@ def str_templ_to_regex_group(match) -> str:
if attr_name:
groupname += "__" + attr_name
old_val = getattr(old_val, attr_name)
if spaces_to_underscores:
old_val = old_val.replace(" ", "_")
groupname += "__" + str(group_count[fieldname])
group_str = f"(?P<{groupname}>{old_val})"
group_count[fieldname] += 1
Expand Down

0 comments on commit 2a42035

Please sign in to comment.