Skip to content

Commit

Permalink
Merge pull request #3 from Australian-Imaging-Service/develop
Browse files Browse the repository at this point in the history
Bugfix for scan type names with special characters
  • Loading branch information
tclose authored Mar 15, 2024
2 parents fea9c8d + c9354f4 commit 503dcf1
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 62 deletions.
21 changes: 11 additions & 10 deletions xnat_ingest/cli/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,23 +60,23 @@
)
@click.option(
"--associated-files",
type=AssociatedFiles(),
type=AssociatedFiles.cli_type,
nargs=2,
default=None,
envvar="XNAT_INGEST_ASSOCIATED",
metavar="<glob> <id-pattern>",
help=(
"The \"glob\" arg is a glob pattern by which to detect associated files to be "
'The "glob" arg is a glob pattern by which to detect associated files to be '
"attached to the DICOM sessions. Note that when this pattern corresponds to a "
"relative path it is considered to be relative to the parent directory containing "
"the DICOMs for the session NOT the current working directory Can contain string "
"templates corresponding to DICOM metadata fields, which are substituted before "
"the glob is called. For example, "
'"./associated/{PatientName.given_name}_{PatientName.family_name}/*)\" '
'"./associated/{PatientName.given_name}_{PatientName.family_name}/*)" '
"will find all files under the subdirectory within '/path/to/dicoms/associated' that matches "
"<GIVEN-NAME>_<FAMILY-NAME>. Will be interpreted as being relative to `dicoms_dir` "
"if a relative path is provided.\n"
"The \"id-pattern\" arg is a regular expression that is used to extract the scan ID & "
'The "id-pattern" arg is a regular expression that is used to extract the scan ID & '
"type/resource from the associated filename. Should be a regular-expression "
"(Python syntax) with named groups called 'id' and 'type', e.g. "
r"--assoc-id-pattern '[^\.]+\.[^\.]+\.(?P<id>\d+)\.(?P<type>\w+)\..*'"
Expand All @@ -98,7 +98,7 @@
@click.option(
"--log-file",
default=None,
type=LogFile(),
type=LogFile.cli_type,
nargs=2,
metavar="<path> <loglevel>",
envvar="XNAT_INGEST_LOGFILE",
Expand All @@ -110,7 +110,7 @@
@click.option(
"--log-email",
"log_emails",
type=LogEmail(),
type=LogEmail.cli_type,
nargs=3,
metavar="<address> <loglevel> <subject-preamble>",
multiple=True,
Expand All @@ -122,7 +122,7 @@
)
@click.option(
"--mail-server",
type=MailServer(),
type=MailServer.cli_type,
nargs=4,
metavar="<host> <sender-email> <user> <password>",
default=None,
Expand All @@ -142,7 +142,7 @@
"--deidentify/--dont-deidentify",
default=False,
type=bool,
help="whether to deidentify the file names and DICOM metadata before staging"
help="whether to deidentify the file names and DICOM metadata before staging",
)
def stage(
dicoms_path: str,
Expand All @@ -160,7 +160,7 @@ def stage(
raise_errors: bool,
deidentify: bool,
):
set_logger_handling(log_level, log_file, log_emails, mail_server)
set_logger_handling(log_level, log_emails, log_file, mail_server)

msg = f"Loading DICOM sessions from '{dicoms_path}'"

Expand Down Expand Up @@ -193,7 +193,8 @@ def stage(
continue
# Identify theDeidentify files if necessary and save them to the staging directory
session.stage(
staging_dir, associated_files=associated_files,
staging_dir,
associated_files=associated_files,
remove_original=delete,
deidentify=deidentify,
)
Expand Down
17 changes: 8 additions & 9 deletions xnat_ingest/cli/transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def remove_old_files_on_ssh(remote_store: str, threshold: int):
@click.argument("remote_store", type=str, envvar="XNAT_INGEST_REMOTE_STORE")
@click.option(
"--store-credentials",
type=str,
type=click.Path(path_type=Path),
metavar="<access-key> <secret-key>",
envvar="XNAT_INGEST_STORE_CREDENTIALS",
default=None,
Expand All @@ -99,7 +99,7 @@ def remove_old_files_on_ssh(remote_store: str, threshold: int):
@click.option(
"--log-file",
default=None,
type=LogFile(),
type=LogFile.cli_type,
nargs=2,
metavar="<path> <loglevel>",
envvar="XNAT_INGEST_LOGFILE",
Expand All @@ -111,7 +111,7 @@ def remove_old_files_on_ssh(remote_store: str, threshold: int):
@click.option(
"--log-email",
"log_emails",
type=LogEmail(),
type=LogEmail.cli_type,
nargs=3,
metavar="<address> <loglevel> <subject-preamble>",
multiple=True,
Expand All @@ -123,7 +123,7 @@ def remove_old_files_on_ssh(remote_store: str, threshold: int):
)
@click.option(
"--mail-server",
type=MailServer(),
type=MailServer.cli_type,
metavar="<host> <sender-email> <user> <password>",
default=None,
envvar="XNAT_INGEST_MAILSERVER",
Expand Down Expand Up @@ -161,20 +161,19 @@ def remove_old_files_on_ssh(remote_store: str, threshold: int):
help="The number of days to keep files in the remote store for",
)
def transfer(
staging_dir: str,
staging_dir: Path,
remote_store: str,
credentials: ty.Tuple[str, str],
log_file: ty.Tuple[str, str],
log_file: LogFile,
log_level: str,
log_emails: ty.List[ty.Tuple[str, str, str]],
mail_server: ty.Tuple[str, str, str, str],
log_emails: ty.List[LogEmail],
mail_server: ty.Tuple[MailServer],
delete: bool,
raise_errors: bool,
xnat_login: ty.Optional[ty.Tuple[str, str, str]],
clean_up_older_than: int,
):

staging_dir = Path(staging_dir)
if not staging_dir.exists():
raise ValueError(f"Staging directory '{staging_dir}' does not exist")

Expand Down
6 changes: 3 additions & 3 deletions xnat_ingest/cli/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
@click.option(
"--log-file",
default=None,
type=LogFile(),
type=LogFile.cli_type,
nargs=2,
metavar="<path> <loglevel>",
envvar="XNAT_INGEST_LOGFILE",
Expand All @@ -72,7 +72,7 @@
@click.option(
"--log-email",
"log_emails",
type=LogEmail(),
type=LogEmail.cli_type,
nargs=3,
metavar="<address> <loglevel> <subject-preamble>",
multiple=True,
Expand All @@ -84,7 +84,7 @@
)
@click.option(
"--mail-server",
type=MailServer(),
type=MailServer.cli_type,
metavar="<host> <sender-email> <user> <password>",
default=None,
envvar="XNAT_INGEST_MAILSERVER",
Expand Down
34 changes: 22 additions & 12 deletions xnat_ingest/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,24 @@
from arcana.core.data.tree import DataTree
from arcana.core.exceptions import ArcanaDataMatchError
from .exceptions import DicomParseError, StagingError
from .utils import add_exc_note, transform_paths
from .utils import add_exc_note, transform_paths, DicomField, AssociatedFiles
from .dicom import dcmedit_path
import random
import string

logger = logging.getLogger("xnat-ingest")


def scan_type_converter(scan_type: str) -> str:
"Ensure there aren't any special characters that aren't valid file/dir paths"
return re.sub(r"[\"\*\/\:\<\>\?\\\|\+\,\.\;\=\[\]]+", "", scan_type)


@attrs.define
class ImagingScan:
id: str
type: str
resources: ty.Dict[str, FileSet]
type: str = attrs.field(converter=scan_type_converter)
resources: ty.Dict[str, FileSet] = attrs.field()

def __contains__(self, resource_name):
return resource_name in self.resources
Expand Down Expand Up @@ -91,7 +96,9 @@ def make_session_id(cls, project_id, subject_id, visit_id):
def modalities(self) -> ty.Set[str]:
modalities = self.metadata["Modality"]
if not isinstance(modalities, str):
modalities = set(tuple(m) if not isinstance(m, str) else m for m in modalities)
modalities = set(
tuple(m) if not isinstance(m, str) else m for m in modalities
)
return modalities

@property
Expand Down Expand Up @@ -200,9 +207,9 @@ def metadata(self):
def from_dicoms(
cls,
dicoms_path: str | Path,
project_field: str = "StudyID",
subject_field: str = "PatientID",
visit_field: str = "AccessionNumber",
project_field: DicomField = DicomField("StudyID"),
subject_field: DicomField = DicomField("PatientID"),
visit_field: DicomField = DicomField("AccessionNumber"),
project_id: str | None = None,
) -> ty.List["ImagingSession"]:
"""Loads all imaging sessions from a list of DICOM files
Expand Down Expand Up @@ -478,7 +485,7 @@ def save(self, save_dir: Path, just_manifest: bool = False) -> "ImagingSession":
def stage(
self,
dest_dir: Path,
associated_files: ty.Optional[ty.Tuple[str, str]] = None,
associated_files: ty.Optional[AssociatedFiles] = None,
remove_original: bool = False,
deidentify: bool = True,
) -> "ImagingSession":
Expand Down Expand Up @@ -528,6 +535,7 @@ def stage(
):
staged_resources = {}
for resource_name, fileset in scan.resources.items():
# Ensure scan type is a valid directory name
scan_dir = session_dir / f"{scan.id}-{scan.type}" / resource_name
scan_dir.mkdir(parents=True, exist_ok=True)
if isinstance(fileset, DicomSeries):
Expand Down Expand Up @@ -559,7 +567,7 @@ def stage(
# substitute string templates int the glob template with values from the
# DICOM metadata to construct a glob pattern to select files associated
# with current session
associated_fspaths = set()
associated_fspaths: ty.Set[Path] = set()
for dicom_dir in self.dicom_dirs:
assoc_glob = dicom_dir / associated_files.glob.format(**self.metadata)
# Select files using the constructed glob pattern
Expand All @@ -579,7 +587,7 @@ def stage(
if deidentify:
# Transform the names of the paths to remove any identiable information
transformed_fspaths = transform_paths(
associated_fspaths,
list(associated_fspaths),
associated_files.glob,
self.metadata,
staged_metadata,
Expand All @@ -603,7 +611,7 @@ def stage(
shutil.copyfile(old, dest_path)
staged_associated_fspaths.append(dest_path)
else:
staged_associated_fspaths = associated_fspaths
staged_associated_fspaths = list(associated_fspaths)

# Identify scan id, type and resource names from deidentified file paths
assoc_scans = {}
Expand All @@ -624,7 +632,9 @@ def stage(
except IndexError:
scan_type = scan_id
if scan_id not in assoc_scans:
assoc_resources = defaultdict(list)
assoc_resources: ty.DefaultDict[str, ty.List[Path]] = defaultdict(
list
)
assoc_scans[scan_id] = (scan_type, assoc_resources)
else:
prev_scan_type, assoc_resources = assoc_scans[scan_id]
Expand Down
Loading

0 comments on commit 503dcf1

Please sign in to comment.