NOTE(review): this patch was recovered from a whitespace-collapsed paste. Line
breaks, blank context lines and indentation were reconstructed from the hunk
headers and from Python/YAML syntax; verify against the target tree before
applying. Two added lines differ from the pasted text: the "to to upload" typo
in the datatype help string is fixed, and the duplicate-session error message
now uses an f-string so the triplets are interpolated. The blob ids on the
"index" lines of those two files predate these edits.

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7a881d3..3904cd4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,28 +1,28 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.3.0
-  hooks:
-  - id: trailing-whitespace
-  - id: end-of-file-fixer
-  - id: check-yaml
-  - id: check-added-large-files
-- repo: https://github.com/psf/black
-  rev: 22.3.0
-  hooks:
-  - id: black
-    exclude: ^(arcana/_version\.py|versioneer\.py)$
-    args:
-    - -l 88
-- repo: https://github.com/codespell-project/codespell
-  rev: v2.1.0
-  hooks:
-  - id: codespell
-    exclude: ^(xnat_checks/_version\.py|versioneer\.py)$
-    args:
-    - --ignore-words=.codespell-ignorewords
-- repo: https://github.com/PyCQA/flake8
-  rev: 4.0.1
-  hooks:
-  - id: flake8
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+  - repo: https://github.com/psf/black
+    rev: 22.3.0
+    hooks:
+      - id: black
+        exclude: ^(arcana/_version\.py|versioneer\.py)$
+        args:
+          - -l 88
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.1.0
+    hooks:
+      - id: codespell
+        exclude: ^(xnat_checks/_version\.py|versioneer\.py)$
+        args:
+          - --ignore-words=.codespell-ignorewords
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.0.0
+    hooks:
+      - id: flake8
diff --git a/scripts/get_pet_tst.py b/scripts/get_pet_tst.py
new file mode 100644
index 0000000..b905798
--- /dev/null
+++ b/scripts/get_pet_tst.py
@@ -0,0 +1,23 @@
+import tempfile
+from pathlib import Path
+from fileformats.medimage import DicomSeries
+from medimages4tests.dummy.dicom.pet.wholebody.siemens.biograph_vision.vr20b import (  # type: ignore[import-untyped]
+    get_image as get_pet_image,
+)
+
+
+tmp_path = Path(tempfile.mkdtemp())
+
+series = DicomSeries(
+    get_pet_image(
+        tmp_path,
+        first_name="first",
+        last_name="last",
+        StudyInstanceUID="StudyInstanceUID",
+        PatientID="PatientID",
+        AccessionNumber="AccessionNumber",
+        StudyID="xnat_project",
+    ).iterdir()
+)
+
+print(series.metadata["StudyID"])
diff --git a/xnat_ingest/cli/stage.py b/xnat_ingest/cli/stage.py
index 79eba75..ce11f53 100644
--- a/xnat_ingest/cli/stage.py
+++ b/xnat_ingest/cli/stage.py
@@ -7,6 +7,7 @@
 import tempfile
 from tqdm import tqdm
 from fileformats.core import FileSet
+from fileformats.medimage import DicomSeries
 from xnat_ingest.cli.base import cli
 from xnat_ingest.session import ImagingSession
 from frametree.xnat import Xnat  # type: ignore[import-untyped]
@@ -25,13 +26,13 @@
 
 
 @cli.command(
-    help="""Stages DICOM and associated files found in the input directories into separate
-directories for each session
+    help="""Stages images found in the input directories into separate directories for each
+imaging acquisition session
 
-DICOMS_PATH is either the path to a directory containing the DICOM files to upload, or
-a glob pattern that selects the DICOM paths directly
+FILES_PATH is either the path to a directory containing the files to upload, or
+a glob pattern that selects the paths directly
 
-STAGING_DIR is the directory that the files for each session are collated to before they
+OUTPUT_DIR is the directory that the files for each session are collated to before they
 are uploaded to XNAT
 """,
 )
@@ -42,9 +43,15 @@
     type=str,
     metavar="",
     multiple=True,
-    default=["medimage/dicom-series"],
-    envvar="XINGEST_DATATYPE",
-    help="The datatype of the primary files to to upload",
+    default=None,
+    envvar="XINGEST_DATATYPES",
+    help=(
+        'The MIME-type(s) (or "MIME-like" see FileFormats docs) of potential datatype(s) '
+        "of the primary files to upload, defaults to 'medimage/dicom-series'. "
+        "Any formats implemented in the FileFormats Python package "
+        "(https://github.com/ArcanaFramework/fileformats) that implement the 'read_metadata' "
+        '"extra" are supported, see FF docs on how to add support for new formats.'
+    ),
 )
 @click.option(
     "--project-field",
@@ -250,7 +257,7 @@
 def stage(
     files_path: str,
     output_dir: Path,
-    datatype: str,
+    datatype: list[str] | None,
     associated_files: ty.List[AssociatedFiles],
     project_field: str,
     subject_field: str,
@@ -279,6 +286,11 @@ def stage(
         logger_configs=loggers,
         additional_loggers=additional_loggers,
     )
+    datatypes: list[ty.Type[FileSet]]
+    if not datatype:
+        datatypes = [DicomSeries]
+    else:
+        datatypes = [FileSet.from_mime(dt) for dt in datatype]  # type: ignore[misc]
 
     if xnat_login:
         xnat_repo = Xnat(
@@ -292,10 +304,10 @@ def stage(
     else:
         project_list = None
 
-    if session_field is None and datatype == "medimage/dicom-series":
+    if session_field is None and DicomSeries in datatypes:
         session_field = "StudyInstanceUID"
 
-    msg = f"Loading {datatype} sessions from '{files_path}'"
+    msg = f"Loading {list(datatypes)} sessions from '{files_path}'"
 
     for assoc_files in associated_files:
         msg += f" with associated files selected from '{assoc_files.glob}'"
@@ -319,6 +331,7 @@ def stage(
     def do_stage() -> None:
         sessions = ImagingSession.from_paths(
             files_path=files_path,
+            datatypes=datatypes,
             project_field=project_field,
             subject_field=subject_field,
             visit_field=visit_field,
diff --git a/xnat_ingest/resource.py b/xnat_ingest/resource.py
index 56161d4..401ecab 100644
--- a/xnat_ingest/resource.py
+++ b/xnat_ingest/resource.py
@@ -27,7 +27,9 @@ class ImagingResource:
 
     @checksums.default
     def calculate_checksums(self) -> dict[str, str]:
-        return self.fileset.hash_files(crypto=hashlib.md5)
+        return self.fileset.hash_files(
+            crypto=hashlib.md5, relative_to=self.fileset.parent
+        )
 
     @property
     def datatype(self) -> ty.Type[FileSet]:
diff --git a/xnat_ingest/session.py b/xnat_ingest/session.py
index 4479895..2953237 100644
--- a/xnat_ingest/session.py
+++ b/xnat_ingest/session.py
@@ -323,6 +323,7 @@ def from_paths(
         multiple_sessions: ty.DefaultDict[str, ty.Set[ty.Tuple[str, str, str]]] = (
             defaultdict(set)
         )
+        missing_ids: dict[str, dict[str, str]] = defaultdict(dict)
         for resource in tqdm(
             resources,
             "Sorting resources into XNAT tree structure...",
@@ -338,21 +339,28 @@ def get_id(field_type: str, field_name: str) -> str:
                 try:
                     value = resource.metadata[field_name]
                 except KeyError:
+                    value = ""
+                if not value:
                     if session_uid and field_type in ("project", "subject", "visit"):
-                        value = (
-                            "INVALID_MISSING_"
-                            + field_type.upper()
-                            + "_"
-                            + "".join(
-                                random.choices(
-                                    string.ascii_letters + string.digits, k=8
+                        try:
+                            value = missing_ids[session_uid][field_type]
+                        except KeyError:
+                            value = missing_ids[session_uid][field_type] = (
+                                "INVALID_MISSING_"
+                                + field_type.upper()
+                                + "_"
+                                + "".join(
+                                    random.choices(
+                                        string.ascii_letters + string.digits, k=8
+                                    )
                                 )
                             )
+                    else:
+                        raise ImagingSessionParseError(
+                            f"Did not find '{field_name}' field in {resource!r}, "
+                            "cannot uniquely identify the resource, found:\n"
+                            + "\n".join(resource.metadata)
                         )
-                    raise ImagingSessionParseError(
-                        f"Did not find '{field_name}' field in {resource}, "
-                        "cannot uniquely identify the resource"
-                    )
                 if index is not None:
                     value = value[index]
                 value_str = str(value)
@@ -399,7 +407,8 @@ def get_id(field_type: str, field_name: str) -> str:
             raise ImagingSessionParseError(
                 "Multiple session UIDs found with the same project/subject/visit ID triplets: "
                 + "\n".join(
-                    f"{i} -> {p}:{s}:{v}" for i, (p, s, v) in multiple_sessions.items()
+                    f"{i} -> " + str([f"{p}:{s}:{v}" for p, s, v in sess])
+                    for i, sess in multiple_sessions.items()
                 )
             )
         return list(sessions.values())
diff --git a/xnat_ingest/tests/test_session.py b/xnat_ingest/tests/test_session.py
index a5709a7..7853d4d 100644
--- a/xnat_ingest/tests/test_session.py
+++ b/xnat_ingest/tests/test_session.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 import pytest
 import typing as ty
-from fileformats.core import from_mime, FileSet
+from fileformats.core import from_mime
 from fileformats.medimage import (
     DicomSeries,
     Vnd_Siemens_Biograph128Vision_Vr20b_PetRawData,
@@ -10,22 +10,17 @@
 )
 from frametree.core.frameset import FrameSet  # type: ignore[import-untyped]
 from frametree.common import FileSystem  # type: ignore[import-untyped]
-from medimages4tests.dummy.dicom.base import default_dicom_dir  # type: ignore[import-untyped]
 from medimages4tests.dummy.dicom.pet.wholebody.siemens.biograph_vision.vr20b import (  # type: ignore[import-untyped]
     get_image as get_pet_image,
-    __file__ as pet_src_file,
 )
 from medimages4tests.dummy.dicom.ct.ac.siemens.biograph_vision.vr20b import (  # type: ignore[import-untyped]
     get_image as get_ac_image,
-    __file__ as ac_src_file,
 )
 from medimages4tests.dummy.dicom.pet.topogram.siemens.biograph_vision.vr20b import (  # type: ignore[import-untyped]
     get_image as get_topogram_image,
-    __file__ as topogram_src_file,
 )
 from medimages4tests.dummy.dicom.pet.statistics.siemens.biograph_vision.vr20b import (  # type: ignore[import-untyped]
     get_image as get_statistics_image,
-    __file__ as statistics_src_file,
 )
 from xnat_ingest.session import ImagingSession, ImagingScan
 from xnat_ingest.store import DummyAxes
@@ -66,26 +61,18 @@ def imaging_session() -> ImagingSession:
         DicomSeries(d.iterdir())
         for d in (
             get_pet_image(
-                out_dir=default_dicom_dir(pet_src_file).with_suffix(".with-spaces"),
                 first_name=FIRST_NAME,
                 last_name=LAST_NAME,
             ),
             get_ac_image(
-                out_dir=default_dicom_dir(ac_src_file).with_suffix(".with-spaces"),
                 first_name=FIRST_NAME,
                 last_name=LAST_NAME,
             ),
             get_topogram_image(
-                out_dir=default_dicom_dir(topogram_src_file).with_suffix(
-                    ".with-spaces"
-                ),
                 first_name=FIRST_NAME,
                 last_name=LAST_NAME,
             ),
             get_statistics_image(
-                out_dir=default_dicom_dir(statistics_src_file).with_suffix(
-                    ".with-spaces"
-                ),
                 first_name=FIRST_NAME,
                 last_name=LAST_NAME,
             ),
diff --git a/xnat_ingest/utils.py b/xnat_ingest/utils.py
index f62e584..1b33e6e 100644
--- a/xnat_ingest/utils.py
+++ b/xnat_ingest/utils.py
@@ -126,6 +126,9 @@ def set_logger_handling(
 ) -> None:
     """Set up logging for the application"""
 
+    if not logger_configs:
+        logger_configs = [LoggerConfig("stream", "info", "stdout")]
+
     loggers = [logger]
     for log in additional_loggers:
         loggers.append(logging.getLogger(log))