Merge pull request #22 from Australian-Imaging-Service/datatype-handling
Debugged datatype, logging and missing ID handling
tclose authored Oct 2, 2024
2 parents 68ddb1a + 9bd2c84 commit e0d753a
Showing 7 changed files with 100 additions and 63 deletions.
50 changes: 25 additions & 25 deletions .pre-commit-config.yaml
@@ -1,28 +1,28 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
exclude: ^(arcana/_version\.py|versioneer\.py)$
args:
- -l 88
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
exclude: ^(xnat_checks/_version\.py|versioneer\.py)$
args:
- --ignore-words=.codespell-ignorewords
- repo: https://github.com/PyCQA/flake8
rev: 4.0.1
hooks:
- id: flake8
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
exclude: ^(arcana/_version\.py|versioneer\.py)$
args:
- -l 88
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
exclude: ^(xnat_checks/_version\.py|versioneer\.py)$
args:
- --ignore-words=.codespell-ignorewords
- repo: https://github.com/PyCQA/flake8
rev: 7.0.0
hooks:
- id: flake8
23 changes: 23 additions & 0 deletions scripts/get_pet_tst.py
@@ -0,0 +1,23 @@
import tempfile
from pathlib import Path

from fileformats.medimage import DicomSeries
from medimages4tests.dummy.dicom.pet.wholebody.siemens.biograph_vision.vr20b import (  # type: ignore[import-untyped]
    get_image as get_pet_image,
)

# Quick manual check that DicomSeries can read metadata from a dummy
# Siemens Biograph Vision whole-body PET series
tmp_path = Path(tempfile.mkdtemp())

# Generate the dummy DICOM files in the temporary directory and wrap them
# in a DicomSeries fileset
series = DicomSeries(
    get_pet_image(
        tmp_path,
        first_name="first",
        last_name="last",
        StudyInstanceUID="StudyInstanceUID",
        PatientID="PatientID",
        AccessionNumber="AccessionNumber",
        StudyID="xnat_project",
    ).iterdir()
)

# Should print the StudyID passed above, i.e. "xnat_project"
print(series.metadata["StudyID"])
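A small aside (not part of the committed script): assuming fileformats' MIME-like names round-trip back to their classes, the 'medimage/dicom-series' default used by the stage command below resolves to this same DicomSeries class:

from fileformats.core import from_mime
from fileformats.medimage import DicomSeries

# Assumes the registered MIME-like name maps back to the class
assert from_mime("medimage/dicom-series") is DicomSeries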
35 changes: 24 additions & 11 deletions xnat_ingest/cli/stage.py
@@ -7,6 +7,7 @@
import tempfile
from tqdm import tqdm
from fileformats.core import FileSet
from fileformats.medimage import DicomSeries
from xnat_ingest.cli.base import cli
from xnat_ingest.session import ImagingSession
from frametree.xnat import Xnat # type: ignore[import-untyped]
@@ -25,13 +26,13 @@


@cli.command(
help="""Stages DICOM and associated files found in the input directories into separate
directories for each session
help="""Stages images found in the input directories into separate directories for each
imaging acquisition session
DICOMS_PATH is either the path to a directory containing the DICOM files to upload, or
a glob pattern that selects the DICOM paths directly
FILES_PATH is either the path to a directory containing the files to upload, or
a glob pattern that selects the paths directly
STAGING_DIR is the directory that the files for each session are collated to before they
OUTPUT_DIR is the directory that the files for each session are collated to before they
are uploaded to XNAT
""",
)
@@ -42,9 +43,15 @@
type=str,
metavar="<mime-type>",
multiple=True,
default=["medimage/dicom-series"],
envvar="XINGEST_DATATYPE",
help="The datatype of the primary files to to upload",
default=None,
envvar="XINGEST_DATATYPES",
help=(
'The MIME-type(s) (or "MIME-like" strings, see the FileFormats docs) of the potential '
"datatype(s) of the primary files to upload; defaults to 'medimage/dicom-series'. "
"Any format implemented in the FileFormats Python package "
"(https://github.com/ArcanaFramework/fileformats) that implements the 'read_metadata' "
'"extra" is supported; see the FileFormats docs on how to add support for new formats.'
),
)
@click.option(
"--project-field",
@@ -250,7 +257,7 @@
def stage(
files_path: str,
output_dir: Path,
datatype: str,
datatype: list[str] | None,
associated_files: ty.List[AssociatedFiles],
project_field: str,
subject_field: str,
@@ -279,6 +286,11 @@ def stage(
logger_configs=loggers,
additional_loggers=additional_loggers,
)
datatypes: list[ty.Type[FileSet]]
if not datatype:
datatypes = [DicomSeries]
else:
datatypes = [FileSet.from_mime(dt) for dt in datatype] # type: ignore[misc]

if xnat_login:
xnat_repo = Xnat(
@@ -292,10 +304,10 @@ def stage(
else:
project_list = None

if session_field is None and datatype == "medimage/dicom-series":
if session_field is None and DicomSeries in datatypes:
session_field = "StudyInstanceUID"

msg = f"Loading {datatype} sessions from '{files_path}'"
msg = f"Loading {list(datatypes)} sessions from '{files_path}'"

for assoc_files in associated_files:
msg += f" with associated files selected from '{assoc_files.glob}'"
@@ -319,6 +331,7 @@
def do_stage() -> None:
sessions = ImagingSession.from_paths(
files_path=files_path,
datatypes=datatypes,
project_field=project_field,
subject_field=subject_field,
visit_field=visit_field,
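For readers skimming the hunks above, the datatype handling added to this command boils down to the following sketch (a restatement of the diff, not a new helper in the codebase):

import typing as ty

from fileformats.core import FileSet
from fileformats.medimage import DicomSeries


def resolve_datatypes(datatype: list[str] | None) -> list[ty.Type[FileSet]]:
    # No --datatype given: fall back to DICOM series
    if not datatype:
        return [DicomSeries]
    # Otherwise resolve each MIME-like string to its fileformats class
    return [FileSet.from_mime(dt) for dt in datatype]  # type: ignore[misc]


datatypes = resolve_datatypes(None)
# DICOM inputs default the session identifier to the StudyInstanceUID tag
session_field = "StudyInstanceUID" if DicomSeries in datatypes else None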
4 changes: 3 additions & 1 deletion xnat_ingest/resource.py
@@ -27,7 +27,9 @@ class ImagingResource:

@checksums.default
def calculate_checksums(self) -> dict[str, str]:
return self.fileset.hash_files(crypto=hashlib.md5)
return self.fileset.hash_files(
crypto=hashlib.md5, relative_to=self.fileset.parent
)

@property
def datatype(self) -> ty.Type[FileSet]:
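As I read the relative_to change above, checksums are now keyed by paths relative to the fileset's parent rather than by absolute paths, so the same resource staged in two different temporary directories produces identical checksum dictionaries. A minimal sketch of that idea, not the fileformats implementation:

import hashlib
from pathlib import Path


def md5_checksums(files: list[Path], root: Path) -> dict[str, str]:
    # Key each digest by the path relative to `root`, so the mapping is
    # independent of where the resource is staged on disk
    return {
        str(path.relative_to(root)): hashlib.md5(path.read_bytes()).hexdigest()
        for path in sorted(files)
    }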
33 changes: 21 additions & 12 deletions xnat_ingest/session.py
@@ -323,6 +323,7 @@ def from_paths(
multiple_sessions: ty.DefaultDict[str, ty.Set[ty.Tuple[str, str, str]]] = (
defaultdict(set)
)
missing_ids: dict[str, dict[str, str]] = defaultdict(dict)
for resource in tqdm(
resources,
"Sorting resources into XNAT tree structure...",
@@ -338,21 +339,28 @@ def get_id(field_type: str, field_name: str) -> str:
try:
value = resource.metadata[field_name]
except KeyError:
value = ""
if not value:
if session_uid and field_type in ("project", "subject", "visit"):
value = (
"INVALID_MISSING_"
+ field_type.upper()
+ "_"
+ "".join(
random.choices(
string.ascii_letters + string.digits, k=8
try:
value = missing_ids[session_uid][field_type]
except KeyError:
value = missing_ids[session_uid][field_type] = (
"INVALID_MISSING_"
+ field_type.upper()
+ "_"
+ "".join(
random.choices(
string.ascii_letters + string.digits, k=8
)
)
)
else:
raise ImagingSessionParseError(
f"Did not find '{field_name}' field in {resource!r}, "
"cannot uniquely identify the resource, found:\n"
+ "\n".join(resource.metadata)
)
raise ImagingSessionParseError(
f"Did not find '{field_name}' field in {resource}, "
"cannot uniquely identify the resource"
)
if index is not None:
value = value[index]
value_str = str(value)
@@ -399,7 +407,8 @@ def get_id(field_type: str, field_name: str) -> str:
raise ImagingSessionParseError(
"Multiple session UIDs found with the same project/subject/visit ID triplets: "
+ "\n".join(
f"{i} -> {p}:{s}:{v}" for i, (p, s, v) in multiple_sessions.items()
f"{i} -> " + str(["{p}:{s}:{v}" for p, s, v in sess])
for i, sess in multiple_sessions.items()
)
)
return list(sessions.values())
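Pulled out of the hunks above for clarity, the missing-ID fix caches one placeholder per session UID and field type, so every resource in an affected session receives the same generated ID rather than a fresh random one per resource (a standalone restatement of the diff, not a separate helper in the package):

import random
import string
from collections import defaultdict

# One placeholder per (session UID, field type)
missing_ids: dict[str, dict[str, str]] = defaultdict(dict)


def placeholder_id(session_uid: str, field_type: str) -> str:
    try:
        return missing_ids[session_uid][field_type]
    except KeyError:
        suffix = "".join(random.choices(string.ascii_letters + string.digits, k=8))
        value = missing_ids[session_uid][field_type] = (
            "INVALID_MISSING_" + field_type.upper() + "_" + suffix
        )
        return value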
15 changes: 1 addition & 14 deletions xnat_ingest/tests/test_session.py
@@ -1,7 +1,7 @@
from pathlib import Path
import pytest
import typing as ty
from fileformats.core import from_mime, FileSet
from fileformats.core import from_mime
from fileformats.medimage import (
DicomSeries,
Vnd_Siemens_Biograph128Vision_Vr20b_PetRawData,
@@ -10,22 +10,17 @@
)
from frametree.core.frameset import FrameSet # type: ignore[import-untyped]
from frametree.common import FileSystem # type: ignore[import-untyped]
from medimages4tests.dummy.dicom.base import default_dicom_dir # type: ignore[import-untyped]
from medimages4tests.dummy.dicom.pet.wholebody.siemens.biograph_vision.vr20b import ( # type: ignore[import-untyped]
get_image as get_pet_image,
__file__ as pet_src_file,
)
from medimages4tests.dummy.dicom.ct.ac.siemens.biograph_vision.vr20b import ( # type: ignore[import-untyped]
get_image as get_ac_image,
__file__ as ac_src_file,
)
from medimages4tests.dummy.dicom.pet.topogram.siemens.biograph_vision.vr20b import ( # type: ignore[import-untyped]
get_image as get_topogram_image,
__file__ as topogram_src_file,
)
from medimages4tests.dummy.dicom.pet.statistics.siemens.biograph_vision.vr20b import ( # type: ignore[import-untyped]
get_image as get_statistics_image,
__file__ as statistics_src_file,
)
from xnat_ingest.session import ImagingSession, ImagingScan
from xnat_ingest.store import DummyAxes
@@ -66,26 +61,18 @@ def imaging_session() -> ImagingSession:
DicomSeries(d.iterdir())
for d in (
get_pet_image(
out_dir=default_dicom_dir(pet_src_file).with_suffix(".with-spaces"),
first_name=FIRST_NAME,
last_name=LAST_NAME,
),
get_ac_image(
out_dir=default_dicom_dir(ac_src_file).with_suffix(".with-spaces"),
first_name=FIRST_NAME,
last_name=LAST_NAME,
),
get_topogram_image(
out_dir=default_dicom_dir(topogram_src_file).with_suffix(
".with-spaces"
),
first_name=FIRST_NAME,
last_name=LAST_NAME,
),
get_statistics_image(
out_dir=default_dicom_dir(statistics_src_file).with_suffix(
".with-spaces"
),
first_name=FIRST_NAME,
last_name=LAST_NAME,
),
3 changes: 3 additions & 0 deletions xnat_ingest/utils.py
@@ -126,6 +126,9 @@ def set_logger_handling(
) -> None:
"""Set up logging for the application"""

if not logger_configs:
logger_configs = [LoggerConfig("stream", "info", "stdout")]

loggers = [logger]
for log in additional_loggers:
loggers.append(logging.getLogger(log))
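The new guard gives a sensible default when no logger configuration is supplied. In plain logging terms this is presumably equivalent to something like the sketch below; the logger name and the exact semantics of LoggerConfig("stream", "info", "stdout") are assumptions about xnat_ingest internals:

import logging
import sys

# Assumed rough equivalent of the default LoggerConfig fallback
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
logger = logging.getLogger("xnat_ingest")
logger.setLevel(logging.INFO)
logger.addHandler(handler)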
