Skip to content

Commit

Permalink
Merge pull request #19 from Australian-Imaging-Service/flexible-formats
Browse files Browse the repository at this point in the history
Allows primary files to be of any format supported by fileformats
  • Loading branch information
tclose authored Sep 24, 2024
2 parents 2a42035 + f1bea51 commit 7827a91
Show file tree
Hide file tree
Showing 15 changed files with 501 additions and 310 deletions.
15 changes: 14 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
import os
from pathlib import Path
import logging
import typing as ty
import tempfile
from logging.handlers import SMTPHandler
import pytest
from click.testing import CliRunner
import xnat4tests
import xnat4tests # type: ignore[import-untyped]
from datetime import datetime
from xnat_ingest.utils import logger
from medimages4tests.dummy.raw.pet.siemens.biograph_vision.vr20b.pet_listmode import (
get_data as get_listmode_data,
)
from medimages4tests.dummy.raw.pet.siemens.biograph_vision.vr20b.pet_countrate import (
get_data as get_countrate_data,
)

# Set DEBUG logging for unittests

Expand Down Expand Up @@ -110,3 +117,9 @@ def emit(self, record):
# Capture the email message and append it to the list
msg = self.format(record)
self.emails.append(msg)


def get_raw_data_files(out_dir: ty.Optional[Path] = None, **kwargs) -> ty.List[Path]:
    """Generate the dummy PET list-mode and count-rate raw data files.

    Parameters
    ----------
    out_dir : Path, optional
        Directory handed to both data generators. When omitted, a fresh
        temporary directory is created with ``tempfile.mkdtemp``.
    **kwargs
        Forwarded unchanged to both ``get_listmode_data`` and
        ``get_countrate_data``.

    Returns
    -------
    list of Path
        The list-mode result followed by the count-rate result.
    """
    target_dir = Path(tempfile.mkdtemp()) if out_dir is None else out_dir
    listmode_files = get_listmode_data(target_dir, **kwargs)
    countrate_files = get_countrate_data(target_dir, **kwargs)
    return listmode_files + countrate_files
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ dependencies = [
"natsort",
"paramiko",
"xnat",
"arcana",
"arcana-xnat >=0.4.1",
"frametree",
"frametree-xnat",
]
license = { file = "LICENSE" }
authors = [{ name = "Thomas G. Close", email = "[email protected]" }]
Expand Down
1 change: 0 additions & 1 deletion real-tests/usyd_transfer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from click.testing import CliRunner
from xnat_ingest.cli import transfer
from xnat_ingest.utils import show_cli_trace
Expand Down
6 changes: 3 additions & 3 deletions scripts/dcm_performance_mrtrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
"StudyInstanceUID",
"StudyID",
"PatientID",
"AccessionNumber"
"AccessionNumber",
]

series = DicomSeries(get_image().iterdir())
series = DicomSeries(get_image().iterdir(), specific_tags=METADATA_KEYS)

timeit.timeit(lambda: series.select_metadata(METADATA_KEYS))
timeit.timeit(lambda: series.metadata)
6 changes: 3 additions & 3 deletions scripts/dcm_performance_pydicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
"StudyInstanceUID",
"StudyID",
"PatientID",
"AccessionNumber"
"AccessionNumber",
]

series = DicomSeries(get_image().iterdir())
series = DicomSeries(get_image().iterdir(), specific_tags=METADATA_KEYS)

timeit.timeit(lambda: series.select_metadata(METADATA_KEYS))
timeit.timeit(lambda: series.metadata)
2 changes: 1 addition & 1 deletion scripts/run_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from xnat_ingest.utils import show_cli_trace
from click.testing import CliRunner

PATTERN = "{PatientName.given_name}_{PatientName.family_name}_{SeriesDate}.*"
PATTERN = "{PatientName.family_name}_{PatientName.given_name}_{SeriesDate}.*"

runner = CliRunner()

Expand Down
109 changes: 83 additions & 26 deletions xnat_ingest/cli/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
from tqdm import tqdm
from xnat_ingest.cli.base import cli
from xnat_ingest.session import ImagingSession
from arcana.xnat import Xnat
from frametree.xnat import Xnat # type: ignore[import-untyped]
from xnat_ingest.utils import (
DicomField,
AssociatedFiles,
logger,
LogFile,
Expand All @@ -30,31 +29,76 @@
are uploaded to XNAT
""",
)
@click.argument("dicoms_path", type=str, envvar="XNAT_INGEST_STAGE_DICOMS_PATH")
@click.argument("files_path", type=str, envvar="XNAT_INGEST_STAGE_DICOMS_PATH")
@click.argument(
"staging_dir", type=click.Path(path_type=Path), envvar="XNAT_INGEST_STAGE_DIR"
)
@click.option(
"--datatype",
type=str,
metavar="<mime-type>",
multiple=True,
default=["medimage/dicom-series"],
envvar="XNAT_INGEST_STAGE_DATATYPE",
help="The datatype of the primary files to to upload",
)
@click.option(
"--project-field",
type=DicomField,
type=str,
default="StudyID",
envvar="XNAT_INGEST_STAGE_PROJECT",
help=("The keyword or tag of the DICOM field to extract the XNAT project ID from "),
help=("The keyword of the metadata field to extract the XNAT project ID from "),
)
@click.option(
"--subject-field",
type=DicomField,
type=str,
default="PatientID",
envvar="XNAT_INGEST_STAGE_SUBJECT",
help=("The keyword or tag of the DICOM field to extract the XNAT subject ID from "),
help=("The keyword of the metadata field to extract the XNAT subject ID from "),
)
@click.option(
"--visit-field",
type=DicomField,
type=str,
default="AccessionNumber",
envvar="XNAT_INGEST_STAGE_VISIT",
help=(
"The keyword of the metadata field to extract the XNAT imaging session ID from "
),
)
@click.option(
"--session-field",
type=str,
default=None,
envvar="XNAT_INGEST_STAGE_SESSION",
help=(
"The keyword or tag of the DICOM field to extract the XNAT imaging session ID from "
"The keyword of the metadata field to extract the XNAT imaging session ID from "
),
)
@click.option(
"--scan-id-field",
type=str,
default="SeriesNumber",
envvar="XNAT_INGEST_STAGE_SCAN_ID",
help=(
"The keyword of the metadata field to extract the XNAT imaging scan ID from "
),
)
@click.option(
"--scan-desc-field",
type=str,
default="SeriesDescription",
envvar="XNAT_INGEST_STAGE_SCAN_DESC",
help=(
"The keyword of the metadata field to extract the XNAT imaging scan description from "
),
)
@click.option(
"--resource-field",
type=str,
default="ImageType[-1]",
envvar="XNAT_INGEST_STAGE_RESOURCE",
help=(
"The keyword of the metadata field to extract the XNAT imaging resource ID from "
),
)
@click.option(
Expand All @@ -66,18 +110,19 @@
@click.option(
"--associated-files",
type=AssociatedFiles.cli_type,
nargs=2,
nargs=3,
default=None,
multiple=True,
envvar="XNAT_INGEST_STAGE_ASSOCIATED",
metavar="<glob> <id-pattern>",
metavar="<datatype> <glob> <id-pattern>",
help=(
'The "glob" arg is a glob pattern by which to detect associated files to be '
"attached to the DICOM sessions. Note that when this pattern corresponds to a "
"relative path it is considered to be relative to the parent directory containing "
"the DICOMs for the session NOT the current working directory Can contain string "
"templates corresponding to DICOM metadata fields, which are substituted before "
"the glob is called. For example, "
'"./associated/{PatientName.given_name}_{PatientName.family_name}/*)" '
'"./associated/{PatientName.family_name}_{PatientName.given_name}/*)" '
"will find all files under the subdirectory within '/path/to/dicoms/associated' that matches "
"<GIVEN-NAME>_<FAMILY-NAME>. Will be interpreted as being relative to `dicoms_dir` "
"if a relative path is provided.\n"
Expand Down Expand Up @@ -181,12 +226,17 @@
type=bool,
)
def stage(
dicoms_path: str,
files_path: str,
staging_dir: Path,
associated_files: AssociatedFiles,
project_field: DicomField,
subject_field: DicomField,
visit_field: DicomField,
datatype: str,
associated_files: ty.List[AssociatedFiles],
project_field: str,
subject_field: str,
visit_field: str,
session_field: str | None,
scan_id_field: str,
scan_desc_field: str,
resource_field: str,
project_id: str | None,
delete: bool,
log_level: str,
Expand Down Expand Up @@ -219,26 +269,33 @@ def stage(
else:
project_list = None

msg = f"Loading DICOM sessions from '{dicoms_path}'"
if session_field is None and datatype == "medimage/dicom-series":
session_field = "StudyInstanceUID"

msg = f"Loading {datatype} sessions from '{files_path}'"

if associated_files:
msg += f" with associated files selected from '{associated_files.glob}'"
if not associated_files.glob.startswith("/"):
msg += " (relative to the directories in which the DICOMs are found)"
for assoc_files in associated_files:
msg += f" with associated files selected from '{assoc_files.glob}'"
if not assoc_files.glob.startswith("/"):
msg += " (relative to the directories in which the primary files are found)"

logger.info(msg)

sessions = ImagingSession.from_dicoms(
dicoms_path=dicoms_path,
sessions = ImagingSession.from_paths(
files_path=files_path,
project_field=project_field,
subject_field=subject_field,
visit_field=visit_field,
session_field=session_field,
scan_id_field=scan_id_field,
scan_desc_field=scan_desc_field,
resource_field=resource_field,
project_id=project_id,
)

logger.info("Staging sessions to '%s'", str(staging_dir))

for session in tqdm(sessions, f"Staging DICOM sessions found in '{dicoms_path}'"):
for session in tqdm(sessions, f"Staging DICOM sessions found in '{files_path}'"):
try:
session_staging_dir = staging_dir.joinpath(*session.staging_relpath)
if session_staging_dir.exists():
Expand All @@ -251,7 +308,7 @@ def stage(
# Deidentify files if necessary and save them to the staging directory
session.stage(
staging_dir,
associated_files=associated_files,
associated_file_groups=associated_files,
remove_original=delete,
deidentify=deidentify,
project_list=project_list,
Expand Down
13 changes: 7 additions & 6 deletions xnat_ingest/cli/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
import click
from tqdm import tqdm
from natsort import natsorted
import xnat
import xnat # type: ignore[import-untyped]
import boto3
import paramiko
from fileformats.generic import File
from arcana.core.data.set import Dataset
from arcana.xnat import Xnat
from xnat.exceptions import XNATResponseError
from frametree.core.frameset import FrameSet # type: ignore[import-untyped]
from frametree.xnat import Xnat # type: ignore[import-untyped]
from xnat.exceptions import XNATResponseError # type: ignore[import-untyped]
from xnat_ingest.cli.base import cli
from xnat_ingest.session import ImagingSession
from xnat_ingest.utils import (
Expand Down Expand Up @@ -349,7 +349,7 @@ def iter_staged_sessions():
missing_datasets = set()
for project_id in project_ids:
try:
dataset = Dataset.load(project_id, xnat_repo)
dataset = FrameSet.load(project_id, xnat_repo)
except Exception:
missing_datasets.add(project_id)
else:
Expand Down Expand Up @@ -392,7 +392,7 @@ def iter_staged_sessions():

# Access the FrameTree frame set associated with the project
try:
dataset = Dataset.load(session.project_id, xnat_repo)
dataset = FrameSet.load(session.project_id, xnat_repo)
except Exception as e:
logger.warning(
"Did not load dataset definition (%s) from %s project "
Expand Down Expand Up @@ -446,6 +446,7 @@ def iter_staged_sessions():
image_type = scan.metadata.get("ImageType")
if image_type and image_type[:2] == ["DERIVED", "SECONDARY"]:
modality = "SC"
resource_name = "secondary"
else:
modality = scan.metadata.get(
"Modality", default_scan_modality
Expand Down
37 changes: 9 additions & 28 deletions xnat_ingest/dicom.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,35 @@
import typing as ty
import shutil
import subprocess as sp

# import re
import pydicom

# from fileformats.core import FileSet
# from fileformats.application import Dicom
# from fileformats.extras.application.medical import dicom_read_metadata


# Absolute paths of the optional external DICOM command-line tools, or None
# when the tool is not found on the PATH. ``shutil.which`` performs the lookup
# in-process, so importing this module no longer spawns a shell.
dcmedit_path: ty.Optional[str] = shutil.which("dcmedit")

dcminfo_path: ty.Optional[str] = shutil.which("dcminfo")


def tag2keyword(tag: ty.Tuple[str, str]) -> str:
    """Return the DICOM keyword for a ``(group, element)`` tag.

    Parameters
    ----------
    tag : tuple of (str, str)
        Group and element numbers as hexadecimal strings, e.g.
        ``("0010", "0020")`` -- the same form ``keyword2tag`` returns.
        Integer parts are also accepted and used as-is.

    Returns
    -------
    str
        The keyword looked up via ``pydicom.datadict.dictionary_keyword``.
    """
    # The parts are hexadecimal, so they must be parsed with base 16; a plain
    # int("0010") would yield 10 and int("7FE0") would raise ValueError.
    group, element = (
        part if isinstance(part, int) else int(part, 16) for part in tag
    )
    return pydicom.datadict.dictionary_keyword((group, element))


def keyword2tag(keyword: str) -> ty.Tuple[str, str]:
    """Return the ``(group, element)`` tag for a DICOM keyword.

    Parameters
    ----------
    keyword : str
        Keyword to look up via ``pydicom.datadict.tag_for_keyword``.

    Returns
    -------
    tuple of (str, str)
        Group and element, each as a 4-digit lowercase hexadecimal string.

    Raises
    ------
    ValueError
        If the lookup returns ``None`` for the keyword.
    """
    tag = pydicom.datadict.tag_for_keyword(keyword)
    # Compare against None explicitly so a tag with numeric value 0 is not
    # mistaken for "not found".
    if tag is None:
        raise ValueError(f"Could not find tag for keyword '{keyword}'")
    # Fixed-width formatting zero-pads both halves, including the element
    # half when the group number is 0000.
    tag_str = f"{int(tag):08x}"
    return (tag_str[:4], tag_str[4:])


Expand All @@ -49,27 +54,3 @@ def __init__(self, keyword_or_tag):

def __str__(self):
return f"'{self.keyword}' field ({','.join(self.tag)})"


# @FileSet.read_metadata.register
# def mrtrix_dicom_read_metadata(
# dcm: Dicom, selected_keys: ty.Optional[ty.Sequence[str]] = None
# ) -> ty.Mapping[str, ty.Any]:
# if dcminfo_path is None or selected_keys is None:
# return dicom_read_metadata(dcm, selected_keys)

# tags = [keyword2tag(k) for k in selected_keys]
# tag_str = " ".join(f"-t {t[0]} {t[1]}" for t in tags)
# cmd = f"dcminfo {tag_str} {dcm.fspath}"
# line_re = re.compile(r"\[([0-9A-F]{4}),([0-9A-F]{4})] (.*)")
# dcminfo_output = sp.check_output(cmd, shell=True).decode("utf-8")
# metadata = {}
# for line in dcminfo_output.splitlines():
# match = line_re.match(line)
# if not match:
# continue
# t1, t2, val = match.groups()
# key = tag2keyword((t1, t2))
# val = val.strip()
# metadata[key] = val
# return metadata
Loading

0 comments on commit 7827a91

Please sign in to comment.