diff --git a/xnat_ingest/cli/stage.py b/xnat_ingest/cli/stage.py
index db77d5f..5e85201 100644
--- a/xnat_ingest/cli/stage.py
+++ b/xnat_ingest/cli/stage.py
@@ -4,8 +4,14 @@
 from tqdm import tqdm
 from .base import cli
 from ..session import ImagingSession
-from ..utils import logger
-from .utils import DicomField, LogFile, LogEmail, MailServer, set_logger_handling
+from ..utils import (
+    DicomField,
+    logger,
+    LogFile,
+    LogEmail,
+    MailServer,
+    set_logger_handling,
+)
 
 
 @cli.command(
@@ -84,6 +90,7 @@
     "--log-file",
     default=None,
     type=LogFile,
+    metavar=" ",
     envvar="XNAT_INGEST_LOGFILE",
     help=(
         'Location to write the output logs to, defaults to "upload-logs" in the '
diff --git a/xnat_ingest/cli/upload.py b/xnat_ingest/cli/upload.py
index a093518..e373a5d 100644
--- a/xnat_ingest/cli/upload.py
+++ b/xnat_ingest/cli/upload.py
@@ -10,8 +10,9 @@
 from arcana.xnat import Xnat
 from .base import cli
 from ..session import ImagingSession
-from ..utils import logger, add_exc_note
-from .utils import (
+from ..utils import (
+    logger,
+    add_exc_note,
     LogFile,
     LogEmail,
     MailServer,
@@ -57,6 +58,7 @@
     "--log-file",
     default=None,
     type=LogFile,
+    metavar=" ",
     envvar="XNAT_INGEST_LOGFILE",
     help=(
         'Location to write the output logs to, defaults to "upload-logs" in the '
diff --git a/xnat_ingest/cli/utils.py b/xnat_ingest/cli/utils.py
deleted file mode 100644
index 4843e8b..0000000
--- a/xnat_ingest/cli/utils.py
+++ /dev/null
@@ -1,173 +0,0 @@
-import sys
-import logging
-import typing as ty
-import hashlib
-from pathlib import Path
-import pydicom
-from fileformats.core import from_mime
-from fileformats.core import FileSet
-
-logger = logging.getLogger("xnat-ingest")
-
-
-class LogEmail:
-    def __init__(self, address, loglevel, subject):
-        self.address = address
-        self.loglevel = loglevel
-        self.subject = subject
-
-    @classmethod
-    def split_envvar_value(cls, envvar):
-        return [cls(*entry.split(",")) for entry in envvar.split(";")]
-
-    def __str__(self):
-        return self.address
-
-
-class LogFile:
-    def __init__(self, path, loglevel):
-        self.path = Path(path)
-        self.loglevel = loglevel
-
-    @classmethod
-    def split_envvar_value(cls, envvar):
-        return [cls(*entry.split(",")) for entry in envvar.split(";")]
-
-    def __str__(self):
-        return str(self.path)
-
-    def __fspath__(self):
-        return self.path
-
-
-class MailServer:
-    def __init__(self, host, sender_email, user, password):
-        self.host = host
-        self.sender_email = sender_email
-        self.user = user
-        self.password = password
-
-
-class NonDicomType(str):
-    def __init__(self, mime):
-        self.type = from_mime(mime)
-
-    @classmethod
-    def split_envvar_value(cls, envvar):
-        return [cls(entry) for entry in envvar.split(";")]
-
-
-class DicomField:
-    def __init__(self, keyword_or_tag):
-        # Get the tag associated with the keyword
-        try:
-            self.tag = pydicom.datadict.tag_for_keyword(keyword_or_tag)
-        except ValueError:
-            try:
-                self.keyword = pydicom.datadict.dictionary_description(keyword_or_tag)
-            except ValueError:
-                raise ValueError(
-                    f'Could not parse "{keyword_or_tag}" as a DICOM keyword or tag'
-                )
-            else:
-                self.tag = keyword_or_tag
-        else:
-            self.keyword = keyword_or_tag
-
-    def __str__(self):
-        return f"'{self.keyword}' field ({','.join(self.tag)})"
-
-
-def set_logger_handling(
-    log_level: str, log_emails: LogEmail, log_file: Path, mail_server: MailServer
-):
-    # Configure the email logger
-    if log_emails:
-        if not mail_server:
-            raise ValueError(
-                "Mail server needs to be provided, either by `--mail-server` option or "
-                "XNAT_INGEST_MAILSERVER environment variable if logger emails "
-                "are provided: " + ", ".join(log_emails)
-            )
-        for log_email in log_emails:
-            smtp_hdle = logging.handlers.SMTPHandler(
-                mailhost=mail_server.host,
-                fromaddr=mail_server.sender_email,
-                toaddrs=[log_email.address],
-                subject=log_email.subject,
-                credentials=(mail_server.user, mail_server.password),
-                secure=None,
-            )
-            smtp_hdle.setLevel(getattr(logging, log_email.loglevel.upper()))
-            logger.addHandler(smtp_hdle)
-
-    # Configure the file logger
-    if log_file is not None:
-        log_file.path.parent.mkdir(exist_ok=True)
-        log_file_hdle = logging.FileHandler(log_file)
-        log_file_hdle.setLevel(getattr(logging, log_file.loglevel.upper()))
-        log_file_hdle.setFormatter(
-            logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
-        )
-        logger.addHandler(log_file_hdle)
-
-    console_hdle = logging.StreamHandler(sys.stdout)
-    console_hdle.setLevel(getattr(logging, log_level.upper()))
-    console_hdle.setFormatter(
-        logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
-    )
-    logger.addHandler(console_hdle)
-
-
-def get_checksums(xresource) -> ty.Dict[str, str]:
-    """
-    Downloads the MD5 digests associated with the files in a resource.
-
-    Parameters
-    ----------
-    xresource : xnat.classes.Resource
-        XNAT resource to retrieve the checksums from
-
-    Returns
-    -------
-    dict[str, str]
-        the checksums calculated by XNAT
-    """
-    result = xresource.xnat_session.get(xresource.uri + "/files")
-    if result.status_code != 200:
-        raise RuntimeError(
-            "Could not download metadata for resource {}. Files "
-            "may have been uploaded but cannot check checksums".format(xresource.id)
-        )
-    return dict((r["Name"], r["digest"]) for r in result.json()["ResultSet"]["Result"])
-
-
-def calculate_checksums(scan: FileSet) -> ty.Dict[str, str]:
-    """
-    Calculates the MD5 digests associated with the files in a fileset.
-
-    Parameters
-    ----------
-    scan : FileSet
-        the file-set to calculate the checksums for
-
-    Returns
-    -------
-    dict[str, str]
-        the calculated checksums
-    """
-    checksums = {}
-    for fspath in scan.fspaths:
-        try:
-            hsh = hashlib.md5()
-            with open(fspath, "rb") as f:
-                for chunk in iter(lambda: f.read(HASH_CHUNK_SIZE), b""):
-                    hsh.update(chunk)
-                checksum = hsh.hexdigest()
-        except OSError:
-            raise RuntimeError(f"Could not create digest of '{fspath}' ")
-        checksums[str(fspath.relative_to(scan.parent))] = checksum
-    return checksums
-
-
-HASH_CHUNK_SIZE = 2**20
diff --git a/xnat_ingest/tests/test_cli.py b/xnat_ingest/tests/test_cli.py
index beddf38..2c7d25f 100644
--- a/xnat_ingest/tests/test_cli.py
+++ b/xnat_ingest/tests/test_cli.py
@@ -191,13 +191,14 @@ def test_stage_and_upload(
         [
             str(dicoms_dir),
             str(staging_dir),
-            "--assoc-files-glob",
-            str(associated_files_dir)
-            + "/{PatientName.given_name}_{PatientName.family_name}*.ptd",
+            # "--assoc-files-glob",
+            # str(associated_files_dir)
+            # + "/{PatientName.given_name}_{PatientName.family_name}*.ptd",
             "--log-file",
             str(log_file),
-            "--raise-errors",
-            "--delete",
+            "info",
+            # "--raise-errors",
+            # "--delete",
         ]
     )
 
@@ -209,6 +210,7 @@ def test_stage_and_upload(
             str(staging_dir),
             "--log-file",
             str(log_file),
+            "info",
             "--raise-errors",
             "--include-dicoms",
             "--delete",
diff --git a/xnat_ingest/utils.py b/xnat_ingest/utils.py
index efdbea2..a947dc4 100644
--- a/xnat_ingest/utils.py
+++ b/xnat_ingest/utils.py
@@ -3,10 +3,202 @@
 import traceback
 from collections import Counter
 from pathlib import Path
+import logging.handlers
+import sys
+import typing as ty
+import hashlib
+import pydicom
+from fileformats.core import from_mime
+from fileformats.core import FileSet
 
+logger = logging.getLogger("xnat-ingest")
 
-logger = logging.getLogger("xnat-upload-exported-scans")
-logger.setLevel(logging.INFO)
+
+class LogEmail:
+    def __init__(self, address, loglevel, subject):
+        self.address = address
+        self.loglevel = loglevel
+        self.subject = subject
+
+    @classmethod
+    def split_envvar_value(cls, envvar):
+        return [cls(*entry.split(",")) for entry in envvar.split(";")]
+
+    def __str__(self):
+        return self.address
+
+
+class LogFile:
+    def __init__(self, path, loglevel):
+        self.path = Path(path)
+        self.loglevel = loglevel
+
+    @classmethod
+    def split_envvar_value(cls, envvar):
+        return [cls(*entry.split(",")) for entry in envvar.split(";")]
+
+    def __str__(self):
+        return str(self.path)
+
+    def __fspath__(self):
+        # os.fspath() requires str or bytes; returning a Path raises TypeError
+        return str(self.path)
+
+
+class MailServer:
+    def __init__(self, host, sender_email, user, password):
+        self.host = host
+        self.sender_email = sender_email
+        self.user = user
+        self.password = password
+
+
+class NonDicomType(str):
+    def __init__(self, mime):
+        self.type = from_mime(mime)
+
+    @classmethod
+    def split_envvar_value(cls, envvar):
+        return [cls(entry) for entry in envvar.split(";")]
+
+
+class DicomField:
+    def __init__(self, keyword_or_tag):
+        # Get the tag associated with the keyword
+        try:
+            self.tag = pydicom.datadict.tag_for_keyword(keyword_or_tag)
+        except ValueError:
+            try:
+                self.keyword = pydicom.datadict.dictionary_description(keyword_or_tag)
+            except ValueError:
+                raise ValueError(
+                    f'Could not parse "{keyword_or_tag}" as a DICOM keyword or tag'
+                )
+            else:
+                self.tag = keyword_or_tag
+        else:
+            self.keyword = keyword_or_tag
+
+    def __str__(self):
+        return f"'{self.keyword}' field ({','.join(self.tag)})"
+
+
+def set_logger_handling(
+    log_level: str,
+    log_emails: ty.List[LogEmail],
+    log_file: LogFile,
+    mail_server: MailServer,
+):
+    """Attach e-mail, file and console handlers to the package logger.
+
+    Parameters
+    ----------
+    log_level : str
+        name of the logging level for the console (stdout) handler
+    log_emails : list[LogEmail]
+        recipients to e-mail log records to, may be empty or None
+    log_file : LogFile
+        file to write log records to, or None to skip file logging
+    mail_server : MailServer
+        SMTP server details, required when ``log_emails`` is non-empty
+
+    Raises
+    ------
+    ValueError
+        if ``log_emails`` are provided without a ``mail_server``
+    """
+    # Configure the email logger
+    if log_emails:
+        if not mail_server:
+            raise ValueError(
+                "Mail server needs to be provided, either by `--mail-server` option or "
+                "XNAT_INGEST_MAILSERVER environment variable if logger emails "
+                "are provided: " + ", ".join(str(e) for e in log_emails)
+            )
+        for log_email in log_emails:
+            smtp_hdle = logging.handlers.SMTPHandler(
+                mailhost=mail_server.host,
+                fromaddr=mail_server.sender_email,
+                toaddrs=[log_email.address],
+                subject=log_email.subject,
+                credentials=(mail_server.user, mail_server.password),
+                secure=None,
+            )
+            smtp_hdle.setLevel(getattr(logging, log_email.loglevel.upper()))
+            logger.addHandler(smtp_hdle)
+
+    # Configure the file logger
+    if log_file is not None:
+        # parents=True so a nested, not-yet-existing log directory is created
+        log_file.path.parent.mkdir(parents=True, exist_ok=True)
+        log_file_hdle = logging.FileHandler(log_file)
+        log_file_hdle.setLevel(getattr(logging, log_file.loglevel.upper()))
+        log_file_hdle.setFormatter(
+            logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+        )
+        logger.addHandler(log_file_hdle)
+
+    console_hdle = logging.StreamHandler(sys.stdout)
+    console_hdle.setLevel(getattr(logging, log_level.upper()))
+    console_hdle.setFormatter(
+        logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+    )
+    logger.addHandler(console_hdle)
+
+
+def get_checksums(xresource) -> ty.Dict[str, str]:
+    """
+    Downloads the MD5 digests associated with the files in a resource.
+
+    Parameters
+    ----------
+    xresource : xnat.classes.Resource
+        XNAT resource to retrieve the checksums from
+
+    Returns
+    -------
+    dict[str, str]
+        the checksums calculated by XNAT
+    """
+    result = xresource.xnat_session.get(xresource.uri + "/files")
+    if result.status_code != 200:
+        raise RuntimeError(
+            "Could not download metadata for resource {}. Files "
+            "may have been uploaded but cannot check checksums".format(xresource.id)
+        )
+    return dict((r["Name"], r["digest"]) for r in result.json()["ResultSet"]["Result"])
+
+
+def calculate_checksums(scan: FileSet) -> ty.Dict[str, str]:
+    """
+    Calculates the MD5 digests associated with the files in a fileset.
+
+    Parameters
+    ----------
+    scan : FileSet
+        the file-set to calculate the checksums for
+
+    Returns
+    -------
+    dict[str, str]
+        the calculated checksums
+    """
+    checksums = {}
+    for fspath in scan.fspaths:
+        try:
+            hsh = hashlib.md5()
+            with open(fspath, "rb") as f:
+                for chunk in iter(lambda: f.read(HASH_CHUNK_SIZE), b""):
+                    hsh.update(chunk)
+                checksum = hsh.hexdigest()
+        except OSError as e:
+            # chain the cause so the underlying I/O error is not lost
+            raise RuntimeError(f"Could not create digest of '{fspath}' ") from e
+        checksums[str(fspath.relative_to(scan.parent))] = checksum
+    return checksums
+
+
+HASH_CHUNK_SIZE = 2**20
 
 
 def show_cli_trace(result):