From bd2952c0625036cc75f53cf3fbf9a90ecd4d4b24 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 16 Nov 2023 20:27:17 +1100 Subject: [PATCH] standardised logging across stage and upload commands --- xnat_ingest/cli/stage.py | 66 +++++++++------------ xnat_ingest/cli/upload.py | 118 +++++++------------------------------ xnat_ingest/cli/utils.py | 119 +++++++++++++++++++++++++++++++++++++- xnat_ingest/session.py | 2 + 4 files changed, 165 insertions(+), 140 deletions(-) diff --git a/xnat_ingest/cli/stage.py b/xnat_ingest/cli/stage.py index 41c7d31..db77d5f 100644 --- a/xnat_ingest/cli/stage.py +++ b/xnat_ingest/cli/stage.py @@ -1,16 +1,11 @@ from pathlib import Path import traceback -import logging.config -import logging.handlers import click from tqdm import tqdm from .base import cli from ..session import ImagingSession from ..utils import logger -from .utils import DicomField, LoggerEmail, MailServer - - -HASH_CHUNK_SIZE = 2**20 +from .utils import DicomField, LogFile, LogEmail, MailServer, set_logger_handling @cli.command( @@ -76,10 +71,19 @@ envvar="XNAT_INGEST_DELETE", help="Whether to delete the session directories after they have been uploaded or not", ) +@click.option( + "--log-level", + default="info", + type=str, + envvar="XNAT_INGEST_LOGLEVEL", + help=( + "The level of the logging printed to stdout" + ) +) @click.option( "--log-file", default=None, - type=click.Path(path_type=Path), + type=LogFile, envvar="XNAT_INGEST_LOGFILE", help=( 'Location to write the output logs to, defaults to "upload-logs" in the ' @@ -89,7 +93,7 @@ @click.option( "--log-email", "log_emails", - type=LoggerEmail, + type=LogEmail, metavar="
", multiple=True, envvar="XNAT_INGEST_LOGEMAIL", @@ -124,39 +128,20 @@ def stage( session_field: str, project_id: str | None, delete: bool, + log_level: str, log_file: Path, - log_emails: LoggerEmail, + log_emails: LogEmail, mail_server: MailServer, raise_errors: bool, ): - # Configure the email logger - if log_emails: - if not mail_server: - raise ValueError( - "Mail server needs to be provided, either by `--mail-server` option or " - "XNAT_INGEST_MAILSERVER environment variable if logger emails " - "are provided: " + ", ".join(log_emails) - ) - for log_email in log_emails: - smtp_handler = logging.handlers.SMTPHandler( - mailhost=mail_server.host, - fromaddr=mail_server.sender_email, - toaddrs=[log_email.address], - subject=log_email.subject, - credentials=(mail_server.user, mail_server.password), - secure=None, - ) - logger.addHandler(smtp_handler) - logger.info(f"Email logger configured for {log_email}") - # Configure the file logger - if log_file is not None: - log_file.parent.mkdir(exist_ok=True) - log_file_hdle = logging.FileHandler(log_file) - log_file_hdle.setFormatter( - logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") - ) - logger.addHandler(log_file_hdle) + set_logger_handling(log_level, log_file, log_emails, mail_server) + + logger.info( + "Loading DICOM sessions from '%s' and associated files from '%s'", + str(dicoms_path), + str(assoc_files_glob), + ) sessions = ImagingSession.construct( dicoms_path=dicoms_path, @@ -167,15 +152,16 @@ def stage( project_id=project_id, ) - for session in tqdm( - sessions, f"Staging DICOM sessions found in '{dicoms_path}'" - ): + logger.info("Staging sessions to '%s'", str(staging_dir)) + + for session in tqdm(sessions, f"Staging DICOM sessions found in '{dicoms_path}'"): try: session_staging_dir = staging_dir / session.name if session_staging_dir.exists(): logger.info( "Skipping %s session as staging directory %s already exists", - session.name, str(session_staging_dir) + session.name, + str(session_staging_dir), ) continue session_staging_dir.mkdir(exist_ok=True) diff --git a/xnat_ingest/cli/upload.py b/xnat_ingest/cli/upload.py index 635438a..a093518 100644 --- a/xnat_ingest/cli/upload.py +++ b/xnat_ingest/cli/upload.py @@ -1,21 +1,9 @@ -# - [x] Email errors from upload script - emails configurable via env var -# - [x] add delete option to upload script -# - [x] add option to manually specify project/subject/session IDs in YAML file -# - [x] handle partial re-uploads (override, merge, error), option in YAML file -# - [x] Generalise to handle different types of raw data files -# - [ ] Send instructions to Dean/Fang on how it should be configured -# - [x] Pull info from DICOM headers/OHIF viewer from pathlib import Path import shutil -import typing as ty import traceback import tempfile -import hashlib -import logging.config -import logging.handlers import click from tqdm import tqdm -from fileformats.core import FileSet from fileformats.generic import File from fileformats.medimage import DicomSeries from arcana.core.data.set import Dataset @@ -23,10 +11,14 @@ from .base import cli from ..session import ImagingSession from ..utils import logger, add_exc_note -from .utils import LoggerEmail, MailServer - - -HASH_CHUNK_SIZE = 2**20 +from .utils import ( + LogFile, + LogEmail, + MailServer, + set_logger_handling, + get_checksums, + calculate_checksums, +) @cli.command( @@ -54,10 +46,17 @@ envvar="XNAT_INGEST_DELETE", help="Whether to delete the session directories after they have been uploaded or not", ) +@click.option( + "--log-level", + default="info", + type=str, + envvar="XNAT_INGEST_LOGLEVEL", + help=("The level of the logging printed to stdout"), +) @click.option( "--log-file", default=None, - type=click.Path(path_type=Path), + type=LogFile, envvar="XNAT_INGEST_LOGFILE", help=( 'Location to write the output logs to, defaults to "upload-logs" in the ' @@ -67,7 +66,7 @@ @click.option( "--log-email", "log_emails", - type=LoggerEmail, + type=LogEmail, metavar="
", multiple=True, envvar="XNAT_INGEST_LOGEMAIL", @@ -109,40 +108,14 @@ def upload( user: str, password: str, delete: bool, + log_level: str, log_file: Path, - log_emails: LoggerEmail, + log_emails: LogEmail, mail_server: MailServer, include_dicoms: bool, raise_errors: bool, ): - # Configure the email logger - if log_emails: - if not mail_server: - raise ValueError( - "Mail server needs to be provided, either by `--mail-server` option or " - "XNAT_INGEST_MAILSERVER environment variable if logger emails " - "are provided: " + ", ".join(log_emails) - ) - for log_email in log_emails: - smtp_handler = logging.handlers.SMTPHandler( - mailhost=mail_server.host, - fromaddr=mail_server.sender_email, - toaddrs=[log_email.address], - subject=log_email.subject, - credentials=(mail_server.user, mail_server.password), - secure=None, - ) - logger.addHandler(smtp_handler) - logger.info(f"Email logger configured for {log_email}") - - # Configure the file logger - if log_file is not None: - log_file.parent.mkdir(exist_ok=True) - log_file_hdle = logging.FileHandler(log_file) - log_file_hdle.setFormatter( - logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") - ) - logger.addHandler(log_file_hdle) + set_logger_handling(log_level, log_file, log_emails, mail_server) xnat_repo = Xnat( server=server, user=user, password=password, cache_dir=Path(tempfile.mkdtemp()) @@ -304,54 +277,3 @@ def upload( continue else: raise - - -def get_checksums(xresource) -> ty.Dict[str, str]: - """ - Downloads the MD5 digests associated with the files in a resource. - - Parameters - ---------- - xresource : xnat.classes.Resource - XNAT resource to retrieve the checksums from - - Returns - ------- - dict[str, str] - the checksums calculated by XNAT - """ - result = xresource.xnat_session.get(xresource.uri + "/files") - if result.status_code != 200: - raise RuntimeError( - "Could not download metadata for resource {}. Files " - "may have been uploaded but cannot check checksums".format(xresource.id) - ) - return dict((r["Name"], r["digest"]) for r in result.json()["ResultSet"]["Result"]) - - -def calculate_checksums(scan: FileSet) -> ty.Dict[str, str]: - """ - Calculates the MD5 digests associated with the files in a fileset. - - Parameters - ---------- - scan : FileSet - the file-set to calculate the checksums for - - Returns - ------- - dict[str, str] - the calculated checksums - """ - checksums = {} - for fspath in scan.fspaths: - try: - hsh = hashlib.md5() - with open(fspath, "rb") as f: - for chunk in iter(lambda: f.read(HASH_CHUNK_SIZE), b""): - hsh.update(chunk) - checksum = hsh.hexdigest() - except OSError: - raise RuntimeError(f"Could not create digest of '{fspath}' ") - checksums[str(fspath.relative_to(scan.parent))] = checksum - return checksums diff --git a/xnat_ingest/cli/utils.py b/xnat_ingest/cli/utils.py index d16046e..5ca1614 100644 --- a/xnat_ingest/cli/utils.py +++ b/xnat_ingest/cli/utils.py @@ -1,8 +1,16 @@ +import sys +import logging +import typing as ty +import hashlib +from pathlib import Path import pydicom from fileformats.core import from_mime +from fileformats.core import FileSet +logger = logging.getLogger("xnat-ingest") -class LoggerEmail: + +class LogEmail: def __init__(self, address, loglevel, subject): self.address = address self.loglevel = loglevel @@ -16,6 +24,22 @@ def __str__(self): return self.address +class LogFile: + def __init__(self, path, loglevel): + self.path = Path(path) + self.loglevel = loglevel + + @classmethod + def split_envvar_value(cls, envvar): + return [cls(*entry.split(",")) for entry in envvar.split(";")] + + def __str__(self): + return str(self.path) + + def __fspath__(self): + return self.path + + class MailServer: def __init__(self, host, sender_email, user, password): self.host = host @@ -51,4 +75,95 @@ def __init__(self, keyword_or_tag): self.keyword = keyword_or_tag def __str__(self): - return f"'{self.keyword}' field ({','.join(self.tag)})" \ No newline at end of file + return f"'{self.keyword}' field ({','.join(self.tag)})" + + +def set_logger_handling(log_level: str, log_emails: LogEmail, log_file: Path, mail_server: MailServer): + + # Configure the email logger + if log_emails: + if not mail_server: + raise ValueError( + "Mail server needs to be provided, either by `--mail-server` option or " + "XNAT_INGEST_MAILSERVER environment variable if logger emails " + "are provided: " + ", ".join(log_emails) + ) + for log_email in log_emails: + smtp_handler = logging.handlers.SMTPHandler( + mailhost=mail_server.host, + fromaddr=mail_server.sender_email, + toaddrs=[log_email.address], + subject=log_email.subject, + credentials=(mail_server.user, mail_server.password), + secure=None, + ) + logger.addHandler(smtp_handler) + logger.info(f"Email logger configured for {log_email}") + + # Configure the file logger + if log_file is not None: + log_file.parent.mkdir(exist_ok=True) + log_file_hdle = logging.FileHandler(log_file) + log_file_hdle.setFormatter( + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + ) + logger.addHandler(log_file_hdle) + + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(getattr(logging, log_level.upper())) + console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) + logger.addHandler(console_handler) + + +def get_checksums(xresource) -> ty.Dict[str, str]: + """ + Downloads the MD5 digests associated with the files in a resource. + + Parameters + ---------- + xresource : xnat.classes.Resource + XNAT resource to retrieve the checksums from + + Returns + ------- + dict[str, str] + the checksums calculated by XNAT + """ + result = xresource.xnat_session.get(xresource.uri + "/files") + if result.status_code != 200: + raise RuntimeError( + "Could not download metadata for resource {}. Files " + "may have been uploaded but cannot check checksums".format(xresource.id) + ) + return dict((r["Name"], r["digest"]) for r in result.json()["ResultSet"]["Result"]) + + +def calculate_checksums(scan: FileSet) -> ty.Dict[str, str]: + """ + Calculates the MD5 digests associated with the files in a fileset. + + Parameters + ---------- + scan : FileSet + the file-set to calculate the checksums for + + Returns + ------- + dict[str, str] + the calculated checksums + """ + checksums = {} + for fspath in scan.fspaths: + try: + hsh = hashlib.md5() + with open(fspath, "rb") as f: + for chunk in iter(lambda: f.read(HASH_CHUNK_SIZE), b""): + hsh.update(chunk) + checksum = hsh.hexdigest() + except OSError: + raise RuntimeError(f"Could not create digest of '{fspath}' ") + checksums[str(fspath.relative_to(scan.parent))] = checksum + return checksums + + +HASH_CHUNK_SIZE = 2**20 diff --git a/xnat_ingest/session.py b/xnat_ingest/session.py index 41f39b5..1b7f3a2 100644 --- a/xnat_ingest/session.py +++ b/xnat_ingest/session.py @@ -228,11 +228,13 @@ def construct( dicom_fspaths = [Path(p) for p in glob(dicoms_path)] # Sort loaded series by StudyInstanceUID (imaging session) + logger.info("Loading DICOM series from %s", str(dicoms_path)) dicom_sessions = defaultdict(list) for series in from_paths(dicom_fspaths, DicomSeries): dicom_sessions[series["StudyInstanceUID"]].append(series) # Construct sessions from sorted series + logger.info("Searching for associated files ") sessions = [] for session_dicom_series in dicom_sessions.values():