Skip to content

Commit

Permalink
standardised logging across stage and upload commands
Browse files Browse the repository at this point in the history
  • Loading branch information
tclose committed Nov 16, 2023
1 parent ec2a4d5 commit bd2952c
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 140 deletions.
66 changes: 26 additions & 40 deletions xnat_ingest/cli/stage.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
from pathlib import Path
import traceback
import logging.config
import logging.handlers
import click
from tqdm import tqdm
from .base import cli
from ..session import ImagingSession
from ..utils import logger
from .utils import DicomField, LoggerEmail, MailServer


HASH_CHUNK_SIZE = 2**20
from .utils import DicomField, LogFile, LogEmail, MailServer, set_logger_handling


@cli.command(
Expand Down Expand Up @@ -76,10 +71,19 @@
envvar="XNAT_INGEST_DELETE",
help="Whether to delete the session directories after they have been uploaded or not",
)
@click.option(
"--log-level",
default="info",
type=str,
envvar="XNAT_INGEST_LOGLEVEL",
help=(
"The level of the logging printed to stdout"
)
)
@click.option(
"--log-file",
default=None,
type=click.Path(path_type=Path),
type=LogFile,
envvar="XNAT_INGEST_LOGFILE",
help=(
'Location to write the output logs to, defaults to "upload-logs" in the '
Expand All @@ -89,7 +93,7 @@
@click.option(
"--log-email",
"log_emails",
type=LoggerEmail,
type=LogEmail,
metavar="<address> <loglevel> <subject-preamble>",
multiple=True,
envvar="XNAT_INGEST_LOGEMAIL",
Expand Down Expand Up @@ -124,39 +128,20 @@ def stage(
session_field: str,
project_id: str | None,
delete: bool,
log_level: str,
log_file: Path,
log_emails: LoggerEmail,
log_emails: LogEmail,
mail_server: MailServer,
raise_errors: bool,
):
# Configure the email logger
if log_emails:
if not mail_server:
raise ValueError(
"Mail server needs to be provided, either by `--mail-server` option or "
"XNAT_INGEST_MAILSERVER environment variable if logger emails "
"are provided: " + ", ".join(log_emails)
)
for log_email in log_emails:
smtp_handler = logging.handlers.SMTPHandler(
mailhost=mail_server.host,
fromaddr=mail_server.sender_email,
toaddrs=[log_email.address],
subject=log_email.subject,
credentials=(mail_server.user, mail_server.password),
secure=None,
)
logger.addHandler(smtp_handler)
logger.info(f"Email logger configured for {log_email}")

# Configure the file logger
if log_file is not None:
log_file.parent.mkdir(exist_ok=True)
log_file_hdle = logging.FileHandler(log_file)
log_file_hdle.setFormatter(
logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
)
logger.addHandler(log_file_hdle)
set_logger_handling(log_level, log_file, log_emails, mail_server)

logger.info(
"Loading DICOM sessions from '%s' and associated files from '%s'",
str(dicoms_path),
str(assoc_files_glob),
)

sessions = ImagingSession.construct(
dicoms_path=dicoms_path,
Expand All @@ -167,15 +152,16 @@ def stage(
project_id=project_id,
)

for session in tqdm(
sessions, f"Staging DICOM sessions found in '{dicoms_path}'"
):
logger.info("Staging sessions to '%s'", str(staging_dir))

for session in tqdm(sessions, f"Staging DICOM sessions found in '{dicoms_path}'"):
try:
session_staging_dir = staging_dir / session.name
if session_staging_dir.exists():
logger.info(
"Skipping %s session as staging directory %s already exists",
session.name, str(session_staging_dir)
session.name,
str(session_staging_dir),
)
continue
session_staging_dir.mkdir(exist_ok=True)
Expand Down
118 changes: 20 additions & 98 deletions xnat_ingest/cli/upload.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,24 @@
# - [x] Email errors from upload script - emails configurable via env var
# - [x] add delete option to upload script
# - [x] add option to manually specify project/subject/session IDs in YAML file
# - [x] handle partial re-uploads (override, merge, error), option in YAML file
# - [x] Generalise to handle different types of raw data files
# - [ ] Send instructions to Dean/Fang on how it should be configured
# - [x] Pull info from DICOM headers/OHIF viewer
from pathlib import Path
import shutil
import typing as ty
import traceback
import tempfile
import hashlib
import logging.config
import logging.handlers
import click
from tqdm import tqdm
from fileformats.core import FileSet
from fileformats.generic import File
from fileformats.medimage import DicomSeries
from arcana.core.data.set import Dataset
from arcana.xnat import Xnat
from .base import cli
from ..session import ImagingSession
from ..utils import logger, add_exc_note
from .utils import LoggerEmail, MailServer


HASH_CHUNK_SIZE = 2**20
from .utils import (
LogFile,
LogEmail,
MailServer,
set_logger_handling,
get_checksums,
calculate_checksums,
)


@cli.command(
Expand Down Expand Up @@ -54,10 +46,17 @@
envvar="XNAT_INGEST_DELETE",
help="Whether to delete the session directories after they have been uploaded or not",
)
@click.option(
"--log-level",
default="info",
type=str,
envvar="XNAT_INGEST_LOGLEVEL",
help=("The level of the logging printed to stdout"),
)
@click.option(
"--log-file",
default=None,
type=click.Path(path_type=Path),
type=LogFile,
envvar="XNAT_INGEST_LOGFILE",
help=(
'Location to write the output logs to, defaults to "upload-logs" in the '
Expand All @@ -67,7 +66,7 @@
@click.option(
"--log-email",
"log_emails",
type=LoggerEmail,
type=LogEmail,
metavar="<address> <loglevel> <subject-preamble>",
multiple=True,
envvar="XNAT_INGEST_LOGEMAIL",
Expand Down Expand Up @@ -109,40 +108,14 @@ def upload(
user: str,
password: str,
delete: bool,
log_level: str,
log_file: Path,
log_emails: LoggerEmail,
log_emails: LogEmail,
mail_server: MailServer,
include_dicoms: bool,
raise_errors: bool,
):
# Configure the email logger
if log_emails:
if not mail_server:
raise ValueError(
"Mail server needs to be provided, either by `--mail-server` option or "
"XNAT_INGEST_MAILSERVER environment variable if logger emails "
"are provided: " + ", ".join(log_emails)
)
for log_email in log_emails:
smtp_handler = logging.handlers.SMTPHandler(
mailhost=mail_server.host,
fromaddr=mail_server.sender_email,
toaddrs=[log_email.address],
subject=log_email.subject,
credentials=(mail_server.user, mail_server.password),
secure=None,
)
logger.addHandler(smtp_handler)
logger.info(f"Email logger configured for {log_email}")

# Configure the file logger
if log_file is not None:
log_file.parent.mkdir(exist_ok=True)
log_file_hdle = logging.FileHandler(log_file)
log_file_hdle.setFormatter(
logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
)
logger.addHandler(log_file_hdle)
set_logger_handling(log_level, log_file, log_emails, mail_server)

xnat_repo = Xnat(
server=server, user=user, password=password, cache_dir=Path(tempfile.mkdtemp())
Expand Down Expand Up @@ -304,54 +277,3 @@ def upload(
continue
else:
raise


def get_checksums(xresource) -> ty.Dict[str, str]:
    """
    Downloads the MD5 digests associated with the files in a resource.

    Parameters
    ----------
    xresource : xnat.classes.Resource
        XNAT resource to retrieve the checksums from

    Returns
    -------
    dict[str, str]
        the checksums calculated by XNAT, keyed by file name

    Raises
    ------
    RuntimeError
        if the file metadata cannot be downloaded from the XNAT server
    """
    result = xresource.xnat_session.get(xresource.uri + "/files")
    if result.status_code != 200:
        # The files may already be on the server at this point, so raise a
        # clear error rather than silently skipping the checksum comparison
        raise RuntimeError(
            f"Could not download metadata for resource {xresource.id}. Files "
            "may have been uploaded but cannot check checksums"
        )
    return {r["Name"]: r["digest"] for r in result.json()["ResultSet"]["Result"]}


def calculate_checksums(scan: "FileSet", chunk_size: int = 2**20) -> ty.Dict[str, str]:
    """
    Calculates the MD5 digests associated with the files in a fileset.

    Parameters
    ----------
    scan : FileSet
        the file-set to calculate the checksums for
    chunk_size : int, optional
        number of bytes to read per iteration when hashing, by default
        2**20 (1 MiB)

    Returns
    -------
    dict[str, str]
        the calculated checksums, keyed by path relative to the file-set parent

    Raises
    ------
    RuntimeError
        if one of the files in the set cannot be read
    """
    checksums = {}
    for fspath in scan.fspaths:
        try:
            hsh = hashlib.md5()
            with open(fspath, "rb") as f:
                # Hash in fixed-size chunks so large imaging files don't need
                # to be loaded into memory all at once
                for chunk in iter(lambda: f.read(chunk_size), b""):
                    hsh.update(chunk)
            checksum = hsh.hexdigest()
        except OSError as e:
            # Chain the original error so the underlying cause (permissions,
            # missing file, etc.) isn't lost from the traceback
            raise RuntimeError(f"Could not create digest of '{fspath}'") from e
        checksums[str(fspath.relative_to(scan.parent))] = checksum
    return checksums
Loading

0 comments on commit bd2952c

Please sign in to comment.