From 424f8220f828a2fc962cffbc26e8016b42cd0e49 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Fri, 15 Mar 2024 13:46:45 +1100
Subject: [PATCH] additional environment vars corresponding to CLI options

---
 xnat_ingest/cli/stage.py    |  7 +++-
 xnat_ingest/cli/transfer.py | 73 -------------------------------------
 xnat_ingest/cli/upload.py   | 68 ++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 75 deletions(-)

diff --git a/xnat_ingest/cli/stage.py b/xnat_ingest/cli/stage.py
index 7bf150c..33d3242 100644
--- a/xnat_ingest/cli/stage.py
+++ b/xnat_ingest/cli/stage.py
@@ -27,8 +27,10 @@
     are uploaded to XNAT
 """,
 )
-@click.argument("dicoms_path", type=str)
-@click.argument("staging_dir", type=click.Path(path_type=Path))
+@click.argument("dicoms_path", type=str, envvar="XNAT_INGEST_DICOMS_PATH")
+@click.argument(
+    "staging_dir", type=click.Path(path_type=Path), envvar="XNAT_INGEST_STAGING_DIR"
+)
 @click.option(
     "--project-field",
     type=DicomField,
@@ -142,6 +144,7 @@
     "--deidentify/--dont-deidentify",
     default=False,
     type=bool,
+    envvar="XNAT_INGEST_DEIDENTIFY",
     help="whether to deidentify the file names and DICOM metadata before staging",
 )
 def stage(
diff --git a/xnat_ingest/cli/transfer.py b/xnat_ingest/cli/transfer.py
index 2222c21..701e06f 100644
--- a/xnat_ingest/cli/transfer.py
+++ b/xnat_ingest/cli/transfer.py
@@ -14,54 +14,6 @@
     set_logger_handling,
 )
 from .base import cli
-import os
-import datetime
-import boto3
-import paramiko
-
-
-def remove_old_files_on_s3(remote_store: str, threshold: int):
-    # Parse S3 bucket and prefix from remote store
-    bucket_name, prefix = remote_store[5:].split("/", 1)
-
-    # Create S3 client
-    s3_client = boto3.client("s3")
-
-    # List objects in the bucket with the specified prefix
-    response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
-
-    now = datetime.datetime.now()
-
-    # Iterate over objects and delete files older than the threshold
-    for obj in response.get("Contents", []):
-        last_modified = obj["LastModified"]
-        age = (now - last_modified).days
-        if age > threshold:
-            s3_client.delete_object(Bucket=bucket_name, Key=obj["Key"])
-
-
-def remove_old_files_on_ssh(remote_store: str, threshold: int):
-    # Parse SSH server and directory from remote store
-    server, directory = remote_store.split("@", 1)
-
-    # Create SSH client
-    ssh_client = paramiko.SSHClient()
-    ssh_client.load_system_host_keys()
-    ssh_client.connect(server)
-
-    # Execute find command to list files in the directory
-    stdin, stdout, stderr = ssh_client.exec_command(f"find {directory} -type f")
-
-    now = datetime.datetime.now()
-
-    # Iterate over files and delete files older than the threshold
-    for file_path in stdout.read().decode().splitlines():
-        last_modified = datetime.datetime.fromtimestamp(os.path.getmtime(file_path))
-        age = (now - last_modified).days
-        if age > threshold:
-            ssh_client.exec_command(f"rm {file_path}")
-
-    ssh_client.close()
 
 
 @cli.command(
@@ -153,13 +105,6 @@
     help="The XNAT server to upload to plus the user and password to use",
     envvar="XNAT_INGEST_XNAT_LOGIN",
 )
-@click.option(
-    "--clean-up-older-than",
-    type=int,
-    metavar="<days>",
-    default=0,
-    help="The number of days to keep files in the remote store for",
-)
 def transfer(
     staging_dir: Path,
     remote_store: str,
@@ -166,11 +111,10 @@ def transfer(
     credentials: ty.Tuple[str, str],
     log_level: str,
     log_file: Path,
     log_emails: ty.List[ty.Tuple[str, str, str]],
     delete: bool,
     raise_errors: bool,
     xnat_login: ty.Optional[ty.Tuple[str, str, str]],
-    clean_up_older_than: int,
 ):
 
     if not staging_dir.exists():
@@ -281,23 +225,6 @@ def transfer(
             logger.info("Deleting %s after successful upload", session_dir)
             shutil.rmtree(session_dir)
 
-    if clean_up_older_than:
-        logger.info(
-            "Cleaning up files in %s older than %d days",
-            remote_store,
-            clean_up_older_than,
-        )
-        if store_type == "s3":
-            remove_old_files_on_s3(
-                remote_store=remote_store, threshold=clean_up_older_than
-            )
-        elif store_type == "ssh":
-            remove_old_files_on_ssh(
-                remote_store=remote_store, threshold=clean_up_older_than
-            )
-        else:
-            assert False
-
 
 if __name__ == "__main__":
     transfer()
diff --git a/xnat_ingest/cli/upload.py b/xnat_ingest/cli/upload.py
index 5818721..b217ce0 100644
--- a/xnat_ingest/cli/upload.py
+++ b/xnat_ingest/cli/upload.py
@@ -1,5 +1,7 @@
 from pathlib import Path
 import shutil
+import os
+import datetime
 import traceback
 import typing as ty
 from collections import defaultdict
@@ -9,6 +11,7 @@
 from tqdm import tqdm
 from natsort import natsorted
 import boto3
+import paramiko
 from fileformats.generic import File
 from arcana.core.data.set import Dataset
 from arcana.xnat import Xnat
@@ -140,6 +143,13 @@
     ),
     type=bool,
 )
+@click.option(
+    "--clean-up-older-than",
+    type=int,
+    metavar="<days>",
+    default=0,
+    help="The number of days to keep files in the remote store for",
+)
 def upload(
     staged: str,
     server: str,
@@ -155,6 +165,7 @@
     store_credentials: ty.Tuple[str, str],
     temp_dir: ty.Optional[Path],
     use_manifest: bool,
+    clean_up_older_than: int,
 ):
 
     set_logger_handling(log_level, log_file, log_emails, mail_server)
@@ -446,3 +457,60 @@ def iter_staged_sessions():
             continue
         else:
             raise
+
+    if clean_up_older_than:
+        logger.info(
+            "Cleaning up files in %s older than %d days",
+            staged,
+            clean_up_older_than,
+        )
+        if staged.startswith("s3://"):
+            remove_old_files_on_s3(remote_store=staged, threshold=clean_up_older_than)
+        elif "@" in staged:
+            remove_old_files_on_ssh(remote_store=staged, threshold=clean_up_older_than)
+        else:
+            assert False
+
+
+def remove_old_files_on_s3(remote_store: str, threshold: int):
+    # Parse S3 bucket and prefix from remote store
+    bucket_name, prefix = remote_store[5:].split("/", 1)
+
+    # Create S3 client
+    s3_client = boto3.client("s3")
+
+    # List objects in the bucket with the specified prefix
+    response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
+
+    now = datetime.datetime.now()
+
+    # Iterate over objects and delete files older than the threshold
+    for obj in response.get("Contents", []):
+        last_modified = obj["LastModified"]
+        age = (now - last_modified).days
+        if age > threshold:
+            s3_client.delete_object(Bucket=bucket_name, Key=obj["Key"])
+
+
+def remove_old_files_on_ssh(remote_store: str, threshold: int):
+    # Parse SSH server and directory from remote store
+    server, directory = remote_store.split("@", 1)
+
+    # Create SSH client
+    ssh_client = paramiko.SSHClient()
+    ssh_client.load_system_host_keys()
+    ssh_client.connect(server)
+
+    # Execute find command to list files in the directory
+    stdin, stdout, stderr = ssh_client.exec_command(f"find {directory} -type f")
+
+    now = datetime.datetime.now()
+
+    # Iterate over files and delete files older than the threshold
+    for file_path in stdout.read().decode().splitlines():
+        last_modified = datetime.datetime.fromtimestamp(os.path.getmtime(file_path))
+        age = (now - last_modified).days
+        if age > threshold:
+            ssh_client.exec_command(f"rm {file_path}")
+
+    ssh_client.close()
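
Reviewer notes (not part of the commit):

1. The stage.py change lets a staging run be configured entirely through
XNAT_INGEST_* environment variables. A minimal, self-contained sketch of how
click resolves them when the CLI arguments are omitted (the paths and values
below are made up for illustration):

    from pathlib import Path

    import click
    from click.testing import CliRunner

    @click.command()
    @click.argument("dicoms_path", type=str, envvar="XNAT_INGEST_DICOMS_PATH")
    @click.argument(
        "staging_dir", type=click.Path(path_type=Path), envvar="XNAT_INGEST_STAGING_DIR"
    )
    @click.option(
        "--deidentify/--dont-deidentify",
        default=False,
        envvar="XNAT_INGEST_DEIDENTIFY",
    )
    def stage(dicoms_path: str, staging_dir: Path, deidentify: bool) -> None:
        click.echo(f"{dicoms_path} -> {staging_dir} (deidentify={deidentify})")

    result = CliRunner().invoke(
        stage,
        [],  # no CLI arguments: every value falls back to its envvar
        env={
            "XNAT_INGEST_DICOMS_PATH": "/incoming/dicoms",       # hypothetical path
            "XNAT_INGEST_STAGING_DIR": "/var/lib/xnat/staging",  # hypothetical path
            "XNAT_INGEST_DEIDENTIFY": "1",  # parsed as a boolean by click
        },
    )
    print(result.output)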
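2. remove_old_files_on_s3() compares obj["LastModified"], which boto3 returns
as a timezone-aware UTC datetime, against a naive datetime.datetime.now(), so
the subtraction raises TypeError at runtime. It also reads a single
list_objects_v2 page, which is capped at 1000 keys. A sketch of a variant
addressing both points (same name and signature as in the patch, but this
rewrite is a suggestion, not what the commit ships):

    import datetime

    import boto3

    def remove_old_files_on_s3(remote_store: str, threshold: int) -> None:
        # Parse the "s3://bucket/prefix" remote store URI
        bucket_name, prefix = remote_store[5:].split("/", 1)
        s3_client = boto3.client("s3")

        # Compare aware UTC timestamps against an aware "now"
        now = datetime.datetime.now(datetime.timezone.utc)

        # Paginate: a single list_objects_v2 call returns at most 1000 keys
        paginator = s3_client.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
            for obj in page.get("Contents", []):
                if (now - obj["LastModified"]).days > threshold:
                    s3_client.delete_object(Bucket=bucket_name, Key=obj["Key"])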
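3. remove_old_files_on_ssh() calls os.path.getmtime(file_path) on paths that
`find` returned from the remote host, so the stat runs against the local
filesystem, where those paths generally do not exist. Splitting remote_store
on "@" also leaves the username in `server` and the host in `directory`. A
sketch that parses both parts and stats/deletes over SFTP instead, assuming a
"user@host:/directory" form for remote_store (that format, and the
non-recursive listing, are simplifying assumptions, not the patch's
behaviour):

    import datetime
    import stat

    import paramiko

    def remove_old_files_on_ssh(remote_store: str, threshold: int) -> None:
        # Assumed layout: user@host:/directory
        user, _, rest = remote_store.partition("@")
        host, _, directory = rest.partition(":")

        ssh_client = paramiko.SSHClient()
        ssh_client.load_system_host_keys()
        ssh_client.connect(host, username=user)

        now = datetime.datetime.now()
        sftp = ssh_client.open_sftp()
        try:
            # Non-recursive for brevity; the patch's `find -type f` recurses
            for entry in sftp.listdir_attr(directory):
                if entry.st_mode is None or not stat.S_ISREG(entry.st_mode):
                    continue  # skip directories and symlinks
                # Remote mtime, read over SFTP rather than the local filesystem
                last_modified = datetime.datetime.fromtimestamp(entry.st_mtime)
                if (now - last_modified).days > threshold:
                    sftp.remove(f"{directory}/{entry.filename}")
        finally:
            sftp.close()
            ssh_client.close()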