diff --git a/NGPIris/cli/__init__.py b/NGPIris/cli/__init__.py index c239b17..700a921 100644 --- a/NGPIris/cli/__init__.py +++ b/NGPIris/cli/__init__.py @@ -1,18 +1,60 @@ import click from click.core import Context -from json import dumps, dump +from json import dump from pathlib import Path +from botocore.paginate import PageIterator, Paginator +from typing import Any, Generator +from os import get_terminal_size +from math import floor +from tabulate import tabulate +from bitmath import Byte, TiB from NGPIris.hcp import HCPHandler -def get_HCPHandler(context : Context)-> HCPHandler: +def get_HCPHandler(context : Context) -> HCPHandler: return context.obj["hcph"] def format_list(list_of_things : list) -> str: list_of_buckets = list(map(lambda s : s + "\n", list_of_things)) return "".join(list_of_buckets).strip("\n") +def _list_objects_generator(hcph : HCPHandler, name_only : bool) -> Generator[str, Any, None]: + """ + Handle object list as a paginator that `click` can handle. It works slightly + different from `list_objects` in `hcp.py` in order to make the output + printable in a terminal + """ + paginator : Paginator = hcph.s3_client.get_paginator("list_objects_v2") + pages : PageIterator = paginator.paginate(Bucket = hcph.bucket_name) + (nb_of_cols, _) = get_terminal_size() + max_width = floor(nb_of_cols / 5) + if (not name_only): + yield tabulate( + [], + headers = ["Key", "LastModified", "ETag", "Size", "StorageClass"], + tablefmt = "plain", + stralign = "center" + ) + "\n" + "-"*nb_of_cols + "\n" + for object in pages.search("Contents[?!ends_with(Key, '/')][]"): # filter objects that does not end with "/" + if name_only: + yield str(object["Key"]) + "\n" + else: + yield tabulate( + [ + [str(object["Key"]), + str(object["LastModified"]), + str(object["ETag"]), + str(object["Size"]), + str(object["StorageClass"])] + ], + maxcolwidths = max_width, + tablefmt = "plain" + ) + "\n" + "-"*nb_of_cols + "\n" + +def object_is_folder(object_path : str, hcph : HCPHandler) -> bool: + return (object_path[-1] == "/") and (hcph.get_object(object_path)["ContentLength"] == 0) + @click.group() @click.argument("credentials") @click.version_option(package_name = "NGPIris") @@ -28,52 +70,90 @@ def cli(context : Context, credentials : str): @cli.command() @click.argument("bucket") -@click.argument("file-or-folder") +@click.argument("source") +@click.argument("destination") @click.pass_context -def upload(context : Context, bucket : str, file_or_folder : str): +def upload(context : Context, bucket : str, source : str, destination : str): """ Upload files to an HCP bucket/namespace. BUCKET is the name of the upload destination bucket. - FILE-OR-FOLDER is the path to the file or folder of files to be uploaded. + SOURCE is the path to the file or folder of files to be uploaded. + + DESTINATION is the destination path on the HCP. """ hcph : HCPHandler = get_HCPHandler(context) hcph.mount_bucket(bucket) - if Path(file_or_folder).is_dir(): - hcph.upload_folder(file_or_folder) + if Path(source).is_dir(): + hcph.upload_folder(source, destination) else: - hcph.upload_file(file_or_folder) + hcph.upload_file(source, destination) @cli.command() @click.argument("bucket") -@click.argument("object_path") -@click.argument("local_path") +@click.argument("source") +@click.argument("destination") @click.option( "-f", "--force", - help = "Overwrite existing file with the same name", + help = "Overwrite existing file with the same name (single file download only)", + is_flag = True +) +@click.option( + "-iw", + "--ignore_warning", + help = "Ignore the download limit", is_flag = True ) @click.pass_context -def download(context : Context, bucket : str, object_path : str, local_path : str, force : bool): +def download(context : Context, bucket : str, source : str, destination : str, force : bool, ignore_warning : bool): """ - Download a file from an HCP bucket/namespace. + Download a file or folder from an HCP bucket/namespace. - BUCKET is the name of the upload destination bucket. + BUCKET is the name of the download source bucket. - OBJECT_PATH is the path to the object to be downloaded. + SOURCE is the path to the object or object folder to be downloaded. - LOCAL_PATH is the folder where the downloaded object is to be stored locally. + DESTINATION is the folder where the downloaded object or object folder is to be stored locally. """ - if not Path(local_path).exists(): - Path(local_path).mkdir() - downloaded_object_path = Path(local_path) / Path(object_path).name - if downloaded_object_path.exists() and not force: - exit("Object already exists. If you wish to overwrite the existing file, use the -f, --force option") hcph : HCPHandler = get_HCPHandler(context) hcph.mount_bucket(bucket) - hcph.download_file(object_path, downloaded_object_path.as_posix()) + if not Path(destination).exists(): + Path(destination).mkdir() + + if object_is_folder(source, hcph): + if source == "/": + source = "" + + cumulative_download_size = Byte(0) + if not ignore_warning: + click.echo("Computing download size...") + for object in hcph.list_objects(source): + object : dict + cumulative_download_size += Byte(object["Size"]) + if cumulative_download_size >= TiB(1): + click.echo("WARNING: You are about to download more than 1 TB of data. Is this your intention? [y/N]: ", nl = False) + inp = click.getchar(True) + if inp == "y" or inp == "Y": + break + else: # inp == "n" or inp == "N" or something else + exit("\nAborting download") + + hcph.download_folder(source, Path(destination).as_posix()) + else: + if Byte(hcph.get_object(source)["ContentLength"]) >= TiB(1): + click.echo("WARNING: You are about to download more than 1 TB of data. Is this your intention? [y/N]: ", nl = False) + inp = click.getchar(True) + if inp == "y" or inp == "Y": + pass + else: # inp == "n" or inp == "N" or something else + exit("\nAborting download") + + downloaded_source = Path(destination) / Path(source).name + if downloaded_source.exists() and not force: + exit("Object already exists. If you wish to overwrite the existing file, use the -f, --force option") + hcph.download_file(source, downloaded_source.as_posix()) @cli.command() @click.argument("bucket") @@ -133,14 +213,7 @@ def list_objects(context : Context, bucket : str, name_only : bool): """ hcph : HCPHandler = get_HCPHandler(context) hcph.mount_bucket(bucket) - objects_list = hcph.list_objects(name_only) - if name_only: - click.echo(format_list(objects_list)) - else: - out = [] - for d in objects_list: - out.append(dumps(d, indent = 4, default = str) + "\n") - click.echo("".join(out)) + click.echo_via_pager(_list_objects_generator(hcph, name_only)) @cli.command() @click.argument("bucket") diff --git a/NGPIris/hcp/exceptions.py b/NGPIris/hcp/exceptions.py index c1b93f3..3485cd8 100644 --- a/NGPIris/hcp/exceptions.py +++ b/NGPIris/hcp/exceptions.py @@ -1,8 +1,23 @@ class VPNConnectionError(Exception): pass +class NoBucketMounted(Exception): + pass + class BucketNotFound(Exception): pass -class NoBucketMounted(Exception): +class BucketForbidden(Exception): + pass + +class ObjectAlreadyExist(Exception): + pass + +class ObjectDoesNotExist(Exception): + pass + +class DownloadLimitReached(Exception): + pass + +class NotADirectory(Exception): pass \ No newline at end of file diff --git a/NGPIris/hcp/hcp.py b/NGPIris/hcp/hcp.py index 835eb47..da80ba7 100644 --- a/NGPIris/hcp/hcp.py +++ b/NGPIris/hcp/hcp.py @@ -5,16 +5,26 @@ create_access_control_policy, check_mounted ) -from NGPIris.hcp.exceptions import * +from NGPIris.hcp.exceptions import ( + VPNConnectionError, + BucketNotFound, + BucketForbidden, + ObjectAlreadyExist, + ObjectDoesNotExist, + DownloadLimitReached, + NotADirectory +) from boto3 import client from botocore.client import Config +from botocore.paginate import PageIterator, Paginator from botocore.exceptions import EndpointConnectionError, ClientError from boto3.s3.transfer import TransferConfig from configparser import ConfigParser +from pathlib import Path + from os import ( - path, stat, listdir ) @@ -28,6 +38,10 @@ from urllib3 import disable_warnings from tqdm import tqdm +from bitmath import TiB, Byte + +from typing import Generator + _KB = 1024 _MB = _KB * _KB @@ -156,19 +170,20 @@ def test_connection(self, bucket_name : str = "") -> dict: else: raise RuntimeError("No bucket selected. Either use `mount_bucket` first or supply the optional `bucket_name` paramter for `test_connection`") try: - response = dict(self.s3_client.head_bucket(Bucket = bucket_name)) + response = dict(self.s3_client.head_bucket(Bucket = bucket_name)) except EndpointConnectionError as e: # pragma: no cover print(e) raise VPNConnectionError("Please check your connection and that you have your VPN enabled") except ClientError as e: - print(e) - raise BucketNotFound("Bucket \"" + bucket_name + "\" was not found") + status_code = e.response["ResponseMetadata"].get("HTTPStatusCode", -1) + match status_code: + case 404: + raise BucketNotFound("Bucket \"" + bucket_name + "\" was not found") + case 403: + raise BucketForbidden("Bucket \"" + bucket_name + "\" could not be accessed due to lack of permissions") except Exception as e: # pragma: no cover raise Exception(e) - if response["ResponseMetadata"].get("HTTPStatusCode", -1) != 200: # pragma: no cover - error_msg = "The response code from the request made at " + self.endpoint + " returned status code " + response["ResponseMetadata"]["HTTPStatusCode"] - raise Exception(error_msg) return response def mount_bucket(self, bucket_name : str) -> None: @@ -185,6 +200,18 @@ def mount_bucket(self, bucket_name : str) -> None: self.test_connection(bucket_name = bucket_name) self.bucket_name = bucket_name + def create_bucket(self, bucket_name : str) -> None: + """ + Create a bucket. The user in the given credentials will be the owner + of the bucket + + :param bucket_name: Name of the new bucket + :type bucket_name: str + """ + self.s3_client.create_bucket( + Bucket = bucket_name + ) + def list_buckets(self) -> list[str]: """ List all available buckets at endpoint. @@ -198,27 +225,26 @@ def list_buckets(self) -> list[str]: return list_of_buckets @check_mounted - def list_objects(self, name_only : bool = False) -> list: + def list_objects(self, path_key : str = "", name_only : bool = False) -> Generator: """ - List all objects in the mounted bucket + List all objects in the mounted bucket as a generator. If one wishes to + get the result as a list, use :py:function:`list` to type cast the generator - :param name_only: If True, return only a list of the object names. If False, return the full metadata about each object. Defaults to False. + :param path_key: Filter string for which keys to list, specifically for finding objects in certain folders. + :type path_key: str, optional + :param name_only: If True, yield only a the object names. If False, yield the full metadata about each object. Defaults to False. :type name_only: bool, optional - - :return: A list of of either strings or a list of object metadata (the form of a dictionary) - :rtype: list - """ - response_list_objects = dict(self.s3_client.list_objects_v2( - Bucket = self.bucket_name - )) - if "Contents" not in response_list_objects.keys(): # pragma: no cover - return [] - list_of_objects : list[dict] = response_list_objects["Contents"] - if name_only: - return [object["Key"] for object in list_of_objects] - else: - return list_of_objects - + :yield: A generator of all objects in a bucket + :rtype: Generator + """ + paginator : Paginator = self.s3_client.get_paginator("list_objects_v2") + pages : PageIterator = paginator.paginate(Bucket = self.bucket_name) + for object in pages.search("Contents[?starts_with(Key, '" + path_key + "')][]"): + if name_only: + yield str(object["Key"]) + else: + yield object + @check_mounted def get_object(self, key : str) -> dict: """ @@ -264,9 +290,13 @@ def download_file(self, key : str, local_file_path : str) -> None: :param key: Name of the object :type key: str - :param local_file_path: Path to a file on your local system where the contents of the object file can be put. + :param local_file_path: Path to a file on your local system where the contents of the object file can be put :type local_file_path: str """ + try: + self.get_object(key) + except: + raise ObjectDoesNotExist("Could not find object", "\"" + key + "\"", "in bucket", "\"" + str(self.bucket_name) + "\"") try: file_size : int = self.s3_client.head_object(Bucket = self.bucket_name, Key = key)["ContentLength"] with tqdm( @@ -284,10 +314,48 @@ def download_file(self, key : str, local_file_path : str) -> None: ) except ClientError as e0: print(str(e0)) - raise Exception("Could not find object", "\"" + key + "\"", "in bucket", "\"" + str(self.bucket_name) + "\"") + raise e0 except Exception as e: # pragma: no cover raise Exception(e) + @check_mounted + def download_folder(self, folder_key : str, local_folder_path : str, use_download_limit : bool = False, download_limit_in_bytes : Byte = TiB(1).to_Byte()) -> None: + """ + Download multiple objects from a folder in the mounted bucket + + :param folder_key: Name of the folder + :type folder_key: str + + :param local_folder_path: Path to a folder on your local system where the contents of the objects can be put + :type local_folder_path: str + + :param use_download_limit: Boolean choice for using a download limit. Defaults to False + :type use_download_limit: bool, optional + + :param download_limit_in_bytes: The optional download limit in Byte (from the package `bitmath`). Defaults to 1 TB (`TiB(1).to_Byte()`) + :type download_limit_in_bytes: Byte, optional + + :raises DownloadLimitReached: If download limit was reached while downloading files + :raises NotADirectory: If local_folder_path is not a directory + """ + try: + self.get_object(folder_key) + except: + raise ObjectDoesNotExist("Could not find object", "\"" + folder_key + "\"", "in bucket", "\"" + str(self.bucket_name) + "\"") + if Path(local_folder_path).is_dir(): + current_download_size_in_bytes = Byte(0) # For tracking download limit + for object in self.list_objects(folder_key): # Build the tree with directories or add files + p = Path(local_folder_path) / Path(object["Key"]) + if object["Key"][-1] == "/": # If the object is a "folder" + p.mkdir() + else: # If the object is a file + current_download_size_in_bytes += Byte(object["Size"]) + if current_download_size_in_bytes >= download_limit_in_bytes and use_download_limit: + raise DownloadLimitReached("The download limit was reached when downloading files") + self.download_file(object["Key"], p.as_posix()) + else: + raise NotADirectory(local_folder_path + " is not a directory") + @check_mounted def upload_file(self, local_file_path : str, key : str = "") -> None: """ @@ -302,23 +370,26 @@ def upload_file(self, local_file_path : str, key : str = "") -> None: raise_path_error(local_file_path) if not key: - file_name = path.basename(local_file_path) + file_name = Path(local_file_path).name key = file_name - file_size : int = stat(local_file_path).st_size - with tqdm( - total = file_size, - unit = "B", - unit_scale = True, - desc = local_file_path - ) as pbar: - self.s3_client.upload_file( - Filename = local_file_path, - Bucket = self.bucket_name, - Key = key, - Config = self.transfer_config, - Callback = lambda bytes_transferred : pbar.update(bytes_transferred) - ) + if self.object_exists(key): + raise ObjectAlreadyExist("The object \"" + key + "\" already exist in the mounted bucket") + else: + file_size : int = stat(local_file_path).st_size + with tqdm( + total = file_size, + unit = "B", + unit_scale = True, + desc = local_file_path + ) as pbar: + self.s3_client.upload_file( + Filename = local_file_path, + Bucket = self.bucket_name, + Key = key, + Config = self.transfer_config, + Callback = lambda bytes_transferred : pbar.update(bytes_transferred) + ) @check_mounted def upload_folder(self, local_folder_path : str, key : str = "") -> None: @@ -355,19 +426,19 @@ def delete_objects(self, keys : list[str], verbose : bool = True) -> None: deletion_dict = {"Objects": object_list} - list_of_objects_before = self.list_objects(True) - response : dict = self.s3_client.delete_objects( Bucket = self.bucket_name, Delete = deletion_dict ) if verbose: print(dumps(response, indent=4)) - diff : set[str] = set(keys) - set(list_of_objects_before) - if diff: - does_not_exist = [] - for key in diff: + + deleted_dict_list : list[dict] = response["Deleted"] + does_not_exist = [] + for deleted_dict in deleted_dict_list: + if not "VersionId" in deleted_dict: does_not_exist.append("- " + key + "\n") + if does_not_exist: print("The following could not be deleted because they didn't exist: \n" + "".join(does_not_exist)) @check_mounted @@ -409,7 +480,7 @@ def delete_folder(self, key : str, verbose : bool = True) -> None: @check_mounted def search_objects_in_bucket(self, search_string : str, case_sensitive : bool = False) -> list[str]: """ - Simple search method using substrings in order to find certain objects. Case insensitive by default. + Simple search method using substrings in order to find certain objects. Case insensitive by default. Does not utilise the HCI :param search_string: Substring to be used in the search :type search_string: str @@ -421,7 +492,7 @@ def search_objects_in_bucket(self, search_string : str, case_sensitive : bool = :rtype: list[str] """ search_result : list[str] = [] - for key in self.list_objects(True): + for key in self.list_objects(name_only = True): parse_object = search( search_string, key, diff --git a/NGPIris/hcp/helpers.py b/NGPIris/hcp/helpers.py index 5fa648d..e86f111 100644 --- a/NGPIris/hcp/helpers.py +++ b/NGPIris/hcp/helpers.py @@ -26,6 +26,8 @@ def raise_path_error(path : str): if not p.exists(path): raise FileNotFoundError("\"" + path + "\"" + " does not exist") +# Decorator for checking if a bucket is mounted. This is meant to be used by +# class methods, hence its possibly odd typing. T = TypeVar("T") P = ParamSpec("P") diff --git a/NGPIris/hcp/statistics.py b/NGPIris/hcp/statistics.py index ffb5b9c..852c157 100644 --- a/NGPIris/hcp/statistics.py +++ b/NGPIris/hcp/statistics.py @@ -1,25 +1,20 @@ from NGPIris.hcp import HCPHandler +from NGPIris.hcp.helpers import check_mounted class HCPStatistics(HCPHandler): def __init__(self, credentials_path: str, use_ssl: bool = False, proxy_path: str = "", custom_config_path: str = "") -> None: super().__init__(credentials_path, use_ssl, proxy_path, custom_config_path) + @check_mounted def get_namespace_settings(self) -> dict: - if self.bucket_name: - return self.get_response("/namespaces/" + self.bucket_name) - else: - raise RuntimeError("No bucket has been mounted") + return self.get_response("/namespaces/" + self.bucket_name) #type: ignore + @check_mounted def get_namespace_statistics(self) -> dict: - if self.bucket_name: - return self.get_response("/namespaces/" + self.bucket_name + "/statistics") - else: - raise RuntimeError("No bucket has been mounted") + return self.get_response("/namespaces/" + self.bucket_name + "/statistics") #type: ignore + @check_mounted def get_namespace_permissions(self) -> dict: - if self.bucket_name: - return self.get_response("/namespaces/" + self.bucket_name + "/permissions") - else: - raise RuntimeError("No bucket has been mounted") + return self.get_response("/namespaces/" + self.bucket_name + "/permissions") #type: ignore diff --git a/README.md b/README.md index 5d6d238..03ef204 100644 --- a/README.md +++ b/README.md @@ -267,6 +267,19 @@ pytype ```shell pytest ``` +## Generating new documentation +With force flag `-f`: +```shell +cd docs/ +sphinx-apidoc ../NGPIris/ -o . -F -f +``` + +Without force flag +```shell +cd docs/ +sphinx-apidoc ../NGPIris/ -o . -F +``` + ## Compiling the documentation ```shell cd docs/ diff --git a/docs/conf.py b/docs/conf.py index 05ffb27..87e2f80 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -5,41 +5,32 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -import sys -import os -from toml import load -from marko import convert -sys.path.insert(0, os.path.abspath("..")) - -#root_doc = "README" - -pyproject = load("../pyproject.toml") -pyproject_project : dict = pyproject["project"] - -project = pyproject_project["name"] -author = "" -for author_ in pyproject_project["authors"]: - author += (author_["name"] + ", ") -author = author[:-2] - -show_authors = True -html_show_copyright = False -release = pyproject_project["version"] +project = 'NGPIris' +copyright = '2024, Author' +author = 'Author' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ["sphinx.ext.autodoc", "myst_parser"] +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.viewcode', + 'sphinx.ext.todo', +] -source_suffix = [".rst", ".md"] - -templates_path = ["_templates"] -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +language = 'en' # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = "furo" -html_static_path = ["_static"] +html_theme = 'alabaster' +html_static_path = ['_static'] + +# -- Options for todo extension ---------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/extensions/todo.html#configuration + +todo_include_todos = True diff --git a/docs/index.rst b/docs/index.rst index 7a490c6..2d11598 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,21 +1,21 @@ -.. NGP Iris documentation master file, created by - sphinx-quickstart on Tue May 28 16:12:58 2024. +.. NGPIris documentation master file, created by + sphinx-quickstart on Fri Sep 27 16:07:28 2024. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to NGP Iris's documentation! -==================================== +Welcome to NGPIris's documentation! +=================================== + .. toctree:: :maxdepth: 4 :caption: Contents: - - README - Technical documentation - Tutorial - Changelog for IRIS 5 + + NGPIris + Indices and tables ------------------- +================== * :ref:`genindex` * :ref:`modindex` +* :ref:`search` diff --git a/pyproject.toml b/pyproject.toml index b330a4c..e281fc7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "NGPIris" -version = "5.0.1" +version = "5.1.0" readme = "README.md" dependencies = [ "requests >= 2.31.0", @@ -9,10 +9,13 @@ dependencies = [ "boto3 >= 1.26.76", "parse >= 1.19.1", "tqdm >= 4.66.2", - "click >= 8.1.7" + "click >= 8.1.7", + "bitmath == 1.3.3.1", + "tabulate == 0.9.0" ] authors = [ - {name = "Erik Brink", email = "erik.brink@gu.se"} + {name = "Erik Brink", email = "erik.brink@gu.se"}, + {name = "Isak Sylvin", email = "isak.sylvin@gu.se"} ] [build-system] diff --git a/tests/test_hcp.py b/tests/test_hcp.py index 7313c15..8fac4ba 100644 --- a/tests/test_hcp.py +++ b/tests/test_hcp.py @@ -2,7 +2,8 @@ from typing import Callable from NGPIris.hcp import HCPHandler from configparser import ConfigParser -from os import mkdir, rmdir, remove +from pathlib import Path +from shutil import rmtree from filecmp import cmp hcp_h = HCPHandler("credentials/testCredentials.json") @@ -57,7 +58,7 @@ def test_test_connection_without_mounting_bucket() -> None: def test_list_objects() -> None: test_mount_bucket() - assert type(hcp_h.list_objects()) == list + assert type(list(hcp_h.list_objects())) == list def test_list_objects_without_mounting() -> None: _hcp_h = HCPHandler("credentials/testCredentials.json") @@ -118,7 +119,7 @@ def test_get_file_in_sub_directory() -> None: def test_download_file() -> None: test_mount_bucket() - mkdir(result_path) + Path(result_path).mkdir() hcp_h.download_file(test_file, result_path + test_file) assert cmp(result_path + test_file, test_file_path) @@ -135,6 +136,10 @@ def test_download_nonexistent_file() -> None: else: # pragma: no cover assert False +def test_download_folder() -> None: + test_mount_bucket() + hcp_h.download_folder("a folder of data/", result_path) + def test_search_objects_in_bucket() -> None: test_mount_bucket() hcp_h.search_objects_in_bucket(test_file) @@ -208,5 +213,4 @@ def test_delete_nonexistent_files() -> None: hcp_h.delete_objects(["some", "files", "that", "does", "not", "exist"]) def test_clean_up() -> None: - remove(result_path + test_file) - rmdir(result_path) \ No newline at end of file + rmtree(result_path) \ No newline at end of file