From 536136c9ad76fa12f364f23336759eddb9d90897 Mon Sep 17 00:00:00 2001 From: Leonhard Schick Date: Tue, 21 Sep 2021 17:42:14 +0200 Subject: [PATCH 1/5] add Azure Storage WIP --- README.md | 1 + mara_storage/.scripts/install.mk | 16 ++++++ mara_storage/azure.py | 12 ++++ mara_storage/info.py | 7 +++ mara_storage/manage.py | 20 +++++++ mara_storage/shell.py | 23 ++++++++ mara_storage/storages.py | 32 +++++++++++ setup.cfg | 1 + tests/local_config.py.example | 8 ++- tests/test_azcopy.py | 94 ++++++++++++++++++++++++++++++++ 10 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 mara_storage/.scripts/install.mk create mode 100644 mara_storage/azure.py create mode 100644 tests/test_azcopy.py diff --git a/README.md b/README.md index 01a09a2..7134534 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,7 @@ The following **command line clients** are used to access the various databases: | Local storage | unix shell | Included in standard distributions. | | SFTP storage | `sftp`, `curl` | | | Google Cloud Storage | `gsutil` | From [https://cloud.google.com/storage/docs/gsutil_install](https://cloud.google.com/storage/docs/gsutil_install). 
| +| Azure Storage | `azcopy` | From [https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10)   diff --git a/mara_storage/.scripts/install.mk b/mara_storage/.scripts/install.mk new file mode 100644 index 0000000..c61baab --- /dev/null +++ b/mara_storage/.scripts/install.mk @@ -0,0 +1,16 @@ + +install-azcopy: + # install azcopy in the virtual environment + # see also: https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10 + + # download azcopy + wget https://aka.ms/downloadazcopy-v10-linux + tar -xvf downloadazcopy-v10-linux + + # install + rm -f .venv/bin/azcopy + mv ./azcopy_linux_amd64_*/azcopy .venv/bin/azcopy + + # clean up + rm downloadazcopy-v10-linux + rm -rf ./azcopy_linux_amd64_*/ downloadazcopy-v10-linux diff --git a/mara_storage/azure.py b/mara_storage/azure.py new file mode 100644 index 0000000..e14dbbd --- /dev/null +++ b/mara_storage/azure.py @@ -0,0 +1,12 @@ +from azure.storage.blob import BlobClient, BlobServiceClient + +from . import storages + + +def init_client(storage: storages.AzureStorage, path: str = None): + client = BlobClient.from_blob_url(storage.build_uri(path)) + return client + +def init_service_client(storage: storages.AzureStorage, path: str = None): + client = BlobServiceClient.from_connection_string(storage.connection_string()) + return client diff --git a/mara_storage/info.py b/mara_storage/info.py index f7fc1df..0b2f517 100644 --- a/mara_storage/info.py +++ b/mara_storage/info.py @@ -43,3 +43,10 @@ def __(storage: storages.GoogleCloudStorage, file_name: str): (exitcode, _) = subprocess.getstatusoutput(command) return exitcode == 0 + + +@file_exists.register(storages.AzureStorage) +def __(storage: storages.AzureStorage, file_name: str): + from . 
import azure + client = azure.init_client(storage, path=file_name) + return client.exists() diff --git a/mara_storage/manage.py b/mara_storage/manage.py index 05b6016..11044af 100644 --- a/mara_storage/manage.py +++ b/mara_storage/manage.py @@ -43,6 +43,16 @@ def __(storage: storages.GoogleCloudStorage): assert exitcode == 0 +@ensure_storage.register(storages.AzureStorage) +def __(storage: storages.AzureStorage): + from . import azure + client = azure.init_service_client(storage) + container_client = client.get_container_client(container=storage.container_name) + + if not container_client.exists(): + container_client.create_container() + + # ----------------------------------------------------------------------------- @@ -93,3 +103,13 @@ def __(storage: storages.GoogleCloudStorage, force: bool = False): if exitcode != 0: raise Exception(f'An error occured while dropping a GCS bucket. Stdout:\n{stdout}') assert exitcode == 0 + + +@drop_storage.register(storages.AzureStorage) +def __(storage: storages.AzureStorage, force: bool = False): + from . 
import azure + client = azure.init_service_client(storage) + container_client = client.get_container_client(container=storage.container_name) + + if container_client.exists(): + container_client.delete_container() diff --git a/mara_storage/shell.py b/mara_storage/shell.py index 9cf8c0e..faada2f 100644 --- a/mara_storage/shell.py +++ b/mara_storage/shell.py @@ -61,6 +61,14 @@ def __(storage: storages.GoogleCloudStorage, file_name: str, compression: Compre + (f'\\\n | {uncompressor(compression)} - ' if compression != Compression.NONE else '')) +@read_file_command.register(storages.AzureStorage) +def __(storage: storages.AzureStorage, file_name: str, compression: Compression = Compression.NONE): + return ('azcopy cp ' + + shlex.quote(storage.build_uri(file_name)) + + ' --from-to BlobPipe' + + (f'\\\n | {uncompressor(compression)} - ' if compression != Compression.NONE else '')) + + # ----------------------------------------------------------------------------- @@ -135,6 +143,15 @@ def __(storage: storages.GoogleCloudStorage, file_name: str, compression: Compre + shlex.quote(storage.build_uri(file_name))) +@write_file_command.register(storages.AzureStorage) +def __(storage: storages.AzureStorage, file_name: str, compression: Compression = Compression.NONE): + if compression not in [Compression.NONE]: + raise ValueError(f'Only compression NONE is supported from storage type "{storage.__class__.__name__}"') + return ('azcopy cp ' + + shlex.quote(storage.build_uri(file_name)) + + ' --from-to PipeBlob') + + # ----------------------------------------------------------------------------- @@ -202,3 +219,9 @@ def __(storage: storages.GoogleCloudStorage, file_name: str, force: bool = True, + ('-f ' if force else '') + ('-r ' if recursive else '') + shlex.quote(storage.build_uri(file_name))) + + +@delete_file_command.register(storages.AzureStorage) +def __(storage: storages.AzureStorage, file_name: str, force: bool = True): + return ('azcopy rm ' + + 
shlex.quote(storage.build_uri(file_name))) diff --git a/mara_storage/storages.py b/mara_storage/storages.py index 94a3e19..a80c2a1 100644 --- a/mara_storage/storages.py +++ b/mara_storage/storages.py @@ -85,3 +85,35 @@ def base_uri(self): def build_uri(self, path: str): """Returns a URI for a path on the storage""" return f"{self.base_uri}/{path}" + +class AzureStorage(Storage): + def __init__(self, account_name: str, container_name: str, sas: str, storage_type: str = 'blob'): + """ + Connection information for a Azure sstorage bucket + + Args: + account_name: The storage account name + container_name: The container name within the storage + storage_type: The storage type. Supports 'blob' or 'dfs'. + sas: The SAS token + """ + self.account_name = account_name + self.container_name = container_name + self.storage_type = storage_type + if sas: + self.sas = sas[1:] if sas.startswith('?') else sas + + @property + def base_uri(self): + return f'https://{self.account_name}.{self.storage_type}.core.windows.net/{self.container_name}' + + def build_uri(self, path: str): + """Returns a URI for a path on the storage""" + if path and not path.startswith('/'): + path = '/' + path + return f"{self.base_uri}{path}?{self.sas}" + + def connection_string(self): + return ('DefaultEndpointsProtocol=https' + + f';BlobEndpoint=https://{self.account_name}.{self.storage_type}.core.windows.net' + + f';SharedAccessSignature={self.sas}') diff --git a/setup.cfg b/setup.cfg index b6f7553..5c1657e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,3 +29,4 @@ install_requires = test = pytest sftp = pysftp google-cloud-storage = google-cloud-storage; google-oauth +azure-blob = azure-storage-blob diff --git a/tests/local_config.py.example b/tests/local_config.py.example index 78c895f..003ee9d 100644 --- a/tests/local_config.py.example +++ b/tests/local_config.py.example @@ -12,4 +12,10 @@ SFTP_PUBLIC_IDENTITY_FILE = None GCS_PROJECT_ID = '' # required for GCS client -GCS_SERVICE_ACCOUNT_FILE = '' \ No 
newline at end of file +GCS_SERVICE_ACCOUNT_FILE = '' + + +# the Azure Storage config used to perform the test +AZ_STORAGE_ACCOUNT_NAME = '' +AZ_STORAGE_TYPE = 'blob' +AZ_STORAGE_SAS = None diff --git a/tests/test_azcopy.py b/tests/test_azcopy.py new file mode 100644 index 0000000..5ca709d --- /dev/null +++ b/tests/test_azcopy.py @@ -0,0 +1,94 @@ +import pytest +import subprocess + +from mara_storage import storages, info, shell, manage + + +from .local_config import AZ_STORAGE_ACCOUNT_NAME, AZ_STORAGE_TYPE, AZ_STORAGE_SAS + +TEST_TOUCH_FILE_NAME = 'empty-file.txt' +TEST_FILE_NOT_EXISTS_FILE_NAME = 'file-does-not-exist.txt' +TEST_READ_FILE_NAME = 'read_test.txt' +TEST_WRITE_FILE_NAME = 'write_test.txt' +TEST_DELETE_FILE_NAME = 'delete_test.txt' +TEST_CONTENT = 'THIS IS A TEST CONTENT' + + +if not AZ_STORAGE_ACCOUNT_NAME: + pytest.skip("skipping azcopp tests: variable AZ_STORAGE_ACCOUNT_NAME not set in tests/local_config.py", allow_module_level=True) + + +@pytest.fixture +def storage(): + import random + container_name = f'mara-storage-test-{random.randint(0, 2147483647)}' + + return storages.AzureStorage( + account_name=AZ_STORAGE_ACCOUNT_NAME, + container_name=container_name, + sas=AZ_STORAGE_SAS, + storage_type=AZ_STORAGE_TYPE) + +@pytest.fixture(autouse=True) +def test_before_and_after(storage: object): + assert storage.account_name + assert isinstance(storage, storages.AzureStorage) + + manage.ensure_storage(storage) + yield + manage.drop_storage(storage, force=True) + + +def test_file_exists(storage: object): + command = shell.write_file_command(storage, file_name=TEST_TOUCH_FILE_NAME) + assert command + + # prepare + (exitcode, _) = subprocess.getstatusoutput(f"echo '' | {command}") + assert exitcode == 0 + + # test + assert info.file_exists(storage, file_name=TEST_TOUCH_FILE_NAME) + assert not info.file_exists(storage, file_name=TEST_FILE_NOT_EXISTS_FILE_NAME) + + +def test_write_file_command(storage: object): + command = shell.write_file_command(storage, 
file_name=TEST_WRITE_FILE_NAME) + assert command + + (exitcode, _) = subprocess.getstatusoutput(f'echo "{TEST_CONTENT}" | {command}') + assert exitcode == 0 + + assert info.file_exists(storage, file_name=TEST_WRITE_FILE_NAME) + + +def test_read_file_command(storage: object): + command = shell.write_file_command(storage, file_name=TEST_READ_FILE_NAME) + assert command + + (exitcode, _) = subprocess.getstatusoutput(f'echo "{TEST_CONTENT}" | {command}') + assert exitcode == 0 + assert info.file_exists(storage, file_name=TEST_READ_FILE_NAME) + + command = shell.read_file_command(storage, file_name=TEST_READ_FILE_NAME) + assert command + + (exitcode, stdout) = subprocess.getstatusoutput(command) + assert exitcode == 0 + assert stdout == TEST_CONTENT + + +def test_delete_file_command(storage: object): + command = shell.write_file_command(storage, file_name=TEST_DELETE_FILE_NAME) + assert command + + (exitcode, _) = subprocess.getstatusoutput(f'echo "{TEST_CONTENT}" | {command}') + assert exitcode == 0 + assert info.file_exists(storage, file_name=TEST_DELETE_FILE_NAME) + + command = shell.delete_file_command(storage, file_name=TEST_DELETE_FILE_NAME) + assert command + + (exitcode, _) = subprocess.getstatusoutput(command) + assert exitcode == 0 + assert not info.file_exists(storage, file_name=TEST_DELETE_FILE_NAME) From c0a51d240b6d1cef10ff2de968473c8461357f04 Mon Sep 17 00:00:00 2001 From: Leonhard Schick Date: Mon, 28 Feb 2022 19:22:51 +0100 Subject: [PATCH 2/5] support Azure Storage connection strings with account key --- mara_storage/storages.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/mara_storage/storages.py b/mara_storage/storages.py index a80c2a1..b9ca0bd 100644 --- a/mara_storage/storages.py +++ b/mara_storage/storages.py @@ -87,7 +87,8 @@ def build_uri(self, path: str): return f"{self.base_uri}/{path}" class AzureStorage(Storage): - def __init__(self, account_name: str, container_name: str, sas: str, storage_type: str = 
'blob'): + def __init__(self, account_name: str, container_name: str, sas: str = None, + storage_type: str = 'blob', account_key: str = None): """ Connection information for a Azure sstorage bucket @@ -96,8 +97,12 @@ def __init__(self, account_name: str, container_name: str, sas: str, storage_typ container_name: The container name within the storage storage_type: The storage type. Supports 'blob' or 'dfs'. sas: The SAS token + account_key: The storage account key """ + if sas is None and account_key is None: + raise ValueError('You have to provide either parameter sas or account_key for type AzureStorage.') self.account_name = account_name + self.account_key = account_key self.container_name = container_name self.storage_type = storage_type if sas: @@ -114,6 +119,10 @@ def build_uri(self, path: str): return f"{self.base_uri}{path}?{self.sas}" def connection_string(self): - return ('DefaultEndpointsProtocol=https' - + f';BlobEndpoint=https://{self.account_name}.{self.storage_type}.core.windows.net' - + f';SharedAccessSignature={self.sas}') + # see https://docs.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string + if self.account_key: + return f'DefaultEndpointsProtocol=https;AccountName={self.account_name};AccountKey={self.account_key}' + else: + return ('DefaultEndpointsProtocol=https' + + f';BlobEndpoint=https://{self.account_name}.{self.storage_type}.core.windows.net' + + f';SharedAccessSignature={self.sas}') From 7c10d875304546007e292828bfe8a4274cfbb414 Mon Sep 17 00:00:00 2001 From: Leo Schick Date: Sat, 20 Aug 2022 13:52:49 +0200 Subject: [PATCH 3/5] improve azure storage implementation, add AzureStorageClient --- mara_storage/azure.py | 49 +++++++++++++++++++++++++++++++++++++--- mara_storage/client.py | 5 ++++ mara_storage/shell.py | 37 ++++++++++++++++++++++++++---- mara_storage/storages.py | 23 ++++++++++++++----- 4 files changed, 100 insertions(+), 14 deletions(-) diff --git a/mara_storage/azure.py b/mara_storage/azure.py index 
e14dbbd..cee9677 100644 --- a/mara_storage/azure.py +++ b/mara_storage/azure.py @@ -1,12 +1,55 @@ -from azure.storage.blob import BlobClient, BlobServiceClient +import datetime +from mara_storage.client import StorageClient from . import storages +from azure.storage.blob import BlobClient, BlobServiceClient + -def init_client(storage: storages.AzureStorage, path: str = None): +def init_client(storage: storages.AzureStorage, path: str = None) -> BlobClient: client = BlobClient.from_blob_url(storage.build_uri(path)) return client -def init_service_client(storage: storages.AzureStorage, path: str = None): +def init_service_client(storage: storages.AzureStorage, path: str = None) -> BlobServiceClient: client = BlobServiceClient.from_connection_string(storage.connection_string()) return client + +class AzureStorageClient(StorageClient): + def __init__(self, storage: storages.AzureStorage): + super().__init__(storage) + + self.__blob_service_client: BlobServiceClient = None + self.__container_client = None + + @property + def _blob_service_client(self): + if not self.__blob_service_client: + self.__blob_service_client = init_service_client(self._storage) + + return self.__blob_service_client + + @property + def _container_client(self): + if not self.__container_client: + self.__container_client = self._blob_service_client.get_container_client(self._storage.container_name) + + return self.__container_client + + def creation_timestamp(self, path: str) -> datetime.datetime: + blob_client = self._container_client.get_blob_client(path) + properties = blob_client.get_blob_properties() + + return properties.creation_time + + def last_modification_timestamp(self, path: str) -> datetime.datetime: + blob_client = self._container_client.get_blob_client(path) + properties = blob_client.get_blob_properties() + + return properties.last_modified + + def iterate_files(self, file_pattern: str): + blobs = self._container_client.list_blobs(name_starts_with=file_pattern) + + for blob in 
blobs: + if blob: + yield blob.name \ No newline at end of file diff --git a/mara_storage/client.py b/mara_storage/client.py index 7f08dbc..9c835e3 100644 --- a/mara_storage/client.py +++ b/mara_storage/client.py @@ -55,3 +55,8 @@ def __(storage: storages.LocalStorage): def __(storage: storages.GoogleCloudStorage): from .google_cloud_storage import GoogleCloudStorageClient return GoogleCloudStorageClient + +@storage_client_type.register(storages.AzureStorage) +def __(storage: storages.AzureStorage): + from .azure import AzureStorageClient + return AzureStorageClient \ No newline at end of file diff --git a/mara_storage/shell.py b/mara_storage/shell.py index faada2f..21e1d8f 100644 --- a/mara_storage/shell.py +++ b/mara_storage/shell.py @@ -63,7 +63,17 @@ def __(storage: storages.GoogleCloudStorage, file_name: str, compression: Compre @read_file_command.register(storages.AzureStorage) def __(storage: storages.AzureStorage, file_name: str, compression: Compression = Compression.NONE): - return ('azcopy cp ' + if storage.sas: + return (f'curl -sf {shlex.quote(storage.build_uri(path=file_name))}' + + (f'\\\n | {uncompressor(compression)} - ' if compression != Compression.NONE else '')) + + azlogin_env = ('AZCOPY_AUTO_LOGIN_TYPE=SPN ' + + f'AZCOPY_TENANT_ID="{storage.spa_tenant}" ' + + f'AZCOPY_SPA_APPLICATION_ID="{storage.spa_application}" ' + + f'AZCOPY_SPA_CLIENT_SECRET="{storage.spa_client_secret}" ' + ) if not storage.sas else '' + + return (f'{azlogin_env}azcopy cp ' + shlex.quote(storage.build_uri(file_name)) + ' --from-to BlobPipe' + (f'\\\n | {uncompressor(compression)} - ' if compression != Compression.NONE else '')) @@ -145,9 +155,17 @@ def __(storage: storages.GoogleCloudStorage, file_name: str, compression: Compre @write_file_command.register(storages.AzureStorage) def __(storage: storages.AzureStorage, file_name: str, compression: Compression = Compression.NONE): - if compression not in [Compression.NONE]: - raise ValueError(f'Only compression NONE is 
supported from storage type "{storage.__class__.__name__}"') - return ('azcopy cp ' + if compression not in [Compression.NONE, Compression.GZIP]: + raise ValueError(f'Only compression NONE and GZIP is supported from storage type "{storage.__class__.__name__}"') + + azlogin_env = ('AZCOPY_AUTO_LOGIN_TYPE=SPN ' + + f'AZCOPY_TENANT_ID="{storage.spa_tenant}" ' + + f'AZCOPY_SPA_APPLICATION_ID="{storage.spa_application}" ' + + f'AZCOPY_SPA_CLIENT_SECRET="{storage.spa_client_secret}" ' + ) if not storage.sas else '' + + return ((f'gzip \\\n | ' if compression == Compression.GZIP else '') + + f'{azlogin_env}azcopy cp ' + shlex.quote(storage.build_uri(file_name)) + ' --from-to PipeBlob') @@ -223,5 +241,14 @@ def __(storage: storages.GoogleCloudStorage, file_name: str, force: bool = True, @delete_file_command.register(storages.AzureStorage) def __(storage: storages.AzureStorage, file_name: str, force: bool = True): - return ('azcopy rm ' + if storage.sas and not force: + return (f'curl -sf -X DELETE {shlex.quote(storage.build_uri(path=file_name))}') + + azlogin_env = ('AZCOPY_AUTO_LOGIN_TYPE=SPN ' + + f'AZCOPY_TENANT_ID="{storage.spa_tenant}" ' + + f'AZCOPY_SPA_APPLICATION_ID="{storage.spa_application}" ' + + f'AZCOPY_SPA_CLIENT_SECRET="{storage.spa_client_secret}" ' + ) if not storage.sas else '' + + return (f'{azlogin_env}azcopy rm ' + shlex.quote(storage.build_uri(file_name))) diff --git a/mara_storage/storages.py b/mara_storage/storages.py index b9ca0bd..5fb51e3 100644 --- a/mara_storage/storages.py +++ b/mara_storage/storages.py @@ -88,25 +88,35 @@ def build_uri(self, path: str): class AzureStorage(Storage): def __init__(self, account_name: str, container_name: str, sas: str = None, - storage_type: str = 'blob', account_key: str = None): + storage_type: str = 'blob', account_key: str = None, + spa_tenant: str = None, spa_application: str = None, spa_client_secret: str = None): """ Connection information for a Azure sstorage bucket + Possible authentication methods: + 
SAS => "Shared access signature", see https://docs.microsoft.com/en-us/azure/storage/common/storage-sas-overview + SPA => "Service principal" + Args: account_name: The storage account name container_name: The container name within the storage storage_type: The storage type. Supports 'blob' or 'dfs'. sas: The SAS token account_key: The storage account key + spa_tenant: The service principal tenant id + spa_application: The service principal application id + spa_client_secret: The service principal client secret """ - if sas is None and account_key is None: - raise ValueError('You have to provide either parameter sas or account_key for type AzureStorage.') + if sas is None and account_key is None and spa_client_secret is None: + raise ValueError('You have to provide either parameter sas, account_key or spa_client_secret for type AzureStorage.') self.account_name = account_name self.account_key = account_key self.container_name = container_name self.storage_type = storage_type - if sas: - self.sas = sas[1:] if sas.startswith('?') else sas + self.sas = (sas[1:] if sas.startswith('?') else sas) if sas else None + self.spa_tenant = spa_tenant + self.spa_application = spa_application + self.spa_client_secret = spa_client_secret @property def base_uri(self): @@ -116,7 +126,8 @@ def build_uri(self, path: str): """Returns a URI for a path on the storage""" if path and not path.startswith('/'): path = '/' + path - return f"{self.base_uri}{path}?{self.sas}" + return (f"{self.base_uri}{path}" + + (f'?{self.sas}' if self.sas else '')) def connection_string(self): # see https://docs.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string From 6a1cb593a1257273a1b98ab4724e84486d08b47c Mon Sep 17 00:00:00 2001 From: Leo Schick Date: Wed, 22 Feb 2023 17:17:25 +0100 Subject: [PATCH 4/5] update docs --- docs/index.rst | 3 +- docs/storages-overview.md | 13 ++++--- docs/storages/azure.rst | 75 +++++++++++++++++++++++++++++++++++++++ mara_storage/azure.py | 1 + 4 
files changed, 87 insertions(+), 5 deletions(-) create mode 100644 docs/storages/azure.rst diff --git a/docs/index.rst b/docs/index.rst index 767da69..278ffab 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -29,9 +29,10 @@ This section focuses on the supported storages. :maxdepth: 2 storages-overview + storages/azure + storages/gcs storages/local storages/sftp - storages/gcs API Reference diff --git a/docs/storages-overview.md b/docs/storages-overview.md index 1958755..d4199e3 100644 --- a/docs/storages-overview.md +++ b/docs/storages-overview.md @@ -3,12 +3,16 @@ Overview The following storages are supported -| Storage | Configuration class | -| ---------------------- | ------------------- | -| Local bash | LocalStorage | -| [Google Cloud Storage] | GoogleCloudStorage | +| Storage | Configuration class | +| ------------------------- | ------------------- | +| Local bash | LocalStorage | +| [Google Cloud Storage] | GoogleCloudStorage | +| [Azure Blob Storage] | AzureStorage | +| [Azure Data Lake Storage] | AzureStorage | [Google Cloud Storage]: https://cloud.google.com/storage +[Azure Blob Storage]: https://azure.microsoft.com/en-us/products/storage/blobs +[Azure Data Lake Storage]: https://azure.microsoft.com/en-us/products/storage/data-lake-storage/ Function support matrix @@ -18,6 +22,7 @@ Function support matrix | --------------------- | ---- | ----- | ------ | ------ | | LocalStorage | Yes | Yes | Yes | Yes | GoogleCloudStorage | Yes | Yes | Yes | Yes +| AzureStorage | Yes | Yes | Yes | Yes ```{note} A `Move` operation is not implemented by design. Most of the blob storages do not diff --git a/docs/storages/azure.rst b/docs/storages/azure.rst new file mode 100644 index 0000000..c11beac --- /dev/null +++ b/docs/storages/azure.rst @@ -0,0 +1,75 @@ +Azure Storage +============= + +Accessing Azure Blob Storage with the shell tool `azcopy`. + +Installation +------------ + +You need to install `azcopy`.
Take a look at `Get started with Azcopy <https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10>`_. + + +Configuration examples +---------------------- + +.. note:: +   Currently some of the functions require a SAS token, and some of the functions +   require an account key. It is recommended to provide both a SAS token +   and an account key. + + +.. tabs:: + + .. group-tab:: SAS token + + .. code-block:: python + + import pathlib + import mara_storage.storages + mara_storage.config.storages = lambda: { + 'data': mara_storage.storages.AzureStorage( + account_name='account-name', + container_name='container-name', + sas='sp=racwdlm&st=2022-05-11T10:04:05Z&se=2023-05-11T18:04:05Z&spr=https&sv=2020-08-04&sr=c&sig=u7tqxugyv5MbyrtFdEUp22tnou4wifBoUfIaLDazeRT%3D', + + # optional + storage_type = 'dfs'), # use a dfs client instead of 'blob' (default value) + } + + .. group-tab:: Account key + + .. code-block:: python + + import pathlib + import mara_storage.storages + mara_storage.config.storages = lambda: { + 'data': mara_storage.storages.AzureStorage( + account_name='account-name', + container_name='container-name', + account_key='', + + # optional + storage_type = 'dfs' # use a dfs client instead of 'blob' (default value) + ), + } + +| + +| + +API reference +------------- + +This section contains storage specific API in the module. + + +Configuration +~~~~~~~~~~~~~ + +.. module:: mara_storage.storages + :noindex: + +.. 
autoclass:: AzureStorage + :special-members: __init__ + :inherited-members: + :members: diff --git a/mara_storage/azure.py b/mara_storage/azure.py index cee9677..674e175 100644 --- a/mara_storage/azure.py +++ b/mara_storage/azure.py @@ -14,6 +14,7 @@ def init_service_client(storage: storages.AzureStorage, path: str = None) -> Blo client = BlobServiceClient.from_connection_string(storage.connection_string()) return client + class AzureStorageClient(StorageClient): def __init__(self, storage: storages.AzureStorage): super().__init__(storage) From 61bdd1d07e75b499dbeb221fd7e35bbb57de548e Mon Sep 17 00:00:00 2001 From: Leo Schick Date: Wed, 22 Feb 2023 17:38:08 +0100 Subject: [PATCH 5/5] support recursive deletion --- mara_storage/shell.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mara_storage/shell.py b/mara_storage/shell.py index 21e1d8f..13c1556 100644 --- a/mara_storage/shell.py +++ b/mara_storage/shell.py @@ -240,8 +240,8 @@ def __(storage: storages.GoogleCloudStorage, file_name: str, force: bool = True, @delete_file_command.register(storages.AzureStorage) -def __(storage: storages.AzureStorage, file_name: str, force: bool = True): - if storage.sas and not force: +def __(storage: storages.AzureStorage, file_name: str, force: bool = True, recursive: bool = False): + if storage.sas and not force and not recursive: return (f'curl -sf -X DELETE {shlex.quote(storage.build_uri(path=file_name))}') azlogin_env = ('AZCOPY_AUTO_LOGIN_TYPE=SPN ' @@ -251,4 +251,5 @@ def __(storage: storages.AzureStorage, file_name: str, force: bool = True): ) if not storage.sas else '' return (f'{azlogin_env}azcopy rm ' - + shlex.quote(storage.build_uri(file_name))) + + shlex.quote(storage.build_uri(file_name)) + + (' --recursive=true' if recursive else ''))