diff --git a/synapseclient/core/download/__init__.py b/synapseclient/core/download/__init__.py index 502a63e3e..35b455115 100644 --- a/synapseclient/core/download/__init__.py +++ b/synapseclient/core/download/__init__.py @@ -6,6 +6,7 @@ PresignedUrlInfo, PresignedUrlProvider, _MultithreadedDownloader, + _pre_signed_url_expiration_time, download_file, ) from .download_functions import ( @@ -32,4 +33,5 @@ "PresignedUrlInfo", "PresignedUrlProvider", "_MultithreadedDownloader", + "_pre_signed_url_expiration_time", ] diff --git a/synapseclient/core/download/download_async.py b/synapseclient/core/download/download_async.py index 9080f31af..dfd0792b4 100644 --- a/synapseclient/core/download/download_async.py +++ b/synapseclient/core/download/download_async.py @@ -274,7 +274,12 @@ def _get_file_size( if otel_context: context.attach(otel_context) with syn._requests_session_storage.stream("GET", url) as response: - _raise_for_status_httpx(response=response, logger=syn.logger, verbose=debug) + _raise_for_status_httpx( + response=response, + logger=syn.logger, + verbose=debug, + read_response_content=False, + ) return int(response.headers["Content-Length"]) diff --git a/synapseclient/core/download/download_functions.py b/synapseclient/core/download/download_functions.py index 158613dda..e68da0be4 100644 --- a/synapseclient/core/download/download_functions.py +++ b/synapseclient/core/download/download_functions.py @@ -1,6 +1,7 @@ """This module handles the various ways that a user can download a file to Synapse.""" import asyncio +import datetime import errno import hashlib import os @@ -14,7 +15,10 @@ from tqdm.contrib.logging import logging_redirect_tqdm from synapseclient.api.configuration_services import get_client_authenticated_s3_profile -from synapseclient.api.file_services import get_file_handle_for_download_async +from synapseclient.api.file_services import ( + get_file_handle_for_download, + get_file_handle_for_download_async, +) from synapseclient.core import exceptions, sts_transfer, utils from synapseclient.core.constants import concrete_types from synapseclient.core.constants.method_flags import ( @@ -25,6 +29,8 @@ from synapseclient.core.download import ( SYNAPSE_DEFAULT_DOWNLOAD_PART_SIZE, DownloadRequest, + PresignedUrlProvider, + _pre_signed_url_expiration_time, download_file, ) from synapseclient.core.exceptions import ( @@ -500,11 +506,14 @@ def download_fn( progress_bar = get_or_create_download_progress_bar( file_size=1, postfix=synapse_id, synapse_client=syn ) + downloaded_path = await loop.run_in_executor( syn._get_thread_pool_executor(asyncio_event_loop=loop), lambda: download_from_url( url=file_handle_result["preSignedURL"], destination=destination, + entity_id=synapse_id, + file_handle_associate_type=entity_type, file_handle_id=file_handle["id"], expected_md5=file_handle.get("contentMd5"), progress_bar=progress_bar, @@ -610,6 +619,8 @@ async def download_from_url_multi_threaded( def download_from_url( url: str, destination: str, + entity_id: Optional[str], + file_handle_associate_type: Optional[str], file_handle_id: Optional[str] = None, expected_md5: Optional[str] = None, progress_bar: Optional[tqdm] = None, @@ -622,6 +633,11 @@ def download_from_url( Arguments: url: The source of download destination: The destination on local file system + entity_id: The id of the Synapse object that uses the FileHandle + e.g. "syn123" + file_handle_associate_type: The type of the Synapse object that uses the + FileHandle e.g. "FileEntity". Any of + file_handle_id: Optional. If given, the file will be given a temporary name that includes the file handle id which allows resuming partial downloads of the same file from previous sessions @@ -719,21 +735,69 @@ def _ftp_report_hook( else None ) - response = with_retry( - lambda url=url, range_header=range_header, auth=auth: client._requests_session.get( - url=url, - headers=client._generate_headers(range_header), - stream=True, - allow_redirects=False, - auth=auth, - ), - verbose=client.debug, - **STANDARD_RETRY_PARAMS, - ) try: + url_has_expiration = "Expires" in urllib_urlparse.urlparse(url).query + url_is_expired = False + if url_has_expiration: + url_is_expired = datetime.datetime.now( + tz=datetime.timezone.utc + ) + PresignedUrlProvider._TIME_BUFFER >= _pre_signed_url_expiration_time( + url + ) + if url_is_expired: + response = get_file_handle_for_download( + file_handle_id=file_handle_id, + synapse_id=entity_id, + entity_type=file_handle_associate_type, + synapse_client=client, + ) + url = response["preSignedURL"] + response = with_retry( + lambda url=url, range_header=range_header, auth=auth: client._requests_session.get( + url=url, + headers=client._generate_headers(range_header), + stream=True, + allow_redirects=False, + auth=auth, + ), + verbose=client.debug, + **STANDARD_RETRY_PARAMS, + ) exceptions._raise_for_status(response, verbose=client.debug) except SynapseHTTPError as err: - if err.response.status_code == 404: + if err.response.status_code == 403: + url_has_expiration = ( + "Expires" in urllib_urlparse.urlparse(url).query + ) + url_is_expired = False + if url_has_expiration: + url_is_expired = datetime.datetime.now( + tz=datetime.timezone.utc + ) + PresignedUrlProvider._TIME_BUFFER >= _pre_signed_url_expiration_time( + url + ) + if url_is_expired: + response = get_file_handle_for_download( + file_handle_id=file_handle_id, + synapse_id=entity_id, + entity_type=file_handle_associate_type, + synapse_client=client, + ) + refreshed_url = response["preSignedURL"] + response = with_retry( + lambda url=refreshed_url, range_header=range_header, auth=auth: client._requests_session.get( + url=url, + headers=client._generate_headers(range_header), + stream=True, + allow_redirects=False, + auth=auth, + ), + verbose=client.debug, + **STANDARD_RETRY_PARAMS, + ) + else: + raise + elif err.response.status_code == 404: raise SynapseError(f"Could not download the file at {url}") from err elif ( err.response.status_code == 416 @@ -745,8 +809,8 @@ def _ftp_report_hook( # If it fails the user can retry with another download. shutil.move(temp_destination, destination) break - raise - + else: + raise # handle redirects if response.status_code in [301, 302, 303, 307, 308]: url = response.headers["location"] diff --git a/synapseclient/core/retry.py b/synapseclient/core/retry.py index 15dff79e1..5851df5bc 100644 --- a/synapseclient/core/retry.py +++ b/synapseclient/core/retry.py @@ -69,6 +69,8 @@ "TimeoutException", "ConnectError", "ConnectTimeout", + # SSL Specific exceptions: + "SSLZeroReturnError", ] DEBUG_EXCEPTION = "calling %s resulted in an Exception" diff --git a/tests/integration/synapseclient/core/test_download.py b/tests/integration/synapseclient/core/test_download.py index 54184697a..b5dcabc57 100644 --- a/tests/integration/synapseclient/core/test_download.py +++ b/tests/integration/synapseclient/core/test_download.py @@ -5,8 +5,10 @@ import random import shutil import tempfile +from datetime import datetime from typing import Callable from unittest.mock import Mock, patch +from urllib.parse import parse_qs, urlencode, urlparse import httpx import pytest @@ -15,12 +17,27 @@ import synapseclient import synapseclient.core.utils as utils from synapseclient import Synapse +from synapseclient.api.file_services import get_file_handle_for_download +from synapseclient.core import exceptions from synapseclient.core.download import download_from_url, download_functions -from synapseclient.core.exceptions import SynapseMd5MismatchError +from synapseclient.core.exceptions import SynapseHTTPError, SynapseMd5MismatchError from synapseclient.models import File, Project from tests.test_utils import spy_for_async_function +def _expire_url(url: str) -> str: + """Expire a string URL by setting the expiration date to 1900-01-01T00:00:00Z""" + parsed_url = urlparse(url) + params = parse_qs(parsed_url.query) + expired_date = datetime(1970, 1, 1, 0, 0, 0) + params["X-Amz-Date"] = [expired_date.strftime("%Y%m%dT%H%M%SZ")] + params["X-Amz-Expires"] = ["30"] + params["Expires"] = ["0"] + new_query = urlencode(params, doseq=True) + new_url = parsed_url._replace(query=new_query).geturl() + return new_url + + class TestDownloadCollisions: """Tests for downloading files with different collision states.""" @@ -227,6 +244,8 @@ async def test_download_check_md5( await download_from_url( url=entity_bad_md5.external_url, destination=tempfile.gettempdir(), + entity_id=entity_bad_md5.id, + file_handle_associate_type="FileEntity", file_handle_id=entity_bad_md5.data_file_handle_id, expected_md5="2345a", ) @@ -260,6 +279,8 @@ async def test_download_from_url_resume_partial_download( path = download_from_url( url=f"{syn.repoEndpoint}/entity/{file.id}/file", destination=tempfile.gettempdir(), + entity_id=file.id, + file_handle_associate_type="FileEntity", file_handle_id=file.data_file_handle_id, expected_md5=file.file_handle.content_md5, ) @@ -271,6 +292,44 @@ async def test_download_from_url_resume_partial_download( # `caplog: pytest.LogCaptureFixture` would need to be added to the args. # assert f"Resuming partial download to {tmp_path}. {truncated_size}/{original_size}.0 bytes already transferred." in caplog.text + async def test_download_expired_url( + self, + mocker: MockerFixture, + project_model: Project, + ) -> None: + """Tests that a file download is retried when URL is expired.""" + # GIVEN a file stored in synapse + original_file = utils.make_bogus_data_file(40000) + file = await File(path=original_file, parent_id=project_model.id).store_async() + + # AND an expired preSignedURL for that file + file_handle_response = get_file_handle_for_download( + file_handle_id=file.data_file_handle_id, + synapse_id=file.id, + entity_type="FileEntity", + ) + url = file_handle_response["preSignedURL"] + expired_url = _expire_url(url) + + # get_file_handle_for_download only runs if we are handling an expired URL + spy_file_handle = mocker.spy(download_functions, "get_file_handle_for_download") + + # WHEN I download the file with a simulated expired url + path = download_from_url( + url=expired_url, + destination=tempfile.gettempdir(), + entity_id=file.id, + file_handle_associate_type="FileEntity", + file_handle_id=file.data_file_handle_id, + expected_md5=file.file_handle.content_md5, + ) + + # THEN the expired URL is refreshed + spy_file_handle.assert_called_once() + + # THEN the file is downloaded anyway AND matches the original + assert filecmp.cmp(original_file, path), "File comparison failed" + async def test_download_via_get( self, mocker: MockerFixture, diff --git a/tests/integration/synapseclient/core/test_external_storage.py b/tests/integration/synapseclient/core/test_external_storage.py index 4b6a94ef9..f0a27f3d8 100644 --- a/tests/integration/synapseclient/core/test_external_storage.py +++ b/tests/integration/synapseclient/core/test_external_storage.py @@ -304,7 +304,7 @@ async def test_sts_external_storage_location(self) -> None: Bucket=bucket_name, Key=remote_key, ) - assert "Access Denied" in str(ex_cm.value) + assert "AccessDenied" in str(ex_cm.value) # WHEN I put an object directly using the STS read/write credentials s3_write_client.upload_file( diff --git a/tests/unit/synapseclient/core/unit_test_download.py b/tests/unit/synapseclient/core/unit_test_download.py index b0ec7f94e..07caad80f 100644 --- a/tests/unit/synapseclient/core/unit_test_download.py +++ b/tests/unit/synapseclient/core/unit_test_download.py @@ -1,5 +1,6 @@ """Unit tests for downloads.""" +import datetime import hashlib import json import os @@ -33,6 +34,10 @@ ) DOWNLOAD_FROM_URL = "synapseclient.core.download.download_functions.download_from_url" +FILE_HANDLE_ID = "42" +OBJECT_ID = "syn789" +OBJECT_TYPE = "FileEntity" + # a callable that mocks the requests.get function class MockRequestGetFunction(object): @@ -135,10 +140,6 @@ def mock_generate_headers(self, headers=None): async def test_mock_download(syn: Synapse) -> None: temp_dir = tempfile.gettempdir() - fileHandleId = "42" - objectId = "syn789" - objectType = "FileEntity" - # make bogus content contents = "\n".join(str(i) for i in range(1000)) @@ -160,6 +161,8 @@ async def test_mock_download(syn: Synapse) -> None: download_from_url( url=url, destination=temp_dir, + entity_id=OBJECT_ID, + file_handle_associate_type=OBJECT_TYPE, file_handle_id=12345, expected_md5=contents_md5, synapse_client=syn, @@ -180,6 +183,8 @@ async def test_mock_download(syn: Synapse) -> None: download_from_url( url=url, destination=temp_dir, + entity_id=OBJECT_ID, + file_handle_associate_type=OBJECT_TYPE, file_handle_id=12345, expected_md5=contents_md5, synapse_client=syn, @@ -234,9 +239,9 @@ async def test_mock_download(syn: Synapse) -> None: return_value=_getFileHandleDownload_return_value, ): await download_by_file_handle( - file_handle_id=fileHandleId, - synapse_id=objectId, - entity_type=objectType, + file_handle_id=FILE_HANDLE_ID, + synapse_id=OBJECT_ID, + entity_type=OBJECT_TYPE, destination=temp_dir, synapse_client=syn, ) @@ -284,9 +289,9 @@ async def test_mock_download(syn: Synapse) -> None: return_value=_getFileHandleDownload_return_value, ): await download_by_file_handle( - file_handle_id=fileHandleId, - synapse_id=objectId, - entity_type=objectType, + file_handle_id=FILE_HANDLE_ID, + synapse_id=OBJECT_ID, + entity_type=OBJECT_TYPE, destination=temp_dir, synapse_client=syn, ) @@ -336,9 +341,9 @@ async def test_mock_download(syn: Synapse) -> None: ): with pytest.raises(Exception): await download_by_file_handle( - file_handle_id=fileHandleId, - synapse_id=objectId, - entity_type=objectType, + file_handle_id=FILE_HANDLE_ID, + synapse_id=OBJECT_ID, + entity_type=OBJECT_TYPE, destination=temp_dir, synapse_client=syn, ) @@ -377,9 +382,9 @@ async def test_mock_download(syn: Synapse) -> None: return_value=_getFileHandleDownload_return_value, ): await download_by_file_handle( - file_handle_id=fileHandleId, - synapse_id=objectId, - entity_type=objectType, + file_handle_id=FILE_HANDLE_ID, + synapse_id=OBJECT_ID, + entity_type=OBJECT_TYPE, destination=temp_dir, synapse_client=syn, ) @@ -409,9 +414,9 @@ async def test_mock_download(syn: Synapse) -> None: ): with pytest.raises(SynapseHTTPError): await download_by_file_handle( - file_handle_id=fileHandleId, - synapse_id=objectId, - entity_type=objectType, + file_handle_id=FILE_HANDLE_ID, + synapse_id=OBJECT_ID, + entity_type=OBJECT_TYPE, destination=temp_dir, synapse_client=syn, ) @@ -464,20 +469,21 @@ async def test_multithread_true_s3_file_handle(self) -> None: ) async def _multithread_not_applicable(self, file_handle: Dict[str, str]) -> None: + get_file_handle_for_download_return_value = { + "fileHandle": file_handle, + "preSignedURL": "asdf.com", + } + with patch.object(os, "makedirs"), patch( GET_FILE_HANDLE_FOR_DOWNLOAD, new_callable=AsyncMock, - ) as mock_getFileHandleDownload, patch( + return_value=get_file_handle_for_download_return_value, + ), patch( DOWNLOAD_FROM_URL, new_callable=AsyncMock, ) as mock_download_from_URL, patch.object( self.syn, "cache" ): - mock_getFileHandleDownload.return_value = { - "fileHandle": file_handle, - "preSignedURL": "asdf.com", - } - # multi_threaded/max_threads will have effect self.syn.multi_threaded = True await download_by_file_handle( @@ -491,6 +497,8 @@ async def _multithread_not_applicable(self, file_handle: Dict[str, str]) -> None mock_download_from_URL.assert_called_once_with( url="asdf.com", destination="/myfakepath", + entity_id=456, + file_handle_associate_type="FileEntity", file_handle_id="123", expected_md5="someMD5", progress_bar=ANY, @@ -548,6 +556,8 @@ async def test_multithread_false_s3_file_handle(self) -> None: mock_download_from_URL.assert_called_once_with( url="asdf.com", destination="/myfakepath", + entity_id=456, + file_handle_associate_type="FileEntity", file_handle_id="123", expected_md5="someMD5", progress_bar=ANY, @@ -618,192 +628,340 @@ async def test_md5_match(self) -> None: ) -async def test_download_end_early_retry(syn: Synapse) -> None: - """ - -------Test to ensure download retry even if connection ends early-------- - """ - - url = "http://www.ayy.lmao/filerino.txt" - contents = "\n".join(str(i) for i in range(1000)) - destination = os.path.normpath(os.path.expanduser("~/fake/path/filerino.txt")) - temp_destination = os.path.normpath( - os.path.expanduser("~/fake/path/filerino.txt.temp") - ) +class TestDownloadFromUrl: + @pytest.fixture(autouse=True, scope="function") + def init_syn(self, syn: Synapse) -> None: + self.syn = syn - partial_content_break = len(contents) // 7 * 3 - mock_requests_get = MockRequestGetFunction( - [ - create_mock_response( - url, - "stream", - contents=contents[:partial_content_break], - buffer_size=1024, - partial_end=len(contents), - status_code=200, - ), - create_mock_response( - url, - "stream", - contents=contents, - buffer_size=1024, - partial_start=partial_content_break, - status_code=206, - ), - ] - ) + async def test_download_end_early_retry(self, syn: Synapse) -> None: + """ + -------Test to ensure download retry even if connection ends early-------- + """ - # make the first response's 'content-type' header say it will transfer the full content even though it - # is only partially doing so - mock_requests_get.responses[0].headers["content-length"] = len(contents) - mock_requests_get.responses[1].headers["content-length"] = len( - contents[partial_content_break:] - ) + url = "http://www.ayy.lmao/filerino.txt" + contents = "\n".join(str(i) for i in range(1000)) + destination = os.path.normpath(os.path.expanduser("~/fake/path/filerino.txt")) + temp_destination = os.path.normpath( + os.path.expanduser("~/fake/path/filerino.txt.temp") + ) - # TODO: When swapping out for the HTTPX client, we will need to update this test - # with patch.object( - # syn, "rest_get_async", new_callable=AsyncMock, side_effect=mock_requests_get - # ) - with patch.object( - syn._requests_session, "get", side_effect=mock_requests_get - ), patch.object( - Synapse, "_generate_headers", side_effect=mock_generate_headers - ), patch.object( - utils, "temp_download_filename", return_value=temp_destination - ) as mocked_temp_dest, patch( - "synapseclient.core.download.download_functions.open", - new_callable=mock_open(), - create=True, - ) as mocked_open, patch.object( - os.path, "exists", side_effect=[False, True] - ) as mocked_exists, patch.object( - os.path, "getsize", return_value=partial_content_break - ) as mocked_getsize, patch.object( - utils, "md5_for_file" - ), patch.object( - shutil, "move" - ) as mocked_move: - # function under test - download_from_url(url=url, destination=destination, synapse_client=syn) - - # assert temp_download_filename() called 2 times with same parameters - assert [ - call(destination=destination, file_handle_id=None) - ] * 2 == mocked_temp_dest.call_args_list - - # assert exists called 2 times - assert [call(temp_destination)] * 2 == mocked_exists.call_args_list - - # assert open() called 2 times with different parameters - assert [ - call(temp_destination, "wb"), - call(temp_destination, "ab"), - ] == mocked_open.call_args_list - - # assert getsize() called 2 times - # once because exists()=True and another time because response status code = 206 - assert [call(filename=temp_destination)] * 2 == mocked_getsize.call_args_list - - # assert shutil.move() called 1 time - mocked_move.assert_called_once_with(temp_destination, destination) - - -async def test_download_md5_mismatch__not_local_file(syn: Synapse) -> None: - """ - --------Test to ensure file gets removed on md5 mismatch-------- - """ - url = "http://www.ayy.lmao/filerino.txt" - contents = "\n".join(str(i) for i in range(1000)) - destination = os.path.normpath(os.path.expanduser("~/fake/path/filerino.txt")) - temp_destination = os.path.normpath( - os.path.expanduser("~/fake/path/filerino.txt.temp") - ) + partial_content_break = len(contents) // 7 * 3 + mock_requests_get = MockRequestGetFunction( + [ + create_mock_response( + url, + "stream", + contents=contents[:partial_content_break], + buffer_size=1024, + partial_end=len(contents), + status_code=200, + ), + create_mock_response( + url, + "stream", + contents=contents, + buffer_size=1024, + partial_start=partial_content_break, + status_code=206, + ), + ] + ) - mock_requests_get = MockRequestGetFunction( - [ - create_mock_response( - url, - "stream", - contents=contents, - buffer_size=1024, - partial_end=len(contents), - status_code=200, - ) - ] - ) + # make the first response's 'content-type' header say + # it will transfer the full content even though it + # is only partially doing so + mock_requests_get.responses[0].headers["content-length"] = len(contents) + mock_requests_get.responses[1].headers["content-length"] = len( + contents[partial_content_break:] + ) - with patch.object( - syn._requests_session, "get", side_effect=mock_requests_get - ), patch.object( - Synapse, "_generate_headers", side_effect=mock_generate_headers - ), patch.object( - utils, "temp_download_filename", return_value=temp_destination - ) as mocked_temp_dest, patch( - "synapseclient.core.download.download_functions.open", - new_callable=mock_open(), - create=True, - ) as mocked_open, patch.object( - os.path, "exists", side_effect=[False, True] - ) as mocked_exists, patch.object( - shutil, "move" - ) as mocked_move, patch.object( - os, "remove" - ) as mocked_remove: - # function under test - with pytest.raises(SynapseMd5MismatchError): - await download_from_url( + # TODO: When swapping out for the HTTPX client, we will need to update this test + # with patch.object( + # syn, "rest_get_async", new_callable=AsyncMock, side_effect=mock_requests_get + # ) + with patch.object( + syn._requests_session, "get", side_effect=mock_requests_get + ), patch.object( + Synapse, "_generate_headers", side_effect=mock_generate_headers + ), patch.object( + utils, "temp_download_filename", return_value=temp_destination + ) as mocked_temp_dest, patch( + "synapseclient.core.download.download_functions.open", + new_callable=mock_open(), + create=True, + ) as mocked_open, patch.object( + os.path, "exists", side_effect=[False, True] + ) as mocked_exists, patch.object( + os.path, "getsize", return_value=partial_content_break + ) as mocked_getsize, patch.object( + utils, "md5_for_file" + ), patch.object( + shutil, "move" + ) as mocked_move: + # function under test + download_from_url( url=url, destination=destination, - expected_md5="fake md5 is fake", + entity_id=OBJECT_ID, + file_handle_associate_type=OBJECT_TYPE, synapse_client=syn, ) - # assert temp_download_filename() called once - mocked_temp_dest.assert_called_once_with( - destination=destination, file_handle_id=None + # assert temp_download_filename() called 2 times with same parameters + assert [ + call(destination=destination, file_handle_id=None) + ] * 2 == mocked_temp_dest.call_args_list + + # assert exists called 2 times + assert [call(temp_destination)] * 2 == mocked_exists.call_args_list + + # assert open() called 2 times with different parameters + assert [ + call(temp_destination, "wb"), + call(temp_destination, "ab"), + ] == mocked_open.call_args_list + + # assert getsize() called 2 times + # once because exists()=True and another time because response status code = 206 + assert [ + call(filename=temp_destination) + ] * 2 == mocked_getsize.call_args_list + + # assert shutil.move() called 1 time + mocked_move.assert_called_once_with(temp_destination, destination) + + async def test_download_md5_mismatch__not_local_file(self, syn: Synapse) -> None: + """ + --------Test to ensure file gets removed on md5 mismatch-------- + """ + url = "http://www.ayy.lmao/filerino.txt" + contents = "\n".join(str(i) for i in range(1000)) + destination = os.path.normpath(os.path.expanduser("~/fake/path/filerino.txt")) + temp_destination = os.path.normpath( + os.path.expanduser("~/fake/path/filerino.txt.temp") + ) + + mock_requests_get = MockRequestGetFunction( + [ + create_mock_response( + url, + "stream", + contents=contents, + buffer_size=1024, + partial_end=len(contents), + status_code=200, + ) + ] ) - # assert exists called 2 times - assert [ - call(temp_destination), - call(destination), - ] == mocked_exists.call_args_list + with patch.object( + syn._requests_session, "get", side_effect=mock_requests_get + ), patch.object( + Synapse, "_generate_headers", side_effect=mock_generate_headers + ), patch.object( + utils, "temp_download_filename", return_value=temp_destination + ) as mocked_temp_dest, patch( + "synapseclient.core.download.download_functions.open", + new_callable=mock_open(), + create=True, + ) as mocked_open, patch.object( + os.path, "exists", side_effect=[False, True] + ) as mocked_exists, patch.object( + shutil, "move" + ) as mocked_move, patch.object( + os, "remove" + ) as mocked_remove: + # function under test + with pytest.raises(SynapseMd5MismatchError): + await download_from_url( + url=url, + destination=destination, + entity_id=OBJECT_ID, + file_handle_associate_type=OBJECT_TYPE, + expected_md5="fake md5 is fake", + synapse_client=syn, + ) + + # assert temp_download_filename() called once + mocked_temp_dest.assert_called_once_with( + destination=destination, file_handle_id=None + ) - # assert open() called once - mocked_open.assert_called_once_with(temp_destination, "wb") + # assert exists called 2 times + assert [ + call(temp_destination), + call(destination), + ] == mocked_exists.call_args_list - # assert shutil.move() called once - mocked_move.assert_called_once_with(temp_destination, destination) + # assert open() called once + mocked_open.assert_called_once_with(temp_destination, "wb") - # assert file was removed - mocked_remove.assert_called_once_with(destination) + # assert shutil.move() called once + mocked_move.assert_called_once_with(temp_destination, destination) + # assert file was removed + mocked_remove.assert_called_once_with(destination) -async def test_download_md5_mismatch_local_file() -> None: - """ - --------Test to ensure file gets removed on md5 mismatch-------- - """ - url = "file:///some/file/path.txt" - destination = os.path.normpath(os.path.expanduser("~/fake/path/filerino.txt")) + async def test_download_md5_mismatch_local_file(self) -> None: + """ + --------Test to ensure file gets removed on md5 mismatch-------- + """ + url = "file:///some/file/path.txt" + destination = os.path.normpath(os.path.expanduser("~/fake/path/filerino.txt")) - with patch.object( - utils, "file_url_to_path", return_value=destination - ) as mocked_file_url_to_path, patch.object( - utils, - "md5_for_file_hex", - return_value="Some other incorrect md5", - ) as mocked_md5_for_file, patch( - "os.remove" - ) as mocked_remove: - # function under test - with pytest.raises(SynapseMd5MismatchError): - await download_from_url( - url=url, destination=destination, expected_md5="fake md5 is fake" + with patch.object( + utils, "file_url_to_path", return_value=destination + ) as mocked_file_url_to_path, patch.object( + utils, + "md5_for_file_hex", + return_value="Some other incorrect md5", + ) as mocked_md5_for_file, patch( + "os.remove" + ) as mocked_remove: + # function under test + with pytest.raises(SynapseMd5MismatchError): + await download_from_url( + url=url, + destination=destination, + entity_id=OBJECT_ID, + file_handle_associate_type=OBJECT_TYPE, + expected_md5="fake md5 is fake", + ) + + mocked_file_url_to_path.assert_called_once_with(url, verify_exists=True) + mocked_md5_for_file.assert_called_once() + # assert file was NOT removed + assert not mocked_remove.called + + async def test_download_expired_url(self, syn: Synapse) -> None: + url = "http://www.ayy.lmao/filerino.txt?Expires=0" + new_url = "http://www.ayy.lmao/new_url.txt?Expires=1715000000" + contents = "\n".join(str(i) for i in range(1000)) + temp_destination = os.path.normpath( + os.path.expanduser("~/fake/path/filerino.txt.temp") + ) + destination = os.path.normpath(os.path.expanduser("~/fake/path/filerino.txt")) + + mock_requests_get = MockRequestGetFunction( + [ + create_mock_response( + url, + "stream", + contents=contents, + buffer_size=1024, + partial_end=len(contents), + status_code=200, + ), + ] + ) + with patch.object( + syn._requests_session, "get", side_effect=mock_requests_get + ) as mocked_get, patch( + "synapseclient.core.download.download_functions._pre_signed_url_expiration_time", + return_value=datetime.datetime(1900, 1, 1, tzinfo=datetime.timezone.utc), + ) as mocked_pre_signed_url_expiration_time, patch( + "synapseclient.core.download.download_functions.get_file_handle_for_download", + return_value={"preSignedURL": new_url}, + ) as mocked_get_file_handle_for_download, patch.object( + Synapse, "_generate_headers", side_effect=mock_generate_headers + ), patch.object( + utils, "temp_download_filename", return_value=temp_destination + ), patch( + "synapseclient.core.download.download_functions.open", + new_callable=mock_open(), + create=True, + ), patch.object( + hashlib, "new" + ) as mocked_hashlib_new, patch.object( + shutil, "move" + ), patch.object( + os, "remove" + ): + mocked_hashlib_new.return_value.hexdigest.return_value = "fake md5 is fake" + # WHEN I call download_from_url with an expired url + download_from_url( + url=url, + destination=destination, + entity_id=OBJECT_ID, + file_handle_associate_type=OBJECT_TYPE, + expected_md5="fake md5 is fake", + ) + # I expect the expired url to be identified + mocked_pre_signed_url_expiration_time.assert_called_once_with(url) + # AND I expect the URL to be refreshed + mocked_get_file_handle_for_download.assert_called_once() + # AND I expect the download to be retried with the new URL + mocked_get.assert_called_with( + url=new_url, + headers=mock_generate_headers(self), + stream=True, + allow_redirects=False, + auth=None, ) - mocked_file_url_to_path.assert_called_once_with(url, verify_exists=True) - mocked_md5_for_file.assert_called_once() - # assert file was NOT removed - assert not mocked_remove.called + async def test_download_url_no_expiration(self, syn: Synapse) -> None: + url = "http://www.ayy.lmao/filerino.txt" + contents = "\n".join(str(i) for i in range(1000)) + temp_destination = os.path.normpath( + os.path.expanduser("~/fake/path/filerino.txt.temp") + ) + destination = os.path.normpath(os.path.expanduser("~/fake/path/filerino.txt")) + + mock_requests_get = MockRequestGetFunction( + [ + create_mock_response( + url, + "stream", + contents=contents, + buffer_size=1024, + partial_end=len(contents), + status_code=200, + ), + ] + ) + with patch.object( + syn._requests_session, "get", side_effect=mock_requests_get + ) as mocked_get, patch( + "synapseclient.core.download.download_functions._pre_signed_url_expiration_time", + return_value=datetime.datetime(1900, 1, 1, tzinfo=datetime.timezone.utc), + ) as mocked_pre_signed_url_expiration_time, patch( + "synapseclient.core.download.download_functions.get_file_handle_for_download", + ) as mocked_get_file_handle_for_download, patch.object( + Synapse, "_generate_headers", side_effect=mock_generate_headers + ), patch.object( + utils, "temp_download_filename", return_value=temp_destination + ), patch( + "synapseclient.core.download.download_functions.open", + new_callable=mock_open(), + create=True, + ), patch.object( + hashlib, "new" + ) as mocked_hashlib_new, patch.object( + shutil, "move" + ), patch.object( + os, "remove" + ): + mocked_hashlib_new.return_value.hexdigest.return_value = "fake md5 is fake" + # WHEN I call download_from_url with an expired url + download_from_url( + url=url, + destination=destination, + entity_id=OBJECT_ID, + file_handle_associate_type=OBJECT_TYPE, + expected_md5="fake md5 is fake", + ) + # I expect the expired url to be identified + mocked_pre_signed_url_expiration_time.assert_not_called() + # AND I expect the URL to be refreshed + mocked_get_file_handle_for_download.assert_not_called() + # AND I expect the download to be retried with the new URL + mocked_get.assert_called_with( + url=url, + headers=mock_generate_headers(self), + stream=True, + allow_redirects=False, + auth=None, + ) async def test_get_file_handle_download__error_unauthorized(syn: Synapse) -> None: diff --git a/tests/unit/synapseclient/unit_test_client.py b/tests/unit/synapseclient/unit_test_client.py index 9f5232810..3489d9526 100644 --- a/tests/unit/synapseclient/unit_test_client.py +++ b/tests/unit/synapseclient/unit_test_client.py @@ -1,4 +1,5 @@ """Unit tests for the Synapse client""" + import configparser import datetime import errno @@ -518,6 +519,8 @@ async def test_download_from_url__synapse_auth(self, mocker: MagicMock) -> None: out_destination = download_from_url( url=uri, + entity_id="syn123", + file_handle_associate_type="FileEntity", destination=in_destination.name, synapse_client=self.syn, ) @@ -547,6 +550,8 @@ async def test_download_from_url__external(self, mocker) -> None: out_destination = download_from_url( url=uri, destination=in_destination.name, + entity_id="syn123", + file_handle_associate_type="FileEntity", synapse_client=self.syn, ) assert mock_get.call_args[1]["auth"] is None