From 3a7e6e41b7fd5783ee9d2aa6bca1477dee899786 Mon Sep 17 00:00:00 2001 From: kyleknap Date: Fri, 3 Nov 2023 11:15:08 -0700 Subject: [PATCH 1/2] Turn on checksum validation for CRT S3 client For uploads, the CRT S3 client will add CRC32 trailing checksums. For downloads, the CRT S3 client will validate checksums associated to the object when possible. --- .../next-release/enhancement-s3-16115.json | 5 ++ awscli/s3transfer/crt.py | 27 ++++++- tests/functional/s3transfer/test_crt.py | 73 ++++++++++++++++++- 3 files changed, 100 insertions(+), 5 deletions(-) create mode 100644 .changes/next-release/enhancement-s3-16115.json diff --git a/.changes/next-release/enhancement-s3-16115.json b/.changes/next-release/enhancement-s3-16115.json new file mode 100644 index 000000000000..06ede755e130 --- /dev/null +++ b/.changes/next-release/enhancement-s3-16115.json @@ -0,0 +1,5 @@ +{ + "type": "enhancement", + "category": "``s3``", + "description": "Automatically configure CRC32 checksums for uploads and checksum validation for downloads through the CRT transfer client." +} diff --git a/awscli/s3transfer/crt.py b/awscli/s3transfer/crt.py index eda2985febfb..636a7a69d937 100644 --- a/awscli/s3transfer/crt.py +++ b/awscli/s3transfer/crt.py @@ -206,6 +206,7 @@ def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None): extra_args = {} if subscribers is None: subscribers = {} + self._validate_checksum_algorithm_supported(extra_args) callargs = CallArgs( bucket=bucket, key=key, @@ -231,6 +232,17 @@ def delete(self, bucket, key, extra_args=None, subscribers=None): def shutdown(self, cancel=False): self._shutdown(cancel) + def _validate_checksum_algorithm_supported(self, extra_args): + checksum_algorithm = extra_args.get('ChecksumAlgorithm') + if checksum_algorithm is None: + return + if checksum_algorithm not in awscrt.s3.S3ChecksumAlgorithm.__members__: + raise ValueError( + f'ChecksumAlgorithm: {checksum_algorithm} not supported. ' + f'Supported algorithms are: ' + f'{list(awscrt.s3.S3ChecksumAlgorithm.__members__)}' + ) + def _cancel_transfers(self): for coordinator in self._future_coordinators: if not coordinator.done(): @@ -623,11 +635,17 @@ def _get_make_request_args_put_object( else: call_args.extra_args["Body"] = call_args.fileobj + checksum_algorithm = call_args.extra_args.pop( + 'ChecksumAlgorithm', 'CRC32' + ) + checksum_config = awscrt.s3.S3ChecksumConfig( + algorithm=awscrt.s3.S3ChecksumAlgorithm[checksum_algorithm], + location=awscrt.s3.S3ChecksumLocation.TRAILER, + ) # Suppress botocore's automatic MD5 calculation by setting an override # value that will get deleted in the BotocoreCRTRequestSerializer. - # The CRT S3 client is able automatically compute checksums as part of - # requests it makes, and the intention is to configure automatic - # checksums in a future update. + # As part of the CRT S3 request, we request the CRT S3 client to + # automatically add trailing checksums to its uploads. call_args.extra_args["ContentMD5"] = "override-to-be-removed" make_request_args = self._default_get_make_request_args( @@ -639,6 +657,7 @@ def _get_make_request_args_put_object( on_done_after_calls=on_done_after_calls, ) make_request_args['send_filepath'] = send_filepath + make_request_args['checksum_config'] = checksum_config return make_request_args def _get_make_request_args_get_object( @@ -652,6 +671,7 @@ def _get_make_request_args_get_object( ): recv_filepath = None on_body = None + checksum_config = awscrt.s3.S3ChecksumConfig(validate_response=True) if isinstance(call_args.fileobj, str): final_filepath = call_args.fileobj recv_filepath = self._os_utils.get_temp_filename(final_filepath) @@ -673,6 +693,7 @@ def _get_make_request_args_get_object( ) make_request_args['recv_filepath'] = recv_filepath make_request_args['on_body'] = on_body + make_request_args['checksum_config'] = checksum_config return make_request_args def _default_get_make_request_args( diff --git a/tests/functional/s3transfer/test_crt.py b/tests/functional/s3transfer/test_crt.py index f09eb12c8059..69ceba8d7633 100644 --- a/tests/functional/s3transfer/test_crt.py +++ b/tests/functional/s3transfer/test_crt.py @@ -129,10 +129,10 @@ def _assert_expected_crt_http_request( ) if expected_missing_headers is not None: header_names = [ - header[0] for header in crt_http_request.headers + header[0].lower() for header in crt_http_request.headers ] for expected_missing_header in expected_missing_headers: - self.assertNotIn(expected_missing_header, header_names) + self.assertNotIn(expected_missing_header.lower(), header_names) def _assert_subscribers_called(self, expected_future=None): self.assertTrue(self.record_subscriber.on_queued_called) @@ -145,6 +145,21 @@ def _assert_subscribers_called(self, expected_future=None): self.record_subscriber.on_done_future, expected_future ) + def _get_expected_upload_checksum_config(self, **overrides): + checksum_config_kwargs = { + 'algorithm': awscrt.s3.S3ChecksumAlgorithm.CRC32, + 'location': awscrt.s3.S3ChecksumLocation.TRAILER, + } + checksum_config_kwargs.update(overrides) + return awscrt.s3.S3ChecksumConfig(**checksum_config_kwargs) + + def _get_expected_download_checksum_config(self, **overrides): + checksum_config_kwargs = { + 'validate_response': True, + } + checksum_config_kwargs.update(overrides) + return awscrt.s3.S3ChecksumConfig(**checksum_config_kwargs) + def _invoke_done_callbacks(self, **kwargs): callargs = self.s3_crt_client.make_request.call_args callargs_kwargs = callargs[1] @@ -182,6 +197,7 @@ def test_upload(self): 'send_filepath': self.filename, 'on_progress': mock.ANY, 'on_done': mock.ANY, + 'checksum_config': self._get_expected_upload_checksum_config(), }, ) self._assert_expected_crt_http_request( @@ -208,6 +224,7 @@ def test_upload_from_seekable_stream(self): 'send_filepath': None, 'on_progress': mock.ANY, 'on_done': mock.ANY, + 'checksum_config': self._get_expected_upload_checksum_config(), }, ) self._assert_expected_crt_http_request( @@ -239,6 +256,7 @@ def test_upload_from_nonseekable_stream(self): 'send_filepath': None, 'on_progress': mock.ANY, 'on_done': mock.ANY, + 'checksum_config': self._get_expected_upload_checksum_config(), }, ) self._assert_expected_crt_http_request( @@ -253,6 +271,54 @@ def test_upload_from_nonseekable_stream(self): ) self._assert_subscribers_called(future) + def test_upload_override_checksum_algorithm(self): + future = self.transfer_manager.upload( + self.filename, + self.bucket, + self.key, + {'ChecksumAlgorithm': 'CRC32C'}, + [self.record_subscriber], + ) + future.result() + + callargs_kwargs = self.s3_crt_client.make_request.call_args[1] + self.assertEqual( + callargs_kwargs, + { + 'request': mock.ANY, + 'type': awscrt.s3.S3RequestType.PUT_OBJECT, + 'send_filepath': self.filename, + 'on_progress': mock.ANY, + 'on_done': mock.ANY, + 'checksum_config': self._get_expected_upload_checksum_config( + algorithm=awscrt.s3.S3ChecksumAlgorithm.CRC32C + ), + }, + ) + self._assert_expected_crt_http_request( + callargs_kwargs["request"], + expected_http_method='PUT', + expected_content_length=len(self.expected_content), + expected_missing_headers=[ + 'Content-MD5', + 'x-amz-sdk-checksum-algorithm', + 'X-Amz-Trailer', + ], + ) + self._assert_subscribers_called(future) + + def test_upload_throws_error_for_unsupported_checksum(self): + with self.assertRaisesRegex( + ValueError, 'ChecksumAlgorithm: UNSUPPORTED not supported' + ): + self.transfer_manager.upload( + self.filename, + self.bucket, + self.key, + {'ChecksumAlgorithm': 'UNSUPPORTED'}, + [self.record_subscriber], + ) + def test_download(self): future = self.transfer_manager.download( self.bucket, self.key, self.filename, {}, [self.record_subscriber] @@ -269,6 +335,7 @@ def test_download(self): 'on_progress': mock.ANY, 'on_done': mock.ANY, 'on_body': None, + 'checksum_config': self._get_expected_download_checksum_config(), }, ) # the recv_filepath will be set to a temporary file path with some @@ -306,6 +373,7 @@ def test_download_to_seekable_stream(self): 'on_progress': mock.ANY, 'on_done': mock.ANY, 'on_body': mock.ANY, + 'checksum_config': self._get_expected_download_checksum_config(), }, ) self._assert_expected_crt_http_request( @@ -340,6 +408,7 @@ def test_download_to_nonseekable_stream(self): 'on_progress': mock.ANY, 'on_done': mock.ANY, 'on_body': mock.ANY, + 'checksum_config': self._get_expected_download_checksum_config(), }, ) self._assert_expected_crt_http_request( From 4245e18ece84e2942c9963bc4f07f9743bbdd082 Mon Sep 17 00:00:00 2001 From: kyleknap Date: Mon, 13 Nov 2023 13:05:58 -0800 Subject: [PATCH 2/2] Allow lower case checksum algorithms This is was made to keep parity with the pure Python transfer manager which accepts lowercase algorithm values. --- awscli/s3transfer/crt.py | 8 +++--- tests/functional/s3transfer/test_crt.py | 36 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/awscli/s3transfer/crt.py b/awscli/s3transfer/crt.py index 636a7a69d937..ed5a0864ee44 100644 --- a/awscli/s3transfer/crt.py +++ b/awscli/s3transfer/crt.py @@ -236,11 +236,11 @@ def _validate_checksum_algorithm_supported(self, extra_args): checksum_algorithm = extra_args.get('ChecksumAlgorithm') if checksum_algorithm is None: return - if checksum_algorithm not in awscrt.s3.S3ChecksumAlgorithm.__members__: + supported_algorithms = list(awscrt.s3.S3ChecksumAlgorithm.__members__) + if checksum_algorithm.upper() not in supported_algorithms: raise ValueError( f'ChecksumAlgorithm: {checksum_algorithm} not supported. ' - f'Supported algorithms are: ' - f'{list(awscrt.s3.S3ChecksumAlgorithm.__members__)}' + f'Supported algorithms are: {supported_algorithms}' ) def _cancel_transfers(self): @@ -637,7 +637,7 @@ def _get_make_request_args_put_object( checksum_algorithm = call_args.extra_args.pop( 'ChecksumAlgorithm', 'CRC32' - ) + ).upper() checksum_config = awscrt.s3.S3ChecksumConfig( algorithm=awscrt.s3.S3ChecksumAlgorithm[checksum_algorithm], location=awscrt.s3.S3ChecksumLocation.TRAILER, diff --git a/tests/functional/s3transfer/test_crt.py b/tests/functional/s3transfer/test_crt.py index 69ceba8d7633..c56ea3011f9d 100644 --- a/tests/functional/s3transfer/test_crt.py +++ b/tests/functional/s3transfer/test_crt.py @@ -307,6 +307,42 @@ def test_upload_override_checksum_algorithm(self): ) self._assert_subscribers_called(future) + def test_upload_override_checksum_algorithm_accepts_lowercase(self): + future = self.transfer_manager.upload( + self.filename, + self.bucket, + self.key, + {'ChecksumAlgorithm': 'crc32c'}, + [self.record_subscriber], + ) + future.result() + + callargs_kwargs = self.s3_crt_client.make_request.call_args[1] + self.assertEqual( + callargs_kwargs, + { + 'request': mock.ANY, + 'type': awscrt.s3.S3RequestType.PUT_OBJECT, + 'send_filepath': self.filename, + 'on_progress': mock.ANY, + 'on_done': mock.ANY, + 'checksum_config': self._get_expected_upload_checksum_config( + algorithm=awscrt.s3.S3ChecksumAlgorithm.CRC32C + ), + }, + ) + self._assert_expected_crt_http_request( + callargs_kwargs["request"], + expected_http_method='PUT', + expected_content_length=len(self.expected_content), + expected_missing_headers=[ + 'Content-MD5', + 'x-amz-sdk-checksum-algorithm', + 'X-Amz-Trailer', + ], + ) + self._assert_subscribers_called(future) + def test_upload_throws_error_for_unsupported_checksum(self): with self.assertRaisesRegex( ValueError, 'ChecksumAlgorithm: UNSUPPORTED not supported'