From 4bd80464922347db3be1a11dc5fdfec0d74a0be5 Mon Sep 17 00:00:00 2001 From: kyleknap Date: Wed, 15 Nov 2023 18:57:52 -0800 Subject: [PATCH] Update throughput target defaults Instead of defaulting to five gigabits per second when no target througput is configured, s3transfer will use the CRT to determine if there is a recommended target throughput to use. If it is unable, to determine a recommended target throughput, it will default to ten gigabits per second, which is the current throughput default for the CRT S3 client. This behavior was updated to help minimize the amount of configuration required to get the fastest throughputs from the CRT transfer client. The fallback throughput was updated from five to ten gigabits so that the CRT S3 integration is consistent with the underlying CRT S3 client's defaults. --- .../next-release/enhancement-crt-28261.json | 5 ++ s3transfer/crt.py | 44 +++++++++++--- tests/unit/test_crt.py | 58 ++++++++++++++++++- 3 files changed, 97 insertions(+), 10 deletions(-) create mode 100644 .changes/next-release/enhancement-crt-28261.json diff --git a/.changes/next-release/enhancement-crt-28261.json b/.changes/next-release/enhancement-crt-28261.json new file mode 100644 index 00000000..ee65dd62 --- /dev/null +++ b/.changes/next-release/enhancement-crt-28261.json @@ -0,0 +1,5 @@ +{ + "type": "enhancement", + "category": "``crt``", + "description": "Update ``target_throughput`` defaults. If not configured, s3transfer will use the AWS CRT to attempt to determine a recommended target throughput to use based on the system. If there is no recommended throughput, s3transfer now falls back to ten gigabits per second." +} diff --git a/s3transfer/crt.py b/s3transfer/crt.py index 7817e733..465d58d3 100644 --- a/s3transfer/crt.py +++ b/s3transfer/crt.py @@ -25,13 +25,18 @@ EventLoopGroup, TlsContextOptions, ) -from awscrt.s3 import S3Client, S3RequestTlsMode, S3RequestType +from awscrt.s3 import ( + S3Client, + S3RequestTlsMode, + S3RequestType, + get_recommended_throughput_target_gbps, +) from botocore import UNSIGNED from botocore.compat import urlsplit from botocore.config import Config from botocore.exceptions import NoCredentialsError -from s3transfer.constants import GB, MB +from s3transfer.constants import MB from s3transfer.exceptions import TransferNotDoneError from s3transfer.futures import BaseTransferFuture, BaseTransferMeta from s3transfer.utils import CallArgs, OSUtils, get_callbacks @@ -94,7 +99,7 @@ def create_s3_crt_client( region, botocore_credential_provider=None, num_threads=None, - target_throughput=5 * GB / 8, + target_throughput=None, part_size=8 * MB, use_ssl=True, verify=None, @@ -113,8 +118,14 @@ def create_s3_crt_client( is the number of processors in the machine. :type target_throughput: Optional[int] - :param target_throughput: Throughput target in Bytes. - Default is 0.625 GB/s (which translates to 5 Gb/s). + :param target_throughput: Throughput target in bytes per second. + By default, CRT will automatically attempt to choose a target + throughput that matches the system's maximum network throughput. + Currently, if CRT is unable to determine the maximum network + throughput, a fallback target throughput of ``1_250_000_000`` bytes + per second (which translates to 10 gigabits per second, or 1.16 + gibibytes per second) is used. To set a specific target + throughput, set a value for this parameter. :type part_size: Optional[int] :param part_size: Size, in Bytes, of parts that files will be downloaded @@ -163,8 +174,9 @@ def create_s3_crt_client( provider = AwsCredentialsProvider.new_delegate( credentails_provider_adapter ) - - target_gbps = target_throughput * 8 / GB + target_gbps = _get_crt_throughput_target_gbps( + provided_throughput_target_bytes=target_throughput + ) return S3Client( bootstrap=bootstrap, region=region, @@ -176,6 +188,24 @@ def create_s3_crt_client( ) +def _get_crt_throughput_target_gbps(provided_throughput_target_bytes=None): + if provided_throughput_target_bytes is None: + target_gbps = get_recommended_throughput_target_gbps() + logger.debug( + 'Recommended CRT throughput target in gbps: %s', target_gbps + ) + if target_gbps is None: + target_gbps = 10.0 + else: + # NOTE: The GB constant in s3transfer is technically a gibibyte. The + # GB constant is not used here because the CRT interprets gigabits + # for networking as a base power of 10 + # (i.e. 1000 ** 3 instead of 1024 ** 3). + target_gbps = provided_throughput_target_bytes * 8 / 1_000_000_000 + logger.debug('Using CRT throughput target in gbps: %s', target_gbps) + return target_gbps + + class CRTTransferManager: def __init__(self, crt_s3_client, crt_request_serializer, osutil=None): """A transfer manager interface for Amazon S3 on CRT s3 client. diff --git a/tests/unit/test_crt.py b/tests/unit/test_crt.py index 8bd5288e..5d3dd40d 100644 --- a/tests/unit/test_crt.py +++ b/tests/unit/test_crt.py @@ -26,6 +26,11 @@ import s3transfer.crt +requires_crt_pytest = pytest.mark.skipif( + not HAS_CRT, reason="Test requires awscrt to be installed." +) + + @pytest.fixture def mock_crt_process_lock(monkeypatch): # The process lock is cached at the module layer whenever the @@ -38,13 +43,25 @@ def mock_crt_process_lock(monkeypatch): yield mock_lock +@pytest.fixture +def mock_s3_crt_client(): + with mock.patch('s3transfer.crt.S3Client', spec=True) as mock_client: + yield mock_client + + +@pytest.fixture +def mock_get_recommended_throughput_target_gbps(): + with mock.patch( + 's3transfer.crt.get_recommended_throughput_target_gbps' + ) as mock_get_target_gbps: + yield mock_get_target_gbps + + class CustomFutureException(Exception): pass -@pytest.mark.skipif( - not HAS_CRT, reason="Test requires awscrt to be installed." -) +@requires_crt_pytest class TestCRTProcessLock: def test_acquire_crt_s3_process_lock(self, mock_crt_process_lock): lock = s3transfer.crt.acquire_crt_s3_process_lock('app-name') @@ -223,3 +240,38 @@ def test_call(self): writer = s3transfer.crt.OnBodyFileObjWriter(fileobj) writer(chunk=b'content') self.assertEqual(fileobj.getvalue(), b'content') + + +@requires_crt_pytest +class TestCreateS3CRTClient: + @pytest.mark.parametrize( + 'provided_bytes_per_sec,recommended_gbps,expected_gbps', + [ + (None, 100.0, 100.0), + (None, None, 10.0), + # NOTE: create_s3_crt_client() accepts target throughput as bytes + # per second and it is converted to gigabits per second for the + # CRT client instantiation. + (1_000_000_000, None, 8.0), + (1_000_000_000, 100.0, 8.0), + ], + ) + def test_target_throughput( + self, + provided_bytes_per_sec, + recommended_gbps, + expected_gbps, + mock_s3_crt_client, + mock_get_recommended_throughput_target_gbps, + ): + mock_get_recommended_throughput_target_gbps.return_value = ( + recommended_gbps + ) + s3transfer.crt.create_s3_crt_client( + 'us-west-2', + target_throughput=provided_bytes_per_sec, + ) + assert ( + mock_s3_crt_client.call_args[1]['throughput_target_gbps'] + == expected_gbps + )