Skip to content

Commit

Permalink
Update throughput target defaults
Browse files Browse the repository at this point in the history
Instead of defaulting to five gigabits per second when no target
througput is configured, s3transfer will use the CRT to determine
if there is a recommended target throughput to use. If it is unable,
to determine a recommended target throughput, it will default to
ten gigabits per second, which is the current throughput default
for the CRT S3 client.

This behavior was updated to help minimize the amount of configuration
required to get the fastest throughputs from the CRT transfer client.
The fallback throughput was updated from five to ten gigabits so
that the CRT S3 integration is consistent with the underlying CRT S3
client's defaults.
  • Loading branch information
kyleknap committed Nov 16, 2023
1 parent f9e71b2 commit 4bd8046
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 10 deletions.
5 changes: 5 additions & 0 deletions .changes/next-release/enhancement-crt-28261.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "enhancement",
"category": "``crt``",
"description": "Update ``target_throughput`` defaults. If not configured, s3transfer will use the AWS CRT to attempt to determine a recommended target throughput to use based on the system. If there is no recommended throughput, s3transfer now falls back to ten gigabits per second."
}
44 changes: 37 additions & 7 deletions s3transfer/crt.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,18 @@
EventLoopGroup,
TlsContextOptions,
)
from awscrt.s3 import S3Client, S3RequestTlsMode, S3RequestType
from awscrt.s3 import (
S3Client,
S3RequestTlsMode,
S3RequestType,
get_recommended_throughput_target_gbps,
)
from botocore import UNSIGNED
from botocore.compat import urlsplit
from botocore.config import Config
from botocore.exceptions import NoCredentialsError

from s3transfer.constants import GB, MB
from s3transfer.constants import MB
from s3transfer.exceptions import TransferNotDoneError
from s3transfer.futures import BaseTransferFuture, BaseTransferMeta
from s3transfer.utils import CallArgs, OSUtils, get_callbacks
Expand Down Expand Up @@ -94,7 +99,7 @@ def create_s3_crt_client(
region,
botocore_credential_provider=None,
num_threads=None,
target_throughput=5 * GB / 8,
target_throughput=None,
part_size=8 * MB,
use_ssl=True,
verify=None,
Expand All @@ -113,8 +118,14 @@ def create_s3_crt_client(
is the number of processors in the machine.
:type target_throughput: Optional[int]
:param target_throughput: Throughput target in Bytes.
Default is 0.625 GB/s (which translates to 5 Gb/s).
:param target_throughput: Throughput target in bytes per second.
By default, CRT will automatically attempt to choose a target
throughput that matches the system's maximum network throughput.
Currently, if CRT is unable to determine the maximum network
throughput, a fallback target throughput of ``1_250_000_000`` bytes
per second (which translates to 10 gigabits per second, or 1.16
gibibytes per second) is used. To set a specific target
throughput, set a value for this parameter.
:type part_size: Optional[int]
:param part_size: Size, in Bytes, of parts that files will be downloaded
Expand Down Expand Up @@ -163,8 +174,9 @@ def create_s3_crt_client(
provider = AwsCredentialsProvider.new_delegate(
credentails_provider_adapter
)

target_gbps = target_throughput * 8 / GB
target_gbps = _get_crt_throughput_target_gbps(
provided_throughput_target_bytes=target_throughput
)
return S3Client(
bootstrap=bootstrap,
region=region,
Expand All @@ -176,6 +188,24 @@ def create_s3_crt_client(
)


def _get_crt_throughput_target_gbps(provided_throughput_target_bytes=None):
if provided_throughput_target_bytes is None:
target_gbps = get_recommended_throughput_target_gbps()
logger.debug(
'Recommended CRT throughput target in gbps: %s', target_gbps
)
if target_gbps is None:
target_gbps = 10.0
else:
# NOTE: The GB constant in s3transfer is technically a gibibyte. The
# GB constant is not used here because the CRT interprets gigabits
# for networking as a base power of 10
# (i.e. 1000 ** 3 instead of 1024 ** 3).
target_gbps = provided_throughput_target_bytes * 8 / 1_000_000_000
logger.debug('Using CRT throughput target in gbps: %s', target_gbps)
return target_gbps


class CRTTransferManager:
def __init__(self, crt_s3_client, crt_request_serializer, osutil=None):
"""A transfer manager interface for Amazon S3 on CRT s3 client.
Expand Down
58 changes: 55 additions & 3 deletions tests/unit/test_crt.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
import s3transfer.crt


requires_crt_pytest = pytest.mark.skipif(
not HAS_CRT, reason="Test requires awscrt to be installed."
)


@pytest.fixture
def mock_crt_process_lock(monkeypatch):
# The process lock is cached at the module layer whenever the
Expand All @@ -38,13 +43,25 @@ def mock_crt_process_lock(monkeypatch):
yield mock_lock


@pytest.fixture
def mock_s3_crt_client():
with mock.patch('s3transfer.crt.S3Client', spec=True) as mock_client:
yield mock_client


@pytest.fixture
def mock_get_recommended_throughput_target_gbps():
with mock.patch(
's3transfer.crt.get_recommended_throughput_target_gbps'
) as mock_get_target_gbps:
yield mock_get_target_gbps


class CustomFutureException(Exception):
pass


@pytest.mark.skipif(
not HAS_CRT, reason="Test requires awscrt to be installed."
)
@requires_crt_pytest
class TestCRTProcessLock:
def test_acquire_crt_s3_process_lock(self, mock_crt_process_lock):
lock = s3transfer.crt.acquire_crt_s3_process_lock('app-name')
Expand Down Expand Up @@ -223,3 +240,38 @@ def test_call(self):
writer = s3transfer.crt.OnBodyFileObjWriter(fileobj)
writer(chunk=b'content')
self.assertEqual(fileobj.getvalue(), b'content')


@requires_crt_pytest
class TestCreateS3CRTClient:
@pytest.mark.parametrize(
'provided_bytes_per_sec,recommended_gbps,expected_gbps',
[
(None, 100.0, 100.0),
(None, None, 10.0),
# NOTE: create_s3_crt_client() accepts target throughput as bytes
# per second and it is converted to gigabits per second for the
# CRT client instantiation.
(1_000_000_000, None, 8.0),
(1_000_000_000, 100.0, 8.0),
],
)
def test_target_throughput(
self,
provided_bytes_per_sec,
recommended_gbps,
expected_gbps,
mock_s3_crt_client,
mock_get_recommended_throughput_target_gbps,
):
mock_get_recommended_throughput_target_gbps.return_value = (
recommended_gbps
)
s3transfer.crt.create_s3_crt_client(
'us-west-2',
target_throughput=provided_bytes_per_sec,
)
assert (
mock_s3_crt_client.call_args[1]['throughput_target_gbps']
== expected_gbps
)

0 comments on commit 4bd8046

Please sign in to comment.