Skip to content

Commit

Permalink
Add ListObjectV2 support
Browse files Browse the repository at this point in the history
  • Loading branch information
snosratiershad committed May 31, 2024
1 parent 22949ba commit 135e695
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 11 deletions.
1 change: 1 addition & 0 deletions .ci.s3cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ limit = -1
limitrate = 0
list_md5 = False
list_allow_unordered = False
enable_list_objects_v2 = False
log_target_prefix =
long_listing = False
max_delete = -1
Expand Down
5 changes: 5 additions & 0 deletions S3/Config.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ class Config(object):
# This may be faster when listing very large buckets.
list_allow_unordered = False
# Maximum attempts of re-issuing failed requests

# Use the ListObjectsV2 API instead of ListObjects (V1) for bucket listings.
# See https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html
enable_list_objects_v2 = False


max_retries = 5

## Creating a singleton
Expand Down
12 changes: 9 additions & 3 deletions S3/FileLists.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,9 +438,15 @@ def _get_filelist_remote(remote_uri, recursive = True):

total_size = 0

s3 = S3(Config())
response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(),
recursive = recursive, uri_params = uri_params)
cfg = Config()
s3 = S3(cfg)
response = s3.bucket_list(
remote_uri.bucket(),
prefix=remote_uri.object(),
recursive=recursive,
uri_params=uri_params,
list_objects_v2=cfg.enable_list_objects_v2
)

rem_base_original = rem_base = remote_uri.object()
remote_uri_original = remote_uri
Expand Down
141 changes: 134 additions & 7 deletions S3/S3.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,19 +317,148 @@ def list_all_buckets(self):
response["list"] = getListFromXml(response["data"], "Bucket")
return response

def bucket_list(
    self,
    bucket,
    prefix=None,
    recursive=None,
    uri_params=None,
    limit=-1,
    list_objects_v2=False
):
    """Collect a whole bucket listing into a single response dict.

    Iterates one of the streaming generators and concatenates every page.

    bucket          -- name of the bucket to list
    prefix          -- only list keys starting with this prefix
    recursive       -- forwarded unchanged to the streaming generator
    uri_params      -- extra query parameters; the caller's dict is copied
                       and never modified
    limit           -- maximum number of entries to fetch, -1 for no limit
    list_objects_v2 -- use the ListObjectsV2 API (bucket_list_v2_streaming)
                       instead of the V1 listing (bucket_list_streaming)

    Returns a dict with the keys 'list' (objects), 'common_prefixes'
    (directories) and 'truncated' (flag reported with the last page).
    """
    # Work on a private copy so the caller's dict is left untouched.
    uri_params = dict(uri_params) if uri_params else {}

    # Backward compatibility: V2 may also be requested through the internal
    # "list_type" marker. It is not a real S3 query parameter, so it is
    # removed before the request is built.
    if uri_params.get("list_type") == "v2":
        del uri_params["list_type"]
        list_objects_v2 = True

    if list_objects_v2:
        # ListObjectsV2 is selected on the wire with "list-type=2".
        uri_params["list-type"] = "2"
        stream = self.bucket_list_v2_streaming
    else:
        stream = self.bucket_list_streaming

    item_list = []
    prefixes = []
    # Defined up front so the response is well-formed even if the generator
    # yields nothing.
    truncated = False
    for truncated, dirs, objects in stream(
        bucket,
        prefix,
        recursive,
        uri_params,
        limit
    ):
        item_list.extend(objects)
        prefixes.extend(dirs)

    response = {}
    response['list'] = item_list
    response['common_prefixes'] = prefixes
    response['truncated'] = truncated
    return response

def bucket_list_v2_streaming(
    self,
    bucket,
    prefix=None,
    recursive=None,
    uri_params=None,
    limit=-1,
):
    """Generator over the pages of a continuation-token based bucket listing.

    Each iteration issues one request through bucket_list_v2_noparse and
    yields a (truncated, common_prefixes, contents) tuple for that page.

    bucket     -- name of the bucket to list
    prefix     -- only list keys starting with this prefix
    recursive  -- forwarded unchanged to bucket_list_v2_noparse
    uri_params -- extra query parameters; copied, the caller's dict is
                  never modified
    limit      -- maximum number of entries to fetch, -1 for no limit
    """
    def _list_truncated(data):
        # <IsTruncated> can either be "true" or "false" or be missing completely
        is_truncated = getTextFromXml(data, ".//IsTruncated") or "false"
        return is_truncated.lower() != "false"

    def _get_contents(data):
        return getListFromXml(data, "Contents")

    def _get_common_prefixes(data):
        return getListFromXml(data, "CommonPrefixes")

    def _get_next_continuation_token(data):
        return getTextFromXml(data, "NextContinuationToken")

    # Private copy: the request helper adds keys to the dict it receives.
    uri_params = dict(uri_params) if uri_params else {}

    num_objects = 0
    num_prefixes = 0
    max_keys = limit
    next_continuation_token = None
    truncated = True
    while truncated:
        # A falsy token means "first page"; the helper then omits it.
        response = self.bucket_list_v2_noparse(
            bucket,
            prefix,
            recursive,
            uri_params,
            max_keys,
            next_continuation_token
        )
        data = response["data"]
        current_list = _get_contents(data)
        current_prefixes = _get_common_prefixes(data)
        num_objects += len(current_list)
        num_prefixes += len(current_prefixes)
        fetched = num_objects + num_prefixes
        if limit > fetched:
            # Only ask for what is still missing to reach the limit.
            max_keys = limit - fetched
        truncated = _list_truncated(data)
        if truncated:
            if limit != -1 and fetched >= limit:
                # Limit reached: stop, but report that more entries exist.
                yield truncated, current_prefixes, current_list
                break
            if not (current_list or current_prefixes):
                # Unexpectedly, the server lied, and so the previous
                # response was not truncated. So, no new key to get.
                yield False, current_prefixes, current_list
                break
            next_continuation_token = _get_next_continuation_token(data)
            if not next_continuation_token:
                # Truncated but no token to resume from: requesting again
                # would fetch the same first page forever, so stop here
                # and leave the truncated flag set.
                yield truncated, current_prefixes, current_list
                break

        yield truncated, current_prefixes, current_list

def bucket_list_v2_noparse(
    self,
    bucket,
    prefix=None,
    recursive=None,
    uri_params=None,
    max_keys=-1,
    continuation_token=None
):
    """Send one ListObjectsV2 request and return the raw response.

    bucket             -- name of the bucket to list
    prefix             -- only list keys starting with this prefix
    recursive          -- when neither this nor self.config.recursive is
                          set, a "/" delimiter is added to the request
    uri_params         -- dict of extra query parameters; when supplied it
                          is updated in place with the listing parameters
    max_keys           -- page size to request, -1 leaves it unset
    continuation_token -- token from a previous truncated response;
                          a falsy value requests the first page

    Returns whatever self.send_request returns for the built request.
    """
    # A fresh dict per call: a shared "{}" default would carry prefix,
    # max-keys and continuation-token over from one call to the next.
    if uri_params is None:
        uri_params = {}
    # Without "list-type=2" the server answers with a V1 listing and the
    # continuation token is not honoured.
    uri_params['list-type'] = "2"
    if prefix:
        uri_params['prefix'] = prefix
    if not self.config.recursive and not recursive:
        uri_params['delimiter'] = "/"
    if max_keys != -1:
        uri_params['max-keys'] = str(max_keys)
    if self.config.list_allow_unordered:
        uri_params['allow-unordered'] = "true"
    if continuation_token:
        uri_params["continuation-token"] = continuation_token
    request = self.create_request(
        "BUCKET_LIST",
        bucket=bucket,
        uri_params=uri_params
    )
    response = self.send_request(request)
    return response

def bucket_list_streaming(self, bucket, prefix = None, recursive = None, uri_params = None, limit = -1):
""" Generator that produces <dir_list>, <object_list> pairs of groups of content of a specified bucket. """
def _list_truncated(data):
Expand Down Expand Up @@ -383,9 +512,7 @@ def _get_next_marker(data, current_elts, key):

yield truncated, current_prefixes, current_list

def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = None, max_keys = -1):
if uri_params is None:
uri_params = {}
def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}, max_keys = -1):
if prefix:
uri_params['prefix'] = prefix
if not self.config.recursive and not recursive:
Expand Down
9 changes: 8 additions & 1 deletion s3cmd
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,12 @@ def subcmd_bucket_list(s3, uri, limit):
if prefix.endswith('*'):
prefix = prefix[:-1]
try:
response = s3.bucket_list(bucket, prefix = prefix, limit = limit)
response = s3.bucket_list(
bucket,
prefix=prefix,
limit=limit,
list_objects_v2=cfg.enable_list_objects_v2
)
except S3Error as e:
if e.info["Code"] in S3.codes:
error(S3.codes[e.info["Code"]] % bucket)
Expand Down Expand Up @@ -3234,6 +3239,8 @@ def main():

optparser.add_option( "--list-allow-unordered", dest="list_allow_unordered", action="store_true", help="Not an AWS standard. Allow the listing results to be returned in unsorted order. This may be faster when listing very large buckets.")

optparser.add_option( "--enable_list_objects_v2", dest="enable_list_objects_v2", action="store_true", help="Switches list API to ListObjectsV2")

optparser.add_option("-H", "--human-readable-sizes", dest="human_readable_sizes", action="store_true", help="Print sizes in human readable form (eg 1kB instead of 1234).")

optparser.add_option( "--ws-index", dest="website_index", action="store", help="Name of index-document (only for [ws-create] command)")
Expand Down

0 comments on commit 135e695

Please sign in to comment.