Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make fetch template support ISO times #84

Merged
merged 1 commit into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions client/datalake/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import requests
from io import BytesIO
import errno
from copy import deepcopy
from datetime import datetime

import boto3
import math
Expand Down Expand Up @@ -323,11 +325,15 @@ def fetch_to_filename(self, url, filename_template=None):
example, to store a file based on `what` it is, you could pass
something like {what}.log. Or if you are gathering many `what`'s that
originate from many `where`'s you might want to use something like
{where}/{what}-{start}.log. If filename_template is None (the default),
files are stored in the current directory and the filenames are the ids
from the metadata.
{where}/{what}-{start}.log. Note that the template variables
{start_iso} and {end_iso} are also supported and expand to the ISO
timestamps with millisecond precision (e.g., 2023-12-19T00:10:22.123).

If filename_template is None (the default), files are stored in the
current directory and the filenames are the ids from the metadata.

Returns the filename written.

'''
k = None
if url.startswith('s3://'):
Expand Down Expand Up @@ -371,17 +377,31 @@ def _get_object_from_url(self, url):
return obj, Metadata.from_json(m)

def _get_filename_from_template(self, template, metadata):
    '''Expand a filename template using the metadata of a file.

    template is a str.format-style string such as {where}/{what}.log, or
    None to fall back to {id}. Every key of metadata is available as a
    template variable, plus start_iso and end_iso, which are the values
    returned by _ms_to_iso for the `start` and `end` metadata fields.

    Returns the expanded filename. Raises InvalidDatalakePath if the
    template names a variable that is not available or is not a valid
    format string.
    '''
    if template is None:
        template = '{id}'
    # Work on a copy so the derived variables never leak into the caller's
    # metadata.
    template_vars = deepcopy(metadata)
    template_vars.update(
        start_iso=self._ms_to_iso(metadata.get('start')),
        end_iso=self._ms_to_iso(metadata.get('end')),
    )
    try:
        # Format with the augmented variables, not the raw metadata,
        # otherwise {start_iso} and {end_iso} can never be resolved.
        return template.format(**template_vars)
    except KeyError as e:
        m = '"{}" does not appear to be a supported template variable.'
        raise InvalidDatalakePath(m.format(str(e)))
    except ValueError as e:
        raise InvalidDatalakePath(str(e))

# strftime pattern for ISO timestamps. %f emits six microsecond digits;
# _ms_to_iso drops the last three to get millisecond precision.
_ISO_FORMAT_MS = '%Y-%m-%dT%H:%M:%S.%f'

def _ms_to_iso(self, ts):
    '''Render a millisecond epoch timestamp as an ISO string in UTC.

    ts is the number of milliseconds since the Unix epoch, or None. The
    result has millisecond precision (e.g., 2023-12-19T00:10:22.123). A ts
    of None is returned unchanged as None.
    '''
    if ts is not None:
        moment = datetime.utcfromtimestamp(ts / 1000.0)
        # %f yields six microsecond digits; trimming the last three leaves
        # milliseconds.
        return moment.strftime(self._ISO_FORMAT_MS)[:-3]
    return None

def _get_key_name_from_url(self, url):
parts = urlparse(url)
if not parts.path:
Expand Down
24 changes: 24 additions & 0 deletions client/test/test_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,27 @@ def test_metadata_from_http_url(archive, random_metadata):
f = archive.fetch(url + '/data')
assert f.read() == content
assert f.metadata == random_metadata


def test_fetch_template_with_iso(archive, datalake_url_maker, random_metadata,
                                 tmpdir):
    '''{start_iso} and {end_iso} expand to millisecond-precision ISO times.'''
    random_metadata['start'] = 1702944622123
    random_metadata['end'] = 1702944634456
    url = datalake_url_maker(metadata=random_metadata)
    target_dir = str(tmpdir)
    template = os.path.join(target_dir, '{start_iso}-{end_iso}-foobar.log')
    expected_path = os.path.join(
        target_dir,
        '2023-12-19T00:10:22.123-2023-12-19T00:10:34.456-foobar.log',
    )
    archive.fetch_to_filename(url, filename_template=template)
    assert os.path.exists(expected_path)


def test_fetch_template_with_none(archive, datalake_url_maker, random_metadata,
                                  tmpdir):
    '''A missing end time is rendered as the literal text None.'''
    random_metadata['start'] = 1702944622123
    random_metadata['end'] = None
    url = datalake_url_maker(metadata=random_metadata)
    target_dir = str(tmpdir)
    template = os.path.join(target_dir, '{start_iso}-{end_iso}-foobar.log')
    expected_path = os.path.join(
        target_dir,
        '2023-12-19T00:10:22.123-None-foobar.log',
    )
    archive.fetch_to_filename(url, filename_template=template)
    assert os.path.exists(expected_path)
Loading