Skip to content

Commit

Permalink
API: Adding fallback to default latest query if record metadata.start…
Browse files Browse the repository at this point in the history
… is greater than 24 hrs in the future.
  • Loading branch information
ABPLMC committed Sep 23, 2024
1 parent 1e34aac commit db46b9b
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 0 deletions.
18 changes: 18 additions & 0 deletions api/datalake_api/querier.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import time
import os

from datetime import datetime, timedelta
import decimal
import logging
log = logging.getLogger(__name__)

Expand All @@ -40,6 +42,7 @@
slow and expensive.
'''
DEFAULT_LOOKBACK_DAYS = 14
LATEST_MAX_LOOKFORWARD_HOURS = 24


_ONE_DAY_MS = 24 * 60 * 60 * 1000
Expand Down Expand Up @@ -347,6 +350,9 @@ def _latest_table(self):
return self.dynamodb.Table(self.latest_table_name)

def query_latest(self, what, where, lookback_days=DEFAULT_LOOKBACK_DAYS):
now = datetime.utcnow()
max_lookforward = now + timedelta(hours=LATEST_MAX_LOOKFORWARD_HOURS)

if self.use_latest_table:
log.info('inside use_latest_table=TRUE')
response = self._latest_table.query(
Expand All @@ -359,6 +365,18 @@ def query_latest(self, what, where, lookback_days=DEFAULT_LOOKBACK_DAYS):
return self._default_latest(what, where, lookback_days)

latest_item = items[0]
metadata_start = latest_item['metadata']['start']

if isinstance(metadata_start, (int, decimal.Decimal)):
metadata_start = datetime.utcfromtimestamp(float(metadata_start) / 1000)

elif isinstance(metadata_start, str):
metadata_start = datetime.strptime(metadata_start, '%Y-%m-%dT%H:%M:%S.%fZ')

if metadata_start > max_lookforward:
log.info(f"Record with metadata.start {metadata_start} is beyond MAX_LOOKFORWARD_HOURS. Falling back to default latest.")
return self._default_latest(what, where, lookback_days)

return dict(url=latest_item['url'], metadata=latest_item['metadata'])

else:
Expand Down
16 changes: 16 additions & 0 deletions api/tests/test_archive_querier.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,3 +598,19 @@ def test_query_latest_just_latest_table(table_maker, querier, record_maker):
_validate_latest_result(result, what='meow', where='tree')
else:
assert result is None


def test_query_latest_future_record_exceeds_lookforward(table_maker, querier, record_maker):
    """Check that a record starting 25 hours in the future is not returned.

    The same record is written to both the default table and the latest
    table. Its ``start`` lies 25 hours ahead of the current time, and the
    test asserts that ``query_latest`` yields ``None`` for it.
    """
    one_hour_ms = 60 * 60 * 1000
    # Sample the clock once so both offsets are relative to the same instant.
    now_ms = int(time.time() * 1000)
    future_start = now_ms + 25 * one_hour_ms  # 25 hours ahead
    future_end = now_ms + 26 * one_hour_ms  # ends one hour later
    record = record_maker(what='meow', where='tree', start=future_start, end=future_end)

    default_table, latest_table = table_maker([])

    default_table.put_item(Item=record[0])
    latest_table.put_item(Item=record[0])

    result = querier.query_latest('meow', 'tree')
    assert result is None, "No result should be returned if falling back to the default query"

0 comments on commit db46b9b

Please sign in to comment.