From a0ed1dbab5beacb034efafb442a853c9e61f0ea6 Mon Sep 17 00:00:00 2001 From: Alex Bednarek Date: Wed, 29 May 2024 15:56:30 -0400 Subject: [PATCH] WIP: Addressing LATEST_MAX_LOOKBACK with the new latest functionality. --- api/datalake_api/app.py | 1 + api/datalake_api/querier.py | 15 +++++++++------ api/datalake_api/settings.py | 6 ++++++ api/datalake_api/v0.py | 4 ++++ api/tests/test_archive_querier.py | 17 ++++++++++++++++- 5 files changed, 36 insertions(+), 7 deletions(-) diff --git a/api/datalake_api/app.py b/api/datalake_api/app.py index 0d61e2b..af94d74 100644 --- a/api/datalake_api/app.py +++ b/api/datalake_api/app.py @@ -23,6 +23,7 @@ from datalake_api import settings +logging.basicConfig(level=logging.INFO) LOGGER = logging.getLogger(__name__) diff --git a/api/datalake_api/querier.py b/api/datalake_api/querier.py index 141c735..18bdd94 100644 --- a/api/datalake_api/querier.py +++ b/api/datalake_api/querier.py @@ -21,9 +21,7 @@ import os import logging -logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) -log.setLevel(logging.INFO) '''the maximum number of results to return to the user @@ -179,12 +177,17 @@ def _unpack(self, result): class ArchiveQuerier(object): - def __init__(self, table_name, latest_table_name=None, dynamodb=None): + def __init__(self, table_name, + latest_table_name=None, + use_latest_table=None, + latest_max_lookback=30, + dynamodb=None): self.table_name = table_name self.latest_table_name = latest_table_name + self.use_latest_table = use_latest_table + self.latest_max_lookback = latest_max_lookback self.dynamodb = dynamodb - self.use_latest_table = os.environ.get("DATALAKE_USE_LATEST_TABLE", - "false").lower() == "true" + def query_by_work_id(self, work_id, what, where=None, cursor=None): kwargs = self._prepare_work_id_kwargs(work_id, what) @@ -353,7 +356,7 @@ def query_latest(self, what, where, lookback_days=DEFAULT_LOOKBACK_DAYS): KeyConditionExpression=Key('what_where_key').eq(f'{what}:{where}') ) items = response.get('Items', []) - if not items: + if not items and self.latest_max_lookback > 0: return self._default_latest(what, where, lookback_days) latest_item = items[0] diff --git a/api/datalake_api/settings.py b/api/datalake_api/settings.py index e55c6d1..3d79650 100644 --- a/api/datalake_api/settings.py +++ b/api/datalake_api/settings.py @@ -11,10 +11,16 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations under # the License. +import os # default settings + DYNAMODB_TABLE = 'test' DYNAMODB_LATEST_TABLE = 'test_latest' +DATALAKE_USE_LATEST_TABLE = \ + os.environ.get("DATALAKE_USE_LATEST_TABLE", "false").lower() == "true" +LATEST_MAX_LOOKBACK = int(os.environ.get("LATEST_MAX_LOOKBACK", "30")) + AWS_REGION = 'us-west-2' AWS_ACCESS_KEY_ID = None AWS_SECRET_ACCESS_KEY = None diff --git a/api/datalake_api/v0.py b/api/datalake_api/v0.py index 927997d..aba95f1 100644 --- a/api/datalake_api/v0.py +++ b/api/datalake_api/v0.py @@ -51,8 +51,12 @@ def get_archive_querier(): if not hasattr(app, 'archive_querier'): table_name = app.config.get('DYNAMODB_TABLE') latest_table_name = app.config.get('DYNAMODB_LATEST_TABLE') + use_latest_table = app.config.get('DATALAKE_USE_LATEST_TABLE') + latest_max_lookback = app.config.get("LATEST_MAX_LOOKBACK") app.archive_querier = ArchiveQuerier(table_name, latest_table_name, + use_latest_table, + latest_max_lookback, dynamodb=get_dynamodb()) return app.archive_querier diff --git a/api/tests/test_archive_querier.py b/api/tests/test_archive_querier.py index 2c6fe37..00daa7c 100644 --- a/api/tests/test_archive_querier.py +++ b/api/tests/test_archive_querier.py @@ -123,6 +123,10 @@ def query_latest(self, what, where): return HttpRecord(**record) + +""" +Incorporate LATEST_MAX_LOOKBACK HERE +""" @pytest.fixture(params=[ ('archive', 'use_latest'), ('archive', 'use_default'), @@ -565,4 +569,15 @@ def test_latest_table_query(table_maker, querier, record_maker): table_maker(records) querier.use_latest_table = True result = querier.query_latest('boo', 'hoo0') - _validate_latest_result(result, what='boo', where='hoo0') \ No newline at end of file + _validate_latest_result(result, what='boo', where='hoo0') + +""" +Write tests: +With setup of latest table records, +with DYNAMODB_LATEST_TABLE set, with DATALAKE_USE_LATEST_TABLE=true, with LATEST_MAX_LOOKBACK=0, record is found + +With setup of latest table records, +with DYNAMODB_LATEST_TABLE set, with DATALAKE_USE_LATEST_TABLE=false, with LATEST_MAX_LOOKBACK=0, record is not found + +2-4 +""" \ No newline at end of file