From 1c2dc0e79875e905abd02d0eb22bd84d743f7145 Mon Sep 17 00:00:00 2001 From: Scott Willeke Date: Mon, 22 Jul 2024 16:21:46 -0700 Subject: [PATCH] fix: updates the 'add rows' API request to support the new payload shape + tests --- .../destination_glide/glide.py | 14 +-- .../GlideBigTableRestStrategy_int_test.py | 119 ++++++++++++++++++ .../scripts/test-integration.sh | 13 ++ 3 files changed, 136 insertions(+), 10 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-glide/integration_tests/GlideBigTableRestStrategy_int_test.py create mode 100755 airbyte-integrations/connectors/destination-glide/scripts/test-integration.sh diff --git a/airbyte-integrations/connectors/destination-glide/destination_glide/glide.py b/airbyte-integrations/connectors/destination-glide/destination_glide/glide.py index 8c0bb89ce391..a60849edc995 100644 --- a/airbyte-integrations/connectors/destination-glide/destination_glide/glide.py +++ b/airbyte-integrations/connectors/destination-glide/destination_glide/glide.py @@ -107,11 +107,11 @@ def create(cls, strategy: str) -> GlideBigTableBase: Creates a new instance of the default implementation for the GlideBigTable API client. """ implementation_map = { - "tables": GlideBigTableRestStrategy() + "tables": lambda: GlideBigTableRestStrategy() } if strategy not in implementation_map: raise ValueError(f"Strategy '{strategy}' not found. Expected one of '{implmap.keys()}'.") # nopep8 - return implementation_map[strategy] + return implementation_map[strategy]() class GlideBigTableRestStrategy(GlideBigTableBase): @@ -152,18 +152,12 @@ def raise_if_set_schema_not_called(self): "set_schema must be called before add_rows or commit") def _add_row_batch(self, rows: List[BigTableRow]) -> None: - # TODO: add rows to stash/serial https://web.postman.co/workspace/glideapps-Workspace~46b48d24-5fc1-44b6-89aa-8d6751db0fc5/request/9026518-c282ef52-4909-4806-88bf-08510ee80770 logger.debug(f"Adding rows batch with size {len(rows)}") r = requests.post( self.url(f"stashes/{self.stash_id}/{self.stash_serial}"), headers=self.headers(), - json={ - "data": rows, - "options": { - # ignore columns in rows that are not part of schema: - "unknownColumns": "ignore" - } - } + json=rows + ) try: r.raise_for_status() diff --git a/airbyte-integrations/connectors/destination-glide/integration_tests/GlideBigTableRestStrategy_int_test.py b/airbyte-integrations/connectors/destination-glide/integration_tests/GlideBigTableRestStrategy_int_test.py new file mode 100644 index 000000000000..f5ffd39298ab --- /dev/null +++ b/airbyte-integrations/connectors/destination-glide/integration_tests/GlideBigTableRestStrategy_int_test.py @@ -0,0 +1,119 @@ +from datetime import datetime +from destination_glide.glide import GlideBigTableRestStrategy, Column, GlideBigTableFactory +import os +import unittest +from unittest import skip +from unittest.mock import patch +import uuid +import logging +import random + +log = logging.getLogger("test") +log.setLevel(logging.DEBUG) + +class TestGlideBigTableRestStrategy(unittest.TestCase): + ''' + Tests against a working Glide /tables API endpoint rather than being mocked like the one in unit tests. + ''' + + api_host = "https://functions.prod.internal.glideapps.com" + api_key = None + api_path_root = "api" + + + def setUp(self): + self.api_key = os.getenv("GLIDE_API_KEY") + if self.api_key is None: + raise Exception("GLIDE_API_KEY environment variable is not set.") + + # The protocol is to call `init`, `set_schema`, `add_rows` one or more times, and `commit` in that order. + + def test_new_table(self): + + # init + gbt = GlideBigTableFactory().create("tables") + + table_name = f"test-table-{str(uuid.uuid4())}" + gbt.init(self.api_host, self.api_key, self.api_path_root, table_name) + + # set_schema + test_columns = [ + Column("test-str", "string"), + Column("test-num", "number") + ] + gbt.set_schema(test_columns) + + # add_rows + for batch in range(3): + now = datetime.now() + test_rows = range(3) + test_rows = [ + { + "test-str": f"test-str-{now.isoformat()}-{batch}-{i}", + "test-num": (batch * 1000) + i, + } + for i in test_rows + ] + + # this creates the stashes: + gbt.add_rows(test_rows) + + ## this commits the stages by upserting the table: + # wraps= allows us to spy on the gbt's method here and confirm it created the table rather than overwrote it: + with patch.object(gbt, "overwrite_table_from_stash", wraps=gbt.overwrite_table_from_stash) as mock_overwrite_table_from_stash: + with patch.object(gbt, "create_table_from_stash", wraps=gbt.create_table_from_stash) as mock_create_table_from_stash: + gbt.commit() + mock_overwrite_table_from_stash.assert_not_called() + mock_create_table_from_stash.assert_called_once() + + + def test_updating_table(self): + # init + + table_name = f"test-table-{str(uuid.uuid4())}" + gbt = GlideBigTableFactory().create("tables") + gbt.init(self.api_host, self.api_key, self.api_path_root, table_name) + + # set_schema + test_columns = [ + Column("test-str", "string"), + Column("test-num", "number") + ] + gbt.set_schema(test_columns) + + # add_rows + test_rows = [ + { + "test-str": f"test-str-{datetime.now().isoformat()}", + "test-num": random.randint(0, 100000), + } + ] + gbt.add_rows(test_rows) + + ## this commits the stages by upserting the table: + gbt.commit() + + ##### NOW update the existing table we just created: + + # now do the update the second table now: + gbt = GlideBigTableFactory().create("tables") + gbt.init(self.api_host, self.api_key, self.api_path_root, table_name) + gbt.set_schema(test_columns) + + now = datetime.now() + test_rows = [ + { + "test-str": f"test-str-{datetime.now().isoformat()}", + "test-num": random.randint(0, 100000), + } + ] + gbt.add_rows(test_rows) + + # wraps= allows us to spy on the gbt's method here and confirm it overwrote the table rather than created it: + with patch.object(gbt, "overwrite_table_from_stash", wraps=gbt.overwrite_table_from_stash) as mock_overwrite_table_from_stash: + with patch.object(gbt, "create_table_from_stash", wraps=gbt.create_table_from_stash) as mock_create_table_from_stash: + gbt.commit() + mock_overwrite_table_from_stash.assert_called_once() + mock_create_table_from_stash.assert_not_called() + + diff --git a/airbyte-integrations/connectors/destination-glide/scripts/test-integration.sh b/airbyte-integrations/connectors/destination-glide/scripts/test-integration.sh new file mode 100755 index 000000000000..14a20ed2ef35 --- /dev/null +++ b/airbyte-integrations/connectors/destination-glide/scripts/test-integration.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +this_dir=$(cd $(dirname "$0"); pwd) # this script's directory +this_script=$(basename $0) + +# if not GLIDE_API_KEY then print error and exit +if [ -z "$GLIDE_API_KEY" ]; then + echo "**************************************************" + echo "GLIDE_API_KEY is not set." + echo "You probably want to run this like \`GLIDE_API_KEY=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxx ./${this_script}\`\n" + exit 1 +fi + +poetry run pytest integration_tests "$@"