From b963084b5f7b61e26673d88a93867581d8de870d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jesu=C3=A9=20Junior?= Date: Sat, 5 Oct 2024 15:40:49 -0400 Subject: [PATCH] Singer io master (#4) * parse dev mode argument (#158) * parse dev mode argument * update the short flag for dev mode Replace `-dev` to `-D` * Updated the short flag for dev mode * - remove dev mode short flag Co-authored-by: RushiT0122 * Bump version 5.13.0 (#160) * Bump version 5.13.0 * update changelog for dev mode argument parsing * change versioning to minor version bump Co-authored-by: RushiT0122 * Bump backoff to be compatible with newer python versions (#165) * bump backoff for python 3.11 compatibility * update pip version * try new circleci yml * remove 'make' * make pylint happy * make pylint happy * make pylint happy again * backoff version is a breaking change for old python versions * Changelog update * Relax dependency version requirements (#167) * Relax dependency constraints * Bump version to `6.0.1` * pin backoff and simplejson to major version * Don't allow older versions * Update changelog * Pin minimum and major versions * Make `ensure_ascii` Dynamic with Default Set to `True` in JSON Serialization (#168) * add parameter - ensure_ascii to load non ascii characters when set to false * add unit test for ensuring ascii characters while loading * update setup and changelog * Enable copilot usage in PR template according to Qlik policy * removing simplejson dep and improve dep version * Adding * bumping version * Removing use_decimal param for json.dumps * Adding a config to be able to send message to pubsub * Extracting to a F * pubsub doesn't expect None as attr * Parsing state to be used as attr in pubsub * Set state as stream name when type is state * feat: supporting a simple entry from lakehouse lib to send messages to a queue * chore(deps) make jsonschema version flexible --------- Co-authored-by: Rushikesh Todkar <98420315+RushiT0122@users.noreply.github.com> Co-authored-by: 
RushiT0122 Co-authored-by: Leslie VanDeMark <38043390+leslievandemark@users.noreply.github.com> Co-authored-by: Bryant Gray Co-authored-by: Sourabh Gandhi <105213416+sgandhi1311@users.noreply.github.com> Co-authored-by: Eivin Giske Skaaren --- .circleci/config.yml | 35 ++++++++++++++++++++------------ .github/pull_request_template.md | 4 ++++ CHANGELOG.md | 12 +++++++++++ singer/catalog.py | 2 +- singer/exceptions.py | 2 +- singer/messages.py | 11 +++++----- singer/transform.py | 8 ++++---- singer/utils.py | 10 +++++++-- tests/test_singer.py | 26 ++++++++++++++++++++++++ 9 files changed, 84 insertions(+), 26 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index ae6734c..a64745e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,26 +1,35 @@ -version: 2 +version: 2.1 + +workflows: + build: + jobs: + - build: + context: + - circleci-user + jobs: build: docker: - - image: ubuntu:16.04 + - image: 218546966473.dkr.ecr.us-east-1.amazonaws.com/sources-python:1.1.0 steps: - checkout - - run: - name: 'Install python 3.5.2' - command: | - apt update - apt install --yes python3 python3-pip python3-venv - run: name: 'Setup virtualenv' command: | - mkdir -p ~/.virtualenvs + pyenv global 3.11.7 python3 -m venv ~/.virtualenvs/singer-python source ~/.virtualenvs/singer-python/bin/activate - pip install -U 'pip<19.2' 'setuptools<51.0.0' - make install + pip install -U 'pip==20.3.4' 'setuptools<51.0.0' + pip install .[dev] + - run: + name: 'Pylint' + command: | + source ~/.virtualenvs/singer-python/bin/activate + pip install pylint + pylint singer --extension-pkg-whitelist=ciso8601 -d missing-docstring,broad-exception-raised,broad-exception-caught,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access,consider-using-f-string - run: - name: 'Run tests' + name: 'Run Tests' command: | - # Need to re-activate the virtualenv source 
~/.virtualenvs/singer-python/bin/activate - make test + pip install nose2 + nose2 -v -s tests diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 6e46b00..ef49bc0 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -9,3 +9,7 @@ # Rollback steps - revert this branch + +#### AI generated code +https://internal.qlik.dev/general/ways-of-working/code-reviews/#guidelines-for-ai-generated-code +- [ ] this PR has been written with the help of GitHub Copilot or another generative AI tool diff --git a/CHANGELOG.md b/CHANGELOG.md index ffcb8e6..4a06bd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## 6.1.0 + * Make ensure_ascii Dynamic with Default Set to True in JSON Serialization. Required to handle the special characters [#168](https://github.com/singer-io/singer-python/pull/168) + +## 6.0.1 + * Pin backoff and simplejson to any version greater than or equal to the previously allowed version, up to the next major version [#167](https://github.com/singer-io/singer-python/pull/167) + +## 6.0.0 + * Bump backoff version to 2.2.1. 
This version drops support for python 3.5, but adds it for 3.10 [#165](https://github.com/singer-io/singer-python/pull/165) + +## 5.13.0 + * Add support for dev mode argument parsing [#158](https://github.com/singer-io/singer-python/pull/158) + ## 5.12.2 * Removes pinned `pytz` version [#152](https://github.com/singer-io/singer-python/pull/152) diff --git a/singer/catalog.py b/singer/catalog.py index 1767ff1..373a606 100644 --- a/singer/catalog.py +++ b/singer/catalog.py @@ -92,7 +92,7 @@ def __eq__(self, other): @classmethod def load(cls, filename): - with open(filename) as fp: # pylint: disable=invalid-name + with open(filename, encoding="utf-8") as fp: return Catalog.from_dict(json.load(fp)) @classmethod diff --git a/singer/exceptions.py b/singer/exceptions.py index 9231328..b13016d 100644 --- a/singer/exceptions.py +++ b/singer/exceptions.py @@ -11,7 +11,7 @@ def __init__(self, message): The first line is the error's class name. The subsequent lines are the message that class was created with. 
""" - super().__init__('{}\n{}'.format(self.__class__.__name__, message)) + super().__init__(f"{self.__class__.__name__}\n{message}") class SingerConfigurationError(SingerError): diff --git a/singer/messages.py b/singer/messages.py index 90f5c0e..24bf7b3 100644 --- a/singer/messages.py +++ b/singer/messages.py @@ -17,16 +17,16 @@ class Message(): '''Base class for messages.''' - def asdict(self): # pylint: disable=no-self-use + def asdict(self): raise Exception('Not implemented') def __eq__(self, other): return isinstance(other, Message) and self.asdict() == other.asdict() def __repr__(self): - pairs = ["{}={}".format(k, v) for k, v in self.asdict().items()] + pairs = [f"{k}={v}" for k, v in self.asdict().items()] attrstr = ", ".join(pairs) - return "{}({})".format(self.__class__.__name__, attrstr) + return f"{self.__class__.__name__}({attrstr})" def __str__(self): return str(self.asdict()) @@ -175,7 +175,7 @@ def asdict(self): def _required_key(msg, k): if k not in msg: - raise Exception("Message is missing required key '{}': {}".format(k, msg)) + raise Exception(f"Message is missing required key '{k}': {msg}") return msg[k] @@ -230,10 +230,11 @@ def format_message(message): def write_message(message): if os.environ.get("USE_QUEUE") and "lakehouse" in sys.modules: + attrs = {"type": message.get("type"), "stream": message.get("stream") } topic = os.environ.get("TAP_NAME") if not topic: raise Exception("TAP_NAME is not set. 
Please set the envvar TAP_NAME") - PubSubWrapper().write_message(topic, message) + PubSubWrapper.write_message(topic.lower(), message.decode("utf-8"), **attrs) else: sys.stdout.write(format_message(message) + '\n') sys.stdout.flush() diff --git a/singer/transform.py b/singer/transform.py index 3fdefdf..69f812a 100644 --- a/singer/transform.py +++ b/singer/transform.py @@ -77,16 +77,16 @@ def tostr(self): path = ".".join(map(str, self.path)) if self.schema: if self.logging_level >= logging.INFO: - msg = "data does not match {}".format(self.schema) + msg = f"data does not match {self.schema}" else: - msg = "does not match {}".format(self.schema) + msg = f"does not match {self.schema}" else: msg = "not in schema" if self.logging_level >= logging.INFO: - output = "{}: {}".format(path, msg) + output = f"{path}: {msg}" else: - output = "{}: {} {}".format(path, self.data, msg) + output = f"{path}: {self.data} {msg}" return output diff --git a/singer/utils.py b/singer/utils.py index 7772b89..b82d8c1 100644 --- a/singer/utils.py +++ b/singer/utils.py @@ -107,7 +107,7 @@ def chunk(array, num): def load_json(path): - with open(path) as fil: + with open(path, encoding="utf-8") as fil: return json.load(fil) @@ -136,6 +136,7 @@ def parse_args(required_config_keys): -d,--discover Run in discover mode -p,--properties Properties file: DEPRECATED, please use --catalog instead --catalog Catalog file + --dev Runs the tap in dev mode Returns the parsed args object from argparse. 
For each argument that point to JSON files (config, state, properties), we will automatically @@ -165,6 +166,11 @@ def parse_args(required_config_keys): action='store_true', help='Do schema discovery') + parser.add_argument( + '--dev', + action='store_true', + help='Runs tap in dev mode') + args = parser.parse_args() if args.config: setattr(args, 'config_path', args.config) @@ -189,7 +195,7 @@ def parse_args(required_config_keys): def check_config(config, required_keys): missing_keys = [key for key in required_keys if key not in config] if missing_keys: - raise Exception("Config is missing required keys: {}".format(missing_keys)) + raise Exception(f"Config is missing required keys: {missing_keys}") def backoff(exceptions, giveup): diff --git a/tests/test_singer.py b/tests/test_singer.py index 4fb74de..7f69bb5 100644 --- a/tests/test_singer.py +++ b/tests/test_singer.py @@ -1,5 +1,6 @@ import singer import unittest +from unittest.mock import patch import datetime import dateutil from decimal import Decimal @@ -179,6 +180,31 @@ def test_parse_bulk_decs(self): value = self.create_record(value_str) self.assertEqual(Decimal(value_str), value) + @patch('sys.stdout') + def test_ensure_ascii_false(self, mock_stdout): + """ + Setting ensure_ascii=False will preserve special characters like é + in their original form. 
+ """ + rec = {"name": "José"} + expected_output = '{"type": "RECORD", "stream": "test_stream", "record": {"name": "José"}}\n' + rec_message = singer.RecordMessage(stream="test_stream", record=rec) + result = singer.write_message(rec_message, ensure_ascii=False) + mock_stdout.write.assert_called_once_with(expected_output) + mock_stdout.flush.assert_called_once() + + @patch('sys.stdout') + def test_ensure_ascii_true(self, mock_stdout): + """ + ensure_ascii defaults to True, special characters like é are + escaped into their ASCII representation (e.g., \u00e9) + """ + rec = {"name": "José"} + expected_output = '{"type": "RECORD", "stream": "test_stream", "record": {"name": "Jos\\u00e9"}}\n' + rec_message = singer.RecordMessage(stream="test_stream", record=rec) + result = singer.write_message(rec_message) + mock_stdout.write.assert_called_once_with(expected_output) + mock_stdout.flush.assert_called_once() if __name__ == '__main__': unittest.main()