From 928e55cae490ba84f5857b2a6a97f6ea600773bc Mon Sep 17 00:00:00 2001
From: Mikayla Thompson
Date: Fri, 18 Aug 2023 23:23:16 -0600
Subject: [PATCH] Add error handling, progress bar, cleanup

Signed-off-by: Mikayla Thompson
---
 .../main/docker/migrationConsole/Dockerfile   |  6 +-
 .../migrationConsole/humanReadableLogs.py     | 91 ++++++++++++++----
 2 files changed, 73 insertions(+), 24 deletions(-)

diff --git a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/Dockerfile b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/Dockerfile
index fea968cff..cf2857208 100644
--- a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/Dockerfile
+++ b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/Dockerfile
@@ -3,11 +3,13 @@ FROM ubuntu:focal
 ENV DEBIAN_FRONTEND noninteractive
 
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends python3.11.4 python3-pip python3-dev gcc libc-dev git curl && \
-    pip3 install urllib3==1.25.11 opensearch-benchmark==1.1.0
+    apt-get install -y --no-install-recommends python3.9 python3-pip python3-dev gcc libc-dev git curl && \
+    pip3 install urllib3==1.25.11 opensearch-benchmark==1.1.0 tqdm
 
 COPY runTestBenchmarks.sh /root/
+COPY humanReadableLogs.py /root/
 RUN chmod ugo+x /root/runTestBenchmarks.sh
+RUN chmod ugo+x /root/humanReadableLogs.py
 
 WORKDIR /root
 
 CMD tail -f /dev/null
\ No newline at end of file
diff --git a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/humanReadableLogs.py b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/humanReadableLogs.py
index 1c35bef79..2c38a318e 100755
--- a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/humanReadableLogs.py
+++ b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/humanReadableLogs.py
@@ -6,6 +6,12 @@
 import json
 import pathlib
 from typing import Optional
+import logging
+
+from tqdm import tqdm
+from tqdm.contrib.logging import logging_redirect_tqdm
+
+logger = logging.getLogger(__name__)
 
 LOG_JSON_TUPLE_FIELD = "message"
 BASE64_ENCODED_TUPLE_PATHS = ["request.body", "primaryResponse.body", "shadowResponse.body"]
@@ -27,14 +33,21 @@
 BULK_URI_PATH = "_bulk"
 
 
-def get_element(element: str, dict_: dict) -> Optional[any]:
+class DictionaryPathException(Exception):
+    pass
+
+
+def get_element(element: str, dict_: dict, raise_on_error=False) -> Optional[any]:
     keys = element.split('.')
     rv = dict_
     for key in keys:
         try:
             rv = rv[key]
         except KeyError:
-            return None
+            if raise_on_error:
+                raise DictionaryPathException(f"Key {key} was not present.")
+            else:
+                return None
     return rv
 
 
@@ -53,33 +66,67 @@ def parse_args():
     return parser.parse_args()
 
 
-def parse_body_value(raw_value: str, content_encoding: Optional[str], content_type: Optional[str], is_bulk: bool):
-    b64decoded = base64.b64decode(raw_value)
+def parse_body_value(raw_value: str, content_encoding: Optional[str],
+                     content_type: Optional[str], is_bulk: bool, line_no: int):
+    try:
+        b64decoded = base64.b64decode(raw_value)
+    except Exception as e:
+        logger.error(f"Body value on line {line_no} could not be decoded: {e}. Skipping parsing body value.")
+        return None
     is_gzipped = content_encoding is not None and content_encoding == CONTENT_ENCODING_GZIP
     is_json = content_type is not None and CONTENT_TYPE_JSON in content_type
     if is_gzipped:
-        unzipped = gzip.decompress(b64decoded)
+        try:
+            unzipped = gzip.decompress(b64decoded)
+        except Exception as e:
+            logger.error(f"Body value on line {line_no} should be gzipped but could not be unzipped: {e}. "
" + "Skipping parsing body value.") + return b64decoded else: unzipped = b64decoded - decoded = unzipped.decode("utf-8") + try: + decoded = unzipped.decode("utf-8") + except Exception as e: + logger.error(f"Body value on line {line_no} could not be decoded to utf-8: {e}. " + "Skipping parsing body value.") + return unzipped if is_json and len(decoded) > 0: if is_bulk: - return [json.loads(line) for line in decoded.splitlines()] - return json.loads(decoded) + try: + return [json.loads(line) for line in decoded.splitlines()] + except Exception as e: + logger.error("Body value on line {line_no} should be a bulk json (list of json lines) but " + f"could not be parsed: {e}. Skipping parsing body value.") + return decoded + try: + return json.loads(decoded) + except Exception as e: + logger.error(f"Body value on line {line_no} should be a json but could not be parsed: {e}. " + "Skipping parsing body value.") + return decoded return decoded -def parse_tuple(line): +def parse_tuple(line: str, line_no: int): item = json.loads(line) message = item[LOG_JSON_TUPLE_FIELD] tuple = json.loads(message) - for path in BASE64_ENCODED_TUPLE_PATHS: - base64value = get_element(path, tuple) - content_encoding = get_element(CONTENT_ENCODING_PATH[path], tuple) - content_type = get_element(CONTENT_TYPE_PATH[path], tuple) - is_bulk_path = BULK_URI_PATH in get_element(URI_PATH, tuple) - value = parse_body_value(base64value, content_encoding, content_type, is_bulk_path) - set_element(path, tuple, value) + try: + is_bulk_path = BULK_URI_PATH in get_element(URI_PATH, tuple, raise_on_error=True) + except DictionaryPathException as e: + logger.error(f"`{URI_PATH}` on line {line_no} could not be loaded: {e} " + f"Skipping parsing tuple.") + return tuple + for body_path in BASE64_ENCODED_TUPLE_PATHS: + base64value = get_element(body_path, tuple) + if base64value is None: + # This component has no body element, which is potentially valid. + continue + content_encoding = get_element(CONTENT_ENCODING_PATH[body_path], tuple) + content_type = get_element(CONTENT_TYPE_PATH[body_path], tuple) + value = parse_body_value(base64value, content_encoding, content_type, is_bulk_path, line_no) + if value: + set_element(body_path, tuple, value) return tuple @@ -90,10 +137,10 @@ def parse_tuple(line): else: outfile = args.infile.parent / f"readable-{args.infile.name}" print(f"Input file: {args.infile}; Output file: {outfile}") - with open(args.infile, 'r') as in_f: - with open(outfile, 'w') as out_f: - for line in in_f: - print(parse_tuple(line), file=out_f) -# TODO: add some try/catching -# TODO: add a progress indicator for large files + logging.basicConfig(level=logging.INFO) + with logging_redirect_tqdm(): + with open(args.infile, 'r') as in_f: + with open(outfile, 'w') as out_f: + for i, line in tqdm(enumerate(in_f)): + print(parse_tuple(line, i + 1), file=out_f)