diff --git a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/Dockerfile b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/Dockerfile index aa2ef84e8..fea968cff 100644 --- a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/Dockerfile +++ b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/Dockerfile @@ -3,8 +3,8 @@ FROM ubuntu:focal ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && \ - apt-get install -y --no-install-recommends python3.9 python3-pip python3-dev gcc libc-dev git curl && \ - pip3 install opensearch-benchmark + apt-get install -y --no-install-recommends python3.11.4 python3-pip python3-dev gcc libc-dev git curl && \ + pip3 install urllib3==1.25.11 opensearch-benchmark==1.1.0 COPY runTestBenchmarks.sh /root/ RUN chmod ugo+x /root/runTestBenchmarks.sh diff --git a/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/humanReadableLogs.py b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/humanReadableLogs.py new file mode 100755 index 000000000..3afade882 --- /dev/null +++ b/TrafficCapture/dockerSolution/src/main/docker/migrationConsole/humanReadableLogs.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 + +# Find log files in /shared_replayer_output +# User can select one or more files (down the line, specific time ranges?) +# Load file, read each line as json. +# Extract `message`, do the same un-base64 and possibly un-gzip as the comparator + +# ./humanReadableLogs /shared_replayer_output/output_tuple.log /shared_replayer_output/readable_tuples.json + +import argparse +import pathlib +import json +import base64 +import gzip +from typing import Optional + +LOG_JSON_TUPLE_FIELD = "message" +BASE64_ENCODED_TUPLE_PATHS = ["request.body", "primaryResponse.body", "shadowResponse.body"] +# TODO: I'm not positive about the capitalization of the Content-Encoding and Content-Type headers. +# This version worked on my test cases, but not guaranteed to work in all cases. 
# For each base64-encoded body path, the path of the header that describes it.
# The keys are the same three body paths listed in BASE64_ENCODED_TUPLE_PATHS.
_TUPLE_SECTIONS = ("request", "primaryResponse", "shadowResponse")
CONTENT_ENCODING_PATH = {
    f"{section}.body": f"{section}.content-encoding" for section in _TUPLE_SECTIONS
}
CONTENT_TYPE_PATH = {
    f"{section}.body": f"{section}.content-type" for section in _TUPLE_SECTIONS
}
CONTENT_TYPE_JSON = "application/json"
CONTENT_ENCODING_GZIP = "gzip"
URI_PATH = "request.Request-URI"
BULK_URI_PATH = "_bulk"


def get_element(element: str, dict_: dict) -> Optional[object]:
    """Return the value stored at the dot-separated path *element* inside *dict_*.

    Returns None when a key along the path is missing, or when an intermediate value
    cannot be indexed by key (for example it is None or a string).
    """
    current = dict_
    for key in element.split('.'):
        try:
            current = current[key]
        except (KeyError, TypeError):
            # KeyError: key absent. TypeError: intermediate value is not a mapping.
            return None
    return current


def set_element(element: str, dict_: dict, value: object) -> None:
    """Store *value* at the dot-separated path *element* inside *dict_*, in place.

    Every key before the last one must already exist; a missing one raises KeyError.
    """
    *parents, leaf = element.split('.')
    target = dict_
    for key in parents:
        target = target[key]
    target[leaf] = value


def parse_args():
    """Parse the command line: a positional input file and an optional --outfile."""
    parser = argparse.ArgumentParser()
    parser.add_argument("infile", type=pathlib.Path, help="Path to input logged tuple file.")
    parser.add_argument("--outfile", type=pathlib.Path, help="Path for output human readable tuple file.")
    return parser.parse_args()


def _normalize_header(value):
    """Strip and lower-case a header value when it is a string; return anything else unchanged."""
    return value.strip().lower() if isinstance(value, str) else value


def parse_body_value(raw_value: str, content_encoding: Optional[str], content_type: Optional[str], is_bulk: bool):
    """Turn a base64-encoded HTTP body into text or parsed JSON.

    The body is base64-decoded, gunzipped when *content_encoding* is gzip, and decoded as
    UTF-8 (undecodable bytes are replaced rather than raising). Header values are compared
    case-insensitively.

    When *content_type* mentions application/json and the text is non-empty:
      * with *is_bulk*, every non-blank line is parsed as its own JSON document and the list
        of documents is returned; if that fails the text is parsed as a single document;
      * otherwise the text is parsed as a single JSON document.
    If the text cannot be parsed as JSON at all, it is returned unchanged so that one odd
    body does not abort the whole conversion.

    Raises whatever base64.b64decode / gzip.decompress raise on corrupt input.
    """
    payload = base64.b64decode(raw_value)
    if _normalize_header(content_encoding) == CONTENT_ENCODING_GZIP:
        payload = gzip.decompress(payload)
    text = payload.decode("utf-8", errors="replace")

    normalized_type = _normalize_header(content_type)
    is_json = normalized_type is not None and CONTENT_TYPE_JSON in normalized_type
    if not is_json or not text:
        return text

    if is_bulk:
        try:
            return [json.loads(entry) for entry in text.splitlines() if entry.strip()]
        except json.JSONDecodeError:
            # Not newline-delimited JSON (e.g. a multi-line response to a bulk request);
            # fall through and try the text as one document.
            pass
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        return text


def parse_tuple(line):
    """Parse one logged line and return its tuple with the bodies made human readable.

    *line* is a JSON object whose LOG_JSON_TUPLE_FIELD entry is itself a JSON-encoded tuple.
    Each path in BASE64_ENCODED_TUPLE_PATHS that holds a string is replaced, in the returned
    dict, by the result of parse_body_value(). Paths that are absent (for example a request
    without a body, or a tuple without a shadow response) are left as they are.
    """
    record = json.loads(line)
    parsed = json.loads(record[LOG_JSON_TUPLE_FIELD])

    # The URI is the same for every body of the tuple, so decide "bulk" once.
    uri = get_element(URI_PATH, parsed)
    is_bulk = isinstance(uri, str) and BULK_URI_PATH in uri

    for path in BASE64_ENCODED_TUPLE_PATHS:
        encoded_body = get_element(path, parsed)
        if not isinstance(encoded_body, str):
            continue  # nothing to decode at this path
        content_encoding = get_element(CONTENT_ENCODING_PATH[path], parsed)
        content_type = get_element(CONTENT_TYPE_PATH[path], parsed)
        set_element(path, parsed, parse_body_value(encoded_body, content_encoding, content_type, is_bulk))
    return parsed


def main():
    """Convert the input tuple log into a file with one human readable JSON tuple per line."""
    args = parse_args()
    print(args.infile)
    outfile = args.outfile or args.infile.parent / f"readable-{args.infile.name}"
    print(f"Will output to {outfile}")
    with open(args.infile, 'r', encoding="utf-8") as in_f, open(outfile, 'w', encoding="utf-8") as out_f:
        for line in in_f:
            if not line.strip():
                continue  # tolerate blank lines in the log
            # json.dumps keeps the output machine readable; printing the dict directly
            # would emit Python repr (single quotes, True/None), which is not JSON.
            print(json.dumps(parse_tuple(line)), file=out_f)


if __name__ == "__main__":
    main()