Skip to content

Commit

Permalink
Add script to transform tuples to human readable format
Browse files Browse the repository at this point in the history
Signed-off-by: Mikayla Thompson <[email protected]>
  • Loading branch information
mikaylathompson committed Aug 18, 2023
1 parent dd17251 commit 0e774b0
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ FROM ubuntu:focal
ENV DEBIAN_FRONTEND noninteractive

RUN apt-get update && \
apt-get install -y --no-install-recommends python3.9 python3-pip python3-dev gcc libc-dev git curl && \
pip3 install opensearch-benchmark
apt-get install -y --no-install-recommends python3.11.4 python3-pip python3-dev gcc libc-dev git curl && \
pip3 install urllib3==1.25.11 opensearch-benchmark==1.1.0

COPY runTestBenchmarks.sh /root/
RUN chmod ugo+x /root/runTestBenchmarks.sh
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env python3

# Find log files in /shared_replayer_output
# User can select one or more files (down the line, specific time ranges?)
# Load file, read each line as json.
# Extract `message`, do the same un-base64 and possibly un-gzip as the comparator

# ./humanReadableLogs /shared_replayer_output/output_tuple.log /shared_replayer_output/readable_tuples.json

import argparse
import pathlib
import json
import base64
import gzip
from typing import Optional

LOG_JSON_TUPLE_FIELD = "message"
BASE64_ENCODED_TUPLE_PATHS = ["request.body", "primaryResponse.body", "shadowResponse.body"]
# TODO: I'm not positive about the capitalization of the Content-Encoding and Content-Type headers.
# This version worked on my test cases, but not guaranteed to work in all cases.
CONTENT_ENCODING_PATH = {
BASE64_ENCODED_TUPLE_PATHS[0]: "request.content-encoding",
BASE64_ENCODED_TUPLE_PATHS[1]: "primaryResponse.content-encoding",
BASE64_ENCODED_TUPLE_PATHS[2]: "shadowResponse.content-encoding"
}
CONTENT_TYPE_PATH = {
BASE64_ENCODED_TUPLE_PATHS[0]: "request.content-type",
BASE64_ENCODED_TUPLE_PATHS[1]: "primaryResponse.content-type",
BASE64_ENCODED_TUPLE_PATHS[2]: "shadowResponse.content-type"
}
CONTENT_TYPE_JSON = "application/json"
CONTENT_ENCODING_GZIP = "gzip"
URI_PATH = "request.Request-URI"
BULK_URI_PATH = "_bulk"


def get_element(element: str, dict_: dict) -> Optional[any]:
keys = element.split('.')
rv = dict_
for key in keys:
try:
rv = rv[key]
except KeyError:
return None
return rv


def set_element(element: str, dict_: dict, value: any) -> None:
keys = element.split('.')
rv = dict_
for key in keys[:-1]:
rv = rv[key]
rv[keys[-1]] = value


def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("infile", type=pathlib.Path, help="Path to input logged tuple file.")
parser.add_argument("--outfile", type=pathlib.Path, help="Path for output human readable tuple file.")
return parser.parse_args()


def parse_body_value(raw_value: str, content_encoding: Optional[str], content_type: Optional[str], is_bulk: bool):
b64decoded = base64.b64decode(raw_value)
is_gzipped = content_encoding is not None and content_encoding == CONTENT_ENCODING_GZIP
is_json = content_type is not None and CONTENT_TYPE_JSON in content_type
if is_gzipped:
unzipped = gzip.decompress(b64decoded)
else:
unzipped = b64decoded
decoded = unzipped.decode("utf-8")
if is_json and len(decoded) > 0:
if is_bulk:
return [json.loads(line) for line in decoded.splitlines()]
return json.loads(decoded)
return decoded


def parse_tuple(line):
item = json.loads(line)
message = item[LOG_JSON_TUPLE_FIELD]
tuple = json.loads(message)
for path in BASE64_ENCODED_TUPLE_PATHS:
base64value = get_element(path, tuple)
content_encoding = get_element(CONTENT_ENCODING_PATH[path], tuple)
content_type = get_element(CONTENT_TYPE_PATH[path], tuple)
is_bulk_path = BULK_URI_PATH in get_element(URI_PATH, tuple)
value = parse_body_value(base64value, content_encoding, content_type, is_bulk_path)
set_element(path, tuple, value)
return tuple


if __name__ == "__main__":
args = parse_args()
print(args.infile)
if args.outfile:
outfile = args.outfile
else:
outfile = args.infile.parent / f"readable-{args.infile.name}"
print(f"Will output to {outfile}")
with open(args.infile, 'r') as in_f:
with open(outfile, 'w') as out_f:
for line in in_f:
print(parse_tuple(line), file=out_f)

0 comments on commit 0e774b0

Please sign in to comment.