From dba9ef34240b36c2590196aefdeadf3d0c6716a0 Mon Sep 17 00:00:00 2001 From: nicolas-f <1382241+nicolas-f@users.noreply.github.com> Date: Wed, 10 Jan 2024 13:42:39 +0100 Subject: [PATCH] manage json decode error --- .../scripts_elasticsearch/feed_rpi_data.py | 57 ++++++++++--------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/services/ansible_openvpn/playbooks/files/scripts_elasticsearch/feed_rpi_data.py b/services/ansible_openvpn/playbooks/files/scripts_elasticsearch/feed_rpi_data.py index d71d91b..c8d6cbf 100644 --- a/services/ansible_openvpn/playbooks/files/scripts_elasticsearch/feed_rpi_data.py +++ b/services/ansible_openvpn/playbooks/files/scripts_elasticsearch/feed_rpi_data.py @@ -61,33 +61,36 @@ def fetch_data(args): try: with gzip.open(file_path, 'rb') as f: for line in f: - json_dict = json.loads(line) - if "_index" not in json_dict: - # must create index as it is not specified in the - # document - epoch = os.path.getmtime(file_path) - if "timestamp" in json_dict: - epoch = json_dict["timestamp"] - elif "_source" in json_dict and "timestamp" in \ - json_dict["_source"]: - epoch = json_dict["_source"]["timestamp"] - elif "date" in json_dict: - epoch = calendar.timegm(datetime.datetime.strptime( - json_dict["date"], "%Y-%m-%dT%H:%M:%S.%fZ") - .timetuple()) - dt = datetime.datetime.utcfromtimestamp(epoch) - stop_position = name.find("_") - if stop_position == -1: - stop_position = name.find(".") - json_dict["_index"] = name[:stop_position] + "_" + dt.strftime(args.time_format) - json_dict["_index"] = args.index_prepend + json_dict[ - "_index"] - if "_id" not in json_dict: - # avoid duplicate by hashing the document - json_dict["_id"] = base64.b64encode( - hashlib.sha256(line).digest()).decode( - sys.getdefaultencoding()) - yield json_dict + try: + json_dict = json.loads(line) + if "_index" not in json_dict: + # must create index as it is not specified in the + # document + epoch = os.path.getmtime(file_path) + if "timestamp" in json_dict: + epoch = json_dict["timestamp"] + elif "_source" in json_dict and "timestamp" in \ + json_dict["_source"]: + epoch = json_dict["_source"]["timestamp"] + elif "date" in json_dict: + epoch = calendar.timegm(datetime.datetime.strptime( + json_dict["date"], "%Y-%m-%dT%H:%M:%S.%fZ") + .timetuple()) + dt = datetime.datetime.utcfromtimestamp(epoch) + stop_position = name.find("_") + if stop_position == -1: + stop_position = name.find(".") + json_dict["_index"] = name[:stop_position] + "_" + dt.strftime(args.time_format) + json_dict["_index"] = args.index_prepend + json_dict[ + "_index"] + if "_id" not in json_dict: + # avoid duplicate by hashing the document + json_dict["_id"] = base64.b64encode( + hashlib.sha256(line).digest()).decode( + sys.getdefaultencoding()) + yield json_dict + except json.decoder.JSONDecodeError: + print("Cannot parse json: "+line) except zlib.error as e: print("Issue with compressed file named "+file_path, e) if not args.keep_file: