Skip to content

Commit

Permalink
FIX: More generic way of using different (de)-serializers
Browse files Browse the repository at this point in the history
Signed-off-by: Sebastian Waldbauer <[email protected]>
  • Loading branch information
waldbauer-certat committed Jul 15, 2022
1 parent e97db41 commit 1253c3e
Show file tree
Hide file tree
Showing 32 changed files with 270 additions and 188 deletions.
2 changes: 1 addition & 1 deletion intelmq/bin/intelmqdump.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ def main():
if queue_name in pipeline_pipes:
if runtime_config[pipeline_pipes[queue_name]]['group'] == 'Parser' and json.loads(msg)['__type'] == 'Event':
print('Event converted to Report automatically.')
msg = message.Report(message.MessageFactory.unserialize(msg)).serialize()
msg = message.Report(message.MessageFactory.deserialize(msg)).serialize()
else:
print(red(f"The given queue '{queue_name}' is not configured. Please retry with a valid queue."))
break
Expand Down
2 changes: 1 addition & 1 deletion intelmq/bots/collectors/amqp/collector_amqp.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def process(self):
self.logger.exception('Error receiving messages.')
else:
if self.expect_intelmq_message:
message = MessageFactory.unserialize(body.decode())
message = MessageFactory.deserialize(body.decode())
self.send_message(message, auto_add=False)
else:
report = self.new_report()
Expand Down
4 changes: 0 additions & 4 deletions intelmq/bots/collectors/microsoft/collector_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,7 @@ class MicrosoftAzureCollectorBot(CollectorBot, CacheMixin):

def init(self):
if ContainerClient is None or create_configuration is None:
<<<<<<< HEAD
raise MissingDependencyError("azure-storage-blob", version='>=12.0.0')
=======
raise MissingDependencyError("azure.storage", version='>=12.0.0')
>>>>>>> c78494bb6 (DOC: azure collector: document minimum azure version)

self.config = create_configuration(storage_sdk='blob')
if hasattr(self, 'https_proxy'):
Expand Down
4 changes: 1 addition & 3 deletions intelmq/bots/outputs/redis/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ def process(self):
event = self.receive_message()

try:
self.output.lpush(self.queue,
event.to_json(hierarchical=self.hierarchical_output,
with_type=self.with_type))
self.output.lpush(self.queue, event.to_pack(use_packer=self.use_packer, hierarchical=self.hierarchical, with_type=self.with_type))
except Exception:
self.logger.exception('Failed to send message. Reconnecting.')
self.connect()
Expand Down
2 changes: 1 addition & 1 deletion intelmq/bots/outputs/udp/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def process(self):
del event['raw']

if self.format == 'json':
self.send(self.header + event.to_json())
self.send(self.header + event.to_pack(use_packer=self.format))
elif self.format == 'delimited':
self.send(self.delimited(event))

Expand Down
2 changes: 1 addition & 1 deletion intelmq/bots/parsers/json/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def process(self):
lines = [base64_decode(report['raw'])]

for line in lines:
new_event = MessageFactory.unserialize(line,
new_event = MessageFactory.deserialize(line,
harmonization=self.harmonization,
default_type='Event',
use_packer="json")
Expand Down
15 changes: 7 additions & 8 deletions intelmq/lib/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import inspect
import io
import json
import msgpack
import logging
import os
import re
Expand Down Expand Up @@ -102,6 +101,7 @@ class Bot:
statistics_host: str = "127.0.0.1"
statistics_password: Optional[str] = None
statistics_port: int = 6379
use_packer: str = os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')

_message_processed_verb: str = 'Processed'

Expand Down Expand Up @@ -330,8 +330,8 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True,
self.logger.error('Pipeline failed.')
self.__disconnect_pipelines()

except exceptions.UnserializationError as exc:
self.logger.exception('Could not unserialize message from pipeline. No retries useful.')
except exceptions.DeserializationError as exc:
self.logger.exception('Could not deserialize message from pipeline. No retries useful.')

# ensure that we do not re-process the faulty message
self.__error_retries_counter = self.error_max_retries + 1
Expand Down Expand Up @@ -662,7 +662,7 @@ def receive_message(self) -> libmessage.Message:
return self.receive_message()

try:
self.__current_message = libmessage.MessageFactory.unserialize(message,
self.__current_message = libmessage.MessageFactory.deserialize(message,
harmonization=self.harmonization)
except exceptions.InvalidKey as exc:
# In case a incoming message is malformed an does not conform with the currently
Expand Down Expand Up @@ -821,7 +821,7 @@ def __init_logger(self):

def __log_configuration_parameter(self, config_name: str, option: str, value: Any):
if "password" in option or "token" in option:
value = "HIDDEN"
value = "<redacted>"

message = "{} configuration: parameter {!r} loaded with value {!r}." \
.format(config_name.title(), option, value)
Expand Down Expand Up @@ -1319,9 +1319,8 @@ def export_event(self, event: libmessage.Event,
if 'raw' in event:
del event['raw']
if return_type is str:
return event.to_json(hierarchical=self.hierarchical,
with_type=self.with_type,
jsondict_as_string=self.jsondict_as_string)
return event.to_pack(use_packer=self.use_packer, hierarchical=self.hierarchical,
with_type=self.with_type)
else:
retval = event.to_dict(hierarchical=self.hierarchical,
with_type=self.with_type,
Expand Down
2 changes: 1 addition & 1 deletion intelmq/lib/bot_debugger.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def outputappend(self, msg):
def arg2msg(self, msg):
default_type = "Report" if (self.runtime_configuration.get("group", None) == "Parser" or isinstance(self.instance, ParserBot)) else "Event"
try:
msg = MessageFactory.unserialize(msg, default_type=default_type)
msg = MessageFactory.deserialize(msg, default_type=default_type)
except (Exception, KeyError, TypeError, ValueError) as exc:
if exists(msg):
with open(msg) as f:
Expand Down
20 changes: 17 additions & 3 deletions intelmq/lib/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,24 @@ def __init__(self, encodings=None, exception: UnicodeDecodeError = None,
super().__init__("Could not decode string%s." % suffix)


class UnserializationError(IntelMQException, ValueError):
class DeserializationError(IntelMQException, ValueError):
"""
Unrecoverable error during message unserialization
Unrecoverable error during message deserialization
"""
def __init__(self, exception: Exception = None, object: bytes = None):
self.object = object
super().__init__("Could not unserialize message%s." % exception)
super().__init__("Could not deserialize message, %s." % exception)


class SerializationError(IntelMQException, ValueError):
"""
Unrecoverable error during message serialization
"""
def __init__(self, exception: Exception = None, object: bytes = None):
self.object = object
super().__init__("Could not serialize message, %s." % exception)


class MissingPackerError(IntelMQException):
def __init__(self, packer: str):
super().__init__(f"Could not load '{packer}' as packer, please check intelmq.lib.packers.{packer.lower()} and documentation")
72 changes: 42 additions & 30 deletions intelmq/lib/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@
Use MessageFactory to get a Message object (types Report and Event).
"""
import hashlib
import importlib
import inspect
import json
import re
import warnings
from collections import defaultdict
from typing import Any, Dict, Iterable, Optional, Sequence, Union
import msgpack

import intelmq.lib.exceptions as exceptions
import intelmq.lib.harmonization
from intelmq import HARMONIZATION_CONF_FILE
from intelmq.lib import utils
from intelmq.lib.packers.packer import Packer

__all__ = ['Event', 'Message', 'MessageFactory', 'Report']
VALID_MESSSAGE_TYPES = ('Event', 'Message', 'Report')
Expand All @@ -29,8 +31,8 @@

class MessageFactory:
"""
unserialize: JSON encoded message to object
serialize: object to JSON encoded object
deserialize: packed message to object
serialize: object to packed
"""

@staticmethod
Expand All @@ -45,7 +47,7 @@ def from_dict(message: dict, harmonization=None,
default_type: If '__type' is not present in message, the given type will be used
See also:
MessageFactory.unserialize
MessageFactory.deserialize
MessageFactory.serialize
"""
if default_type and "__type" not in message:
Expand All @@ -61,8 +63,8 @@ def from_dict(message: dict, harmonization=None,
return class_reference(message, auto=True, harmonization=harmonization)

@staticmethod
def unserialize(raw_message: bytes, harmonization: dict = None,
default_type: Optional[str] = None, use_packer: str = "msgpack") -> dict:
def deserialize(raw_message: bytes, harmonization: dict = None,
default_type: Optional[str] = None, use_packer: str = "MsgPack", **kwargs) -> dict:
"""
Takes JSON-encoded Message object, returns instance of correct class.
Expand All @@ -75,19 +77,18 @@ def unserialize(raw_message: bytes, harmonization: dict = None,
MessageFactory.from_dict
MessageFactory.serialize
"""
message = Message.unserialize(raw_message, use_packer=use_packer)
message = Message.deserialize(raw_message, use_packer=use_packer, **kwargs)
return MessageFactory.from_dict(message, harmonization=harmonization,
default_type=default_type)

@staticmethod
def serialize(message) -> bytes:
def serialize(message, use_packer: str = 'MsgPack', **kwargs) -> bytes:
"""
Takes instance of message-derived class and makes JSON-encoded Message.
Takes instance of message-derived class and makes packed Message.
The class is saved in __type attribute.
"""
raw_message = Message.serialize(message)
return raw_message
return Message.serialize(message, use_packer=use_packer, **kwargs)


class Message(dict):
Expand Down Expand Up @@ -307,36 +308,43 @@ def copy(self):
return retval

def deep_copy(self):
return MessageFactory.unserialize(MessageFactory.serialize(self),
return MessageFactory.deserialize(MessageFactory.serialize(self),
harmonization={self.__class__.__name__.lower(): self.harmonization_config})

def __str__(self):
return self.serialize(use_packer="json")
return self.serialize(use_packer="JSON")

def serialize(self, use_packer: str = "msgpack"):
def serialize(self, use_packer: str = "MsgPack", **kwargs):
delete_type = False
if '__type' not in self:
delete_type = True
self['__type'] = self.__class__.__name__

if use_packer == "json":
packed = json.dumps(self)
else:
packed = msgpack.packb(self)
try:
packer: Packer = inspect.getmembers(importlib.import_module(f'intelmq.lib.packers.{use_packer.lower()}.packer'))[0][1]()
except:
raise exceptions.MissingPackerError(packer=use_packer)

try:
packed = packer.serialize(data=self, **kwargs)
except Exception as exc:
raise exceptions.SerializationError(exception=exc, object=self)

if delete_type:
del self['__type']
return packed

@staticmethod
def unserialize(message: bytes, use_packer: str = "msgpack"):
def deserialize(message: bytes, use_packer: str = "MsgPack", **kwargs):
try:
if use_packer == "json":
return json.loads(message)
else:
return msgpack.unpackb(message, raw=False)
packer: Packer = inspect.getmembers(importlib.import_module(f'intelmq.lib.packers.{use_packer.lower()}.packer'))[0][1]()
except:
raise exceptions.MissingPackerError(packer=use_packer)

try:
return packer.deserialize(data=message, **kwargs)
except Exception as exc:
raise exceptions.UnserializationError(exception=exc, object=message)
raise exceptions.DeserializationError(exception=exc, object=message)

def __is_valid_key(self, key: str):
try:
Expand Down Expand Up @@ -489,13 +497,17 @@ def to_dict(self, hierarchical: bool = False, with_type: bool = False,

return new_dict

def to_json(self, hierarchical=False, with_type=False, jsondict_as_string=False):
json_dict = self.to_dict(hierarchical=hierarchical, with_type=with_type)
return json.dumps(json_dict, ensure_ascii=False, sort_keys=True)
def to_pack(self, use_packer="MsgPack", hierarchical=False, with_type=False, **kwargs):
try:
packer: Packer = inspect.getmembers(importlib.import_module(f'intelmq.lib.packers.{use_packer.lower()}.packer'))[0][1]()
except:
raise exceptions.MissingPackerError(packer=use_packer)

def to_msgpack(self, hierarchical=False, with_type=False):
msgpack_dict = self.to_dict(hierarchical=hierarchical, with_type=with_type)
return msgpack.packb(msgpack_dict)
try:
data = self.to_dict(hierarchical=hierarchical, with_type=with_type)
return packer.serialize(data, **kwargs)
except Exception as exc:
raise exceptions.SerializationError(exception=exc, object=self)

def __eq__(self, other: dict) -> bool:
"""
Expand Down
Empty file added intelmq/lib/packers/__init__.py
Empty file.
5 changes: 5 additions & 0 deletions intelmq/lib/packers/json/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2022 CERT.at GmbH <[email protected]>
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
19 changes: 19 additions & 0 deletions intelmq/lib/packers/json/packer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPDX-FileCopyrightText: 2022 CERT.at GmbH <[email protected]>
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-

from intelmq.lib.packers.packer import Packer
import json


class JSON(Packer):
def __init__(self) -> None:
super().__init__()

def serialize(self, data, **kwargs) -> bytes:
return json.dumps(data, **kwargs)

def deserialize(self, data, **kwargs) -> object:
return json.loads(data, **kwargs)
7 changes: 7 additions & 0 deletions intelmq/lib/packers/msgpack/REQUIREMENTS.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SPDX-FileCopyrightText: 2022 CERT.at GmbH <[email protected]>
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-

msgpack>=0.5
5 changes: 5 additions & 0 deletions intelmq/lib/packers/msgpack/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2022 CERT.at GmbH <[email protected]>
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
27 changes: 27 additions & 0 deletions intelmq/lib/packers/msgpack/packer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# SPDX-FileCopyrightText: 2022 CERT.at GmbH <[email protected]>
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-

from intelmq.lib.packers.packer import Packer
from intelmq.lib import exceptions


try:
import msgpack
except:
msgpack = None


class MsgPack(Packer):
def __init__(self) -> None:
if msgpack is None:
raise exceptions.MissingDependencyError("msgpack")
super().__init__()

def serialize(self, data, **kwargs) -> bytes:
return msgpack.packb(data, **kwargs)

def deserialize(self, data, **kwargs) -> object:
return msgpack.unpackb(data, raw=False, **kwargs)
15 changes: 15 additions & 0 deletions intelmq/lib/packers/packer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-FileCopyrightText: 2022 CERT.at GmbH <[email protected]>
#
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-

class Packer():
def __init__(self) -> None:
pass

def serialize(self, data: bytes, **kwargs) -> bytes:
raise NotImplementedError()

def deserialize(self, data: bytes, **kwargs) -> object:
raise NotImplementedError()
Loading

0 comments on commit 1253c3e

Please sign in to comment.