From c30a5c9dd8e54f1c0189ca8ace942e04991f7d44 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 27 Apr 2021 11:18:07 +0200 Subject: [PATCH 01/45] REL: 2.3.2 Maintenance version --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 0b4482aeb..df7ccb51c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -284,7 +284,7 @@ CentOS 7 (with EPEL) provides both Python 3.4 and Python 3.6. If IntelMQ was ins type and reloads them afterwards. Removes any external dependencies (such as curl or wget). This is a replacement for shell scripts such as `update-tor-nodes`, `update-asn-data`, `update-geoip-data`, `update-rfiprisk-data`. - + Usage: ``` intelmq.bots.experts.asn_lookup.expert --update-database From 63b848a53292348b3add356ca630e87088ccc673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikk=20Margus=20M=C3=B6ll?= Date: Mon, 26 Apr 2021 23:22:03 +0300 Subject: [PATCH 02/45] DOC: Document new Sieve bot functionality --- docs/user/bots.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/user/bots.rst b/docs/user/bots.rst index fbad6c971..af433c8da 100644 --- a/docs/user/bots.rst +++ b/docs/user/bots.rst @@ -2983,6 +2983,25 @@ The following operators may be used to match events: * `:supersetof` tests if the list of values from the given key is a superset of the values specified as the argument. Example for matching hosts with at least the IoT and vulnerable tags: ``if extra.tags :supersetof ['iot', 'vulnerable'] { ... }`` + * Boolean values can be matched with `==` or `!=` followed by `true` or `false`. Example: + ``if extra.has_known_vulns == true { ... }`` + + * `:equals` tests for equality between lists, including order. Its result can be inverted by using `! :equals`. Example for checking a hostname-port pair: + ``if extra.host_tuple :equals ['dns.google', 53] { ... }`` + * `:setequals` tests for set-based equality (ignoring duplicates and value order) between a list of given values. Example for checking for the first nameserver of two domains, regardless of the order they are given in the list: + ``if extra.hostnames :setequals ['ns1.example.com', 'ns1.example.mx'] { ... }`` + + * `:overlaps` tests if there is at least one element in common between the list specified by a key and a list of values. Example for checking if at least one of the ICS, database or vulnerable tags is given: + ``if extra.tags :overlaps ['ics', 'database', 'vulnerable'] { ... } `` + + * `:subsetof` tests if the list of values from the given key only contains values from a set of values specified as the argument. Example for checking for a host that has only ns1.example.com and/or ns2.[...] as its apparent hostname: + ``if extra.hostnames :subsetof ['ns1.example.com', 'ns2.example.com'] { ... }`` + + * `:supersetof` tests if the list of values from the given key is a superset of the values specified as the argument. Example for matching hosts with at least the IoT and vulnerable tags: + ``if extra.tags :supersetof ['iot', 'vulnerable'] { ... }`` + + * The results of the list operators (`:equals`, `:setequals`, `:overlaps`, `:subsetof` and `:supersetof`) can be inverted with a prepended exclamation mark, such as `! :overlaps`. Note that in case there is no value with the given key or it is a non-list value, the result will always be false, regardless of negation. The existence of the key can be checked for separately. + * Boolean values can be matched with `==` or `!=` followed by `true` or `false`. Example: ``if extra.has_known_vulns == true { ... 
}`` From e03c14df67eb33b06d3c41013e3efed7c26dc1e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Fri, 30 Apr 2021 18:02:52 +0200 Subject: [PATCH 03/45] ENH: New expert bot for uWhoisd (#1918) * ENH: New expert bot for uWhoisd * ENH: Code cleanup, improve tests * DOC: Add documentation for uWhoisd --- docs/user/bots.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/user/bots.rst b/docs/user/bots.rst index af433c8da..74bf47dd4 100644 --- a/docs/user/bots.rst +++ b/docs/user/bots.rst @@ -3415,7 +3415,6 @@ Events without `source.url`, `source.fqdn`, `source.ip`, or `source.asn`, are ig only contains the domain. uWhoisd will automatically strip the subdomain part if it is present in the request. Example: `https://www.theguardian.co.uk` - * TLD: `co.uk` (uWhoisd uses the `Mozilla public suffix list `_ as a reference) * Domain: `theguardian.co.uk` * Subdomain: `www` From 2dab6a16763be1f4f0901277f7641684b4846c92 Mon Sep 17 00:00:00 2001 From: CysihZ Date: Mon, 15 Feb 2021 15:32:39 +0100 Subject: [PATCH 04/45] New Bot for Fireeye Appliances Referring to my mail I created a Parser& Bot which is able to Collect and parse data from fireeye mail and file analysis appliances. We are collection sha1 and MD5 hashed and if there is network communication we are also collecting urls and domains. --- .../collectors/fireeye/collector_fireeye.py | 96 ++++++++++++ intelmq/tests/assets/fireeyeFristRequest.json | 1 + intelmq/tests/assets/fireeyeSecondRequest.xml | 139 ++++++++++++++++++ .../bots/collectors/fireeye/test_collector.py | 55 +++++++ .../parsers/fireeye/test_parser_fireeye.py | 1 - 5 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 intelmq/bots/collectors/fireeye/collector_fireeye.py create mode 100644 intelmq/tests/assets/fireeyeFristRequest.json create mode 100644 intelmq/tests/assets/fireeyeSecondRequest.xml create mode 100644 intelmq/tests/bots/collectors/fireeye/test_collector.py diff --git a/intelmq/bots/collectors/fireeye/collector_fireeye.py b/intelmq/bots/collectors/fireeye/collector_fireeye.py new file mode 100644 index 000000000..cd780d387 --- /dev/null +++ b/intelmq/bots/collectors/fireeye/collector_fireeye.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +""" +Fireeye collector bot + +Parameters: +http_username, http_password: string +http_timeout_max_tries: an integer depicting how often a connection attempt is retried +dns_name : dns name of the local appliance +request_duration: how old date should be fetched eg 24_hours or 48_hours +""" +import base64 +import json + +from intelmq.lib.bot import CollectorBot +from intelmq.lib.utils import unzip, create_request_session_from_bot +from intelmq.lib.exceptions import MissingDependencyError + +try: + import xmltodict +except ImportError: + xmltodict = None + + +class FireeyeCollectorBot(CollectorBot): + + def xml_processor(self, uuid, token, new_report, dns_name, product): + + http_url = 'https://' + dns_name + '/wsapis/v2.0.0/openioc?alert_uuid=' + uuid + http_header = {'X-FeApi-Token': token} + httpResponse = self.session.get(url=http_url, headers=http_header) + binary = httpResponse.content + self.logger.debug('Collecting information for UUID: %r .', uuid) + try: + my_dict = xmltodict.parse(binary) + for indicator in my_dict['OpenIOC']['criteria']['Indicator']['IndicatorItem']: + indicatorType = indicator['Context']['@search'] + if indicatorType == 'FileItem/Md5sum': + new_report = self.new_report() + new_report.add("raw", binary) + self.send_message(new_report) + except KeyError: + 
self.logger.debug("No Iocs for UUID: %r .', uuid") + + def init(self): + if xmltodict is None: + raise MissingDependencyError("xmltodict") + + self.set_request_parameters() + self.session = create_request_session_from_bot(self) + self.dns_name = getattr(self.parameters, "dns_name", None) + if self.dns_name is None: + raise ValueError('No dns name provided.') + self.request_duration = getattr(self.parameters, "request_duration", None) + if self.request_duration is None: + raise ValueError('No request_duration provided.') + user = getattr(self.parameters, "http_username", None) + if user is None: + raise ValueError('No http_username provided.') + pw = getattr(self.parameters, "http_password", None) + if pw is None: + raise ValueError('No http_password provided.') + + # create auth token + token = user + ":" + pw + message_bytes = token.encode('ascii') + base64_bytes = base64.b64encode(message_bytes) + base64_message = base64_bytes.decode('ascii') + self.http_header = {'Authorization': 'Basic ' + base64_message} + self.custom_auth_url = "https://" + self.dns_name + "/wsapis/v2.0.0/auth/login" + + def process(self): + # get token for request + resp = self.session.post(url=self.custom_auth_url, headers=self.http_header) + if not resp.ok: + raise ValueError('Could not connect to appliance check User/PW. HTTP response status code was %i.' % resp.status_code) + # extract token and build auth header + token = resp.headers['X-FeApi-Token'] + http_header = {'X-FeApi-Token': token, 'Accept': 'application/json'} + http_url = "https://" + self.dns_name + "/wsapis/v2.0.0/alerts?duration=" + self.request_duration + self.logger.debug("Downloading report from %r.", http_url) + resp = self.session.get(url=http_url, headers=http_header) + self.logger.debug("Report downloaded.") + message = resp.json() + if message['alert'][0]: + new_report = self.new_report() + for alert in message['alert']: + self.logger.debug('Got a new message from PRODUCT: ' + alert['product'] + " UUID: " + alert['uuid'] + '.') + if alert['product'] == 'EMAIL_MPS' and alert['name'] == 'MALWARE_OBJECT': + uuid = alert['uuid'] + self.xml_processor(uuid, token, new_report, self.dns_name, product="EMAIL_MPS") + if alert['product'] == 'MAS' and alert['name'] == 'MALWARE_OBJECT': + uuid = alert['uuid'] + self.xml_processor(uuid, token, new_report, self.dns_name, product="MAS") + + +BOT = FireeyeCollectorBot diff --git a/intelmq/tests/assets/fireeyeFristRequest.json b/intelmq/tests/assets/fireeyeFristRequest.json new file mode 100644 index 000000000..fe1b603a4 --- /dev/null +++ b/intelmq/tests/assets/fireeyeFristRequest.json @@ -0,0 +1 @@ +{"alert": [{"explanation": {"malwareDetected": {"malware": [{"md5Sum": "21232f297a57a5a743894a0e4a801fc3", "sha256": "8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918", "name": "Downloader.Emotet"}]}, "osChanges": []}, "src": {"smtpMailFrom": "attacker@badland.com"}, "alertUrl": "https://127.0.0.1/emps/eanalysis?e_id=161862&type=attch", "action": "blocked", "occurred": "2021-01-05 14:19:06 +0100", "dst": {"smtpTo": "mymail@google.com"}, "smtpMessage": {"subject": "Online Streaming am 30.06.2020 1800 Uhr REMINDER"}, "applianceId": "16EV1C1A6K94", "id": 1454270, "rootInfection": 7113664, "sensorIp": "127.0.0.1", "name": "MALWARE_OBJECT", "severity": "MAJR", "uuid": "1591de22-4926-4124-b3ed-ffff96766295", "ack": "no", "product": "EMAIL_MPS", "sensor": "mail", "vlan": 0, "malicious": "yes", "scVersion": "0000.000"}], "appliance": "CMS", "version": "CMS (CMS) 0.0.0.000000", "msg": "concise", 
"alertsCount": 1} \ No newline at end of file diff --git a/intelmq/tests/assets/fireeyeSecondRequest.xml b/intelmq/tests/assets/fireeyeSecondRequest.xml new file mode 100644 index 000000000..ab91edc39 --- /dev/null +++ b/intelmq/tests/assets/fireeyeSecondRequest.xml @@ -0,0 +1,139 @@ + + + + New Malware-Object Indicator + Downloader.Emotet + fe-cm + 2021-01-05T13:23:07.921Z + + Alert URL + 27072 + 1591de22-4926-4124-b3ed-ffff96766295 + malware-object + 000BABDDC4FE + 1454270 + majr + Downloader.Emotet + 2021-01-05T13:19:06Z + 0.0.0.0 + 0.0.0.0 + eMPS + + + + + + + 21232f297a57a5a743894a0e4a801fc3 + + + + 8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918 + + + + + GET + + + + /wp-content/n/ + + + + shop.domain1.com + + + + shop.domain1.com + + + + 80 + + + + tcp + + + + + + GET + + + + /wp-admin/v/ + + + + domain2.com + + + + domain2.com + + + + 80 + + + + tcp + + + + + + GET + + + + /wordpress/CGMC/ + + + + domain3.net + + + + domain3.net + + + + 80 + + + + tcp + + + + + + GET + + + + /wp-includes/2jm3nIk/ + + + + domain4.com + + + + domain4.com + + + + 80 + + + + tcp + + + + + diff --git a/intelmq/tests/bots/collectors/fireeye/test_collector.py b/intelmq/tests/bots/collectors/fireeye/test_collector.py new file mode 100644 index 000000000..dd8a91186 --- /dev/null +++ b/intelmq/tests/bots/collectors/fireeye/test_collector.py @@ -0,0 +1,55 @@ +import unittest +import pathlib +import secrets + +import requests_mock + +import intelmq.lib.test as test +from intelmq.bots.collectors.fireeye.collector_fireeye import FireeyeCollectorBot + +RANDSTR = secrets.token_urlsafe(50) +ASSET_PATH_FIRST = pathlib.Path(__file__).parent / '../../../assets/fireeyeFristRequest.json' +ASSET_PATH_SECOND = pathlib.Path(__file__).parent / '../../../assets/fireeyeSecondRequest.xml' +PARAMETERS = {'dns_name': 'myfireeye.local', 'http_username': RANDSTR, 'http_password': RANDSTR, 'logging_level': 'DEBUG', 'request_duration': '24_hours', 'name': 'FireeyeCollector'} + + +def prepare_mocker(mocker): + test.skip_exotic() + mocker.post('https://myfireeye.local/wsapis/v2.0.0/auth/login', headers={'X-FeApi-Token': '1234567890'}) + mocker.get('https://myfireeye.local/wsapis/v2.0.0/alerts?duration=24_hours', text=ASSET_PATH_FIRST.read_text()) + mocker.get('https://myfireeye.local/wsapis/v2.0.0/openioc?alert_uuid=1591de22-4926-4124-b3ed-ffff96766295', text=ASSET_PATH_SECOND.read_text()) + + +@requests_mock.Mocker() +class TestFireeyeCollectorBot(test.BotTestCase, unittest.TestCase): + """ + Testcases for the Fireeye collector bot + """ + + @classmethod + def set_bot(cls): + cls.bot_reference = FireeyeCollectorBot + cls.use_cache = True + + def test_faulty_config(self, mocker): + prepare_mocker(mocker) + params = {'http_username': RANDSTR, 'http_password': RANDSTR, 'logging_level': 'DEBUG', 'request_duration': '24_hours'} + with self.assertRaises(ValueError) as context: + self.run_bot(iterations=1, parameters=params) + exception = context.exception + self.assertEqual(str(exception), 'No dns name provided.') + + def test_wrong_login(self, mocker): + prepare_mocker(mocker) + mocker.post('https://myfireeye.local/wsapis/v2.0.0/auth/login', status_code=500) + self.run_bot(iterations=1, parameters=PARAMETERS, allowed_error_count=1, allowed_warning_count=1) + self.assertLogMatches('ValueError: Could not connect to appliance check User/PW. 
HTTP response status code was 500.') + + def test_report_send(self, mocker): + prepare_mocker(mocker) + self.run_bot(iterations=1, parameters=PARAMETERS, allowed_warning_count=1) + self.assertAnyLoglineEqual('Processed 1 messages since last logging.', 'INFO') + + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/parsers/fireeye/test_parser_fireeye.py b/intelmq/tests/bots/parsers/fireeye/test_parser_fireeye.py index b4dec79a0..e043e7c87 100644 --- a/intelmq/tests/bots/parsers/fireeye/test_parser_fireeye.py +++ b/intelmq/tests/bots/parsers/fireeye/test_parser_fireeye.py @@ -66,6 +66,5 @@ def test_events(self): self.assertMessageEqual(position, event_) self.assertOutputQFILE_LINES = FILE.splitlines() - if __name__ == '__main__': # pragma: no cover unittest.main() From e5f55e17ac2e39a39277727ca4d4df73b0900fee Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 3 May 2021 18:10:09 +0200 Subject: [PATCH 05/45] MAINT+BUG: Various fixes in fireeye bots, tests and documentation rewrite bot bots' parameter handling to current methodology use HTTP mixin for fireeye collector --- .../collectors/fireeye/collector_fireeye.py | 38 ++--- intelmq/tests/assets/fireeyeFristRequest.json | 1 - intelmq/tests/assets/fireeyeSecondRequest.xml | 139 ------------------ .../bots/collectors/fireeye/test_collector.py | 11 +- .../parsers/fireeye/test_parser_fireeye.py | 1 + 5 files changed, 21 insertions(+), 169 deletions(-) delete mode 100644 intelmq/tests/assets/fireeyeFristRequest.json delete mode 100644 intelmq/tests/assets/fireeyeSecondRequest.xml diff --git a/intelmq/bots/collectors/fireeye/collector_fireeye.py b/intelmq/bots/collectors/fireeye/collector_fireeye.py index cd780d387..74beb8576 100644 --- a/intelmq/bots/collectors/fireeye/collector_fireeye.py +++ b/intelmq/bots/collectors/fireeye/collector_fireeye.py @@ -8,12 +8,9 @@ dns_name : dns name of the local appliance request_duration: how old date should be fetched eg 24_hours or 48_hours """ -import base64 -import json - from intelmq.lib.bot import CollectorBot -from intelmq.lib.utils import unzip, create_request_session_from_bot from intelmq.lib.exceptions import MissingDependencyError +from intelmq.lib.mixins import HttpMixin try: import xmltodict @@ -21,11 +18,16 @@ xmltodict = None -class FireeyeCollectorBot(CollectorBot): +class FireeyeCollectorBot(CollectorBot, HttpMixin): + + dns_name: str = None + request_duration: str = None + http_username: str = None + http_password: str = None def xml_processor(self, uuid, token, new_report, dns_name, product): - http_url = 'https://' + dns_name + '/wsapis/v2.0.0/openioc?alert_uuid=' + uuid + http_url = f'https://{host}/wsapis/v2.0.0/openioc?alert_uuid={uuid}' http_header = {'X-FeApi-Token': token} httpResponse = self.session.get(url=http_url, headers=http_header) binary = httpResponse.content @@ -39,34 +41,24 @@ def xml_processor(self, uuid, token, new_report, dns_name, product): new_report.add("raw", binary) self.send_message(new_report) except KeyError: - self.logger.debug("No Iocs for UUID: %r .', uuid") + self.logger.debug("No Iocs for UUID: %r.', uuid") def init(self): if xmltodict is None: raise MissingDependencyError("xmltodict") - self.set_request_parameters() - self.session = create_request_session_from_bot(self) - self.dns_name = getattr(self.parameters, "dns_name", None) if self.dns_name is None: raise ValueError('No dns name provided.') - self.request_duration = getattr(self.parameters, "request_duration", None) if self.request_duration is None: 
raise ValueError('No request_duration provided.') - user = getattr(self.parameters, "http_username", None) - if user is None: + if self.http_username is None: raise ValueError('No http_username provided.') - pw = getattr(self.parameters, "http_password", None) - if pw is None: + if self.http_password is None: raise ValueError('No http_password provided.') # create auth token - token = user + ":" + pw - message_bytes = token.encode('ascii') - base64_bytes = base64.b64encode(message_bytes) - base64_message = base64_bytes.decode('ascii') - self.http_header = {'Authorization': 'Basic ' + base64_message} - self.custom_auth_url = "https://" + self.dns_name + "/wsapis/v2.0.0/auth/login" + self.session = self.http_session() + self.custom_auth_url = f"https://{self.host}/wsapis/v2.0.0/auth/login" def process(self): # get token for request @@ -76,7 +68,7 @@ def process(self): # extract token and build auth header token = resp.headers['X-FeApi-Token'] http_header = {'X-FeApi-Token': token, 'Accept': 'application/json'} - http_url = "https://" + self.dns_name + "/wsapis/v2.0.0/alerts?duration=" + self.request_duration + http_url = f"https://{self.host}/wsapis/v2.0.0/alerts?duration={self.request_duration}" self.logger.debug("Downloading report from %r.", http_url) resp = self.session.get(url=http_url, headers=http_header) self.logger.debug("Report downloaded.") @@ -84,7 +76,7 @@ def process(self): if message['alert'][0]: new_report = self.new_report() for alert in message['alert']: - self.logger.debug('Got a new message from PRODUCT: ' + alert['product'] + " UUID: " + alert['uuid'] + '.') + self.logger.debug('Got a new message from product %r with UUID %r.', alert['product'], alert['uuid']) if alert['product'] == 'EMAIL_MPS' and alert['name'] == 'MALWARE_OBJECT': uuid = alert['uuid'] self.xml_processor(uuid, token, new_report, self.dns_name, product="EMAIL_MPS") diff --git a/intelmq/tests/assets/fireeyeFristRequest.json b/intelmq/tests/assets/fireeyeFristRequest.json deleted file mode 100644 index fe1b603a4..000000000 --- a/intelmq/tests/assets/fireeyeFristRequest.json +++ /dev/null @@ -1 +0,0 @@ -{"alert": [{"explanation": {"malwareDetected": {"malware": [{"md5Sum": "21232f297a57a5a743894a0e4a801fc3", "sha256": "8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918", "name": "Downloader.Emotet"}]}, "osChanges": []}, "src": {"smtpMailFrom": "attacker@badland.com"}, "alertUrl": "https://127.0.0.1/emps/eanalysis?e_id=161862&type=attch", "action": "blocked", "occurred": "2021-01-05 14:19:06 +0100", "dst": {"smtpTo": "mymail@google.com"}, "smtpMessage": {"subject": "Online Streaming am 30.06.2020 1800 Uhr REMINDER"}, "applianceId": "16EV1C1A6K94", "id": 1454270, "rootInfection": 7113664, "sensorIp": "127.0.0.1", "name": "MALWARE_OBJECT", "severity": "MAJR", "uuid": "1591de22-4926-4124-b3ed-ffff96766295", "ack": "no", "product": "EMAIL_MPS", "sensor": "mail", "vlan": 0, "malicious": "yes", "scVersion": "0000.000"}], "appliance": "CMS", "version": "CMS (CMS) 0.0.0.000000", "msg": "concise", "alertsCount": 1} \ No newline at end of file diff --git a/intelmq/tests/assets/fireeyeSecondRequest.xml b/intelmq/tests/assets/fireeyeSecondRequest.xml deleted file mode 100644 index ab91edc39..000000000 --- a/intelmq/tests/assets/fireeyeSecondRequest.xml +++ /dev/null @@ -1,139 +0,0 @@ - - - - New Malware-Object Indicator - Downloader.Emotet - fe-cm - 2021-01-05T13:23:07.921Z - - Alert URL - 27072 - 1591de22-4926-4124-b3ed-ffff96766295 - malware-object - 000BABDDC4FE - 1454270 - majr - Downloader.Emotet - 
2021-01-05T13:19:06Z - 0.0.0.0 - 0.0.0.0 - eMPS - - - - - - - 21232f297a57a5a743894a0e4a801fc3 - - - - 8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918 - - - - - GET - - - - /wp-content/n/ - - - - shop.domain1.com - - - - shop.domain1.com - - - - 80 - - - - tcp - - - - - - GET - - - - /wp-admin/v/ - - - - domain2.com - - - - domain2.com - - - - 80 - - - - tcp - - - - - - GET - - - - /wordpress/CGMC/ - - - - domain3.net - - - - domain3.net - - - - 80 - - - - tcp - - - - - - GET - - - - /wp-includes/2jm3nIk/ - - - - domain4.com - - - - domain4.com - - - - 80 - - - - tcp - - - - - diff --git a/intelmq/tests/bots/collectors/fireeye/test_collector.py b/intelmq/tests/bots/collectors/fireeye/test_collector.py index dd8a91186..79ca8f4a7 100644 --- a/intelmq/tests/bots/collectors/fireeye/test_collector.py +++ b/intelmq/tests/bots/collectors/fireeye/test_collector.py @@ -8,18 +8,18 @@ from intelmq.bots.collectors.fireeye.collector_fireeye import FireeyeCollectorBot RANDSTR = secrets.token_urlsafe(50) -ASSET_PATH_FIRST = pathlib.Path(__file__).parent / '../../../assets/fireeyeFristRequest.json' -ASSET_PATH_SECOND = pathlib.Path(__file__).parent / '../../../assets/fireeyeSecondRequest.xml' -PARAMETERS = {'dns_name': 'myfireeye.local', 'http_username': RANDSTR, 'http_password': RANDSTR, 'logging_level': 'DEBUG', 'request_duration': '24_hours', 'name': 'FireeyeCollector'} +ASSET_PATH_FIRST = pathlib.Path(__file__).parent / 'first_request.json' +ASSET_PATH_SECOND = pathlib.Path(__file__).parent / 'second_request.xml' +PARAMETERS = {'host': 'myfireeye.local', 'http_username': RANDSTR, 'http_password': RANDSTR, 'logging_level': 'DEBUG', 'request_duration': '24_hours', 'name': 'FireeyeCollector'} def prepare_mocker(mocker): - test.skip_exotic() mocker.post('https://myfireeye.local/wsapis/v2.0.0/auth/login', headers={'X-FeApi-Token': '1234567890'}) mocker.get('https://myfireeye.local/wsapis/v2.0.0/alerts?duration=24_hours', text=ASSET_PATH_FIRST.read_text()) mocker.get('https://myfireeye.local/wsapis/v2.0.0/openioc?alert_uuid=1591de22-4926-4124-b3ed-ffff96766295', text=ASSET_PATH_SECOND.read_text()) +@test.skip_exotic() @requests_mock.Mocker() class TestFireeyeCollectorBot(test.BotTestCase, unittest.TestCase): """ @@ -29,7 +29,6 @@ class TestFireeyeCollectorBot(test.BotTestCase, unittest.TestCase): @classmethod def set_bot(cls): cls.bot_reference = FireeyeCollectorBot - cls.use_cache = True def test_faulty_config(self, mocker): prepare_mocker(mocker) @@ -37,7 +36,7 @@ def test_faulty_config(self, mocker): with self.assertRaises(ValueError) as context: self.run_bot(iterations=1, parameters=params) exception = context.exception - self.assertEqual(str(exception), 'No dns name provided.') + self.assertEqual(str(exception), 'No host provided.') def test_wrong_login(self, mocker): prepare_mocker(mocker) diff --git a/intelmq/tests/bots/parsers/fireeye/test_parser_fireeye.py b/intelmq/tests/bots/parsers/fireeye/test_parser_fireeye.py index e043e7c87..b4dec79a0 100644 --- a/intelmq/tests/bots/parsers/fireeye/test_parser_fireeye.py +++ b/intelmq/tests/bots/parsers/fireeye/test_parser_fireeye.py @@ -66,5 +66,6 @@ def test_events(self): self.assertMessageEqual(position, event_) self.assertOutputQFILE_LINES = FILE.splitlines() + if __name__ == '__main__': # pragma: no cover unittest.main() From f25c99661afd02f9199c09fe5d19028df98a038d Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 4 May 2021 10:29:09 +0200 Subject: [PATCH 06/45] rename fireeye collector to fireeye mas Fireeye has more products 
than the MAS system --- .../collectors/fireeye/collector_fireeye.py | 88 ------------------- .../bots/collectors/fireeye/test_collector.py | 54 ------------ 2 files changed, 142 deletions(-) delete mode 100644 intelmq/bots/collectors/fireeye/collector_fireeye.py delete mode 100644 intelmq/tests/bots/collectors/fireeye/test_collector.py diff --git a/intelmq/bots/collectors/fireeye/collector_fireeye.py b/intelmq/bots/collectors/fireeye/collector_fireeye.py deleted file mode 100644 index 74beb8576..000000000 --- a/intelmq/bots/collectors/fireeye/collector_fireeye.py +++ /dev/null @@ -1,88 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Fireeye collector bot - -Parameters: -http_username, http_password: string -http_timeout_max_tries: an integer depicting how often a connection attempt is retried -dns_name : dns name of the local appliance -request_duration: how old date should be fetched eg 24_hours or 48_hours -""" -from intelmq.lib.bot import CollectorBot -from intelmq.lib.exceptions import MissingDependencyError -from intelmq.lib.mixins import HttpMixin - -try: - import xmltodict -except ImportError: - xmltodict = None - - -class FireeyeCollectorBot(CollectorBot, HttpMixin): - - dns_name: str = None - request_duration: str = None - http_username: str = None - http_password: str = None - - def xml_processor(self, uuid, token, new_report, dns_name, product): - - http_url = f'https://{host}/wsapis/v2.0.0/openioc?alert_uuid={uuid}' - http_header = {'X-FeApi-Token': token} - httpResponse = self.session.get(url=http_url, headers=http_header) - binary = httpResponse.content - self.logger.debug('Collecting information for UUID: %r .', uuid) - try: - my_dict = xmltodict.parse(binary) - for indicator in my_dict['OpenIOC']['criteria']['Indicator']['IndicatorItem']: - indicatorType = indicator['Context']['@search'] - if indicatorType == 'FileItem/Md5sum': - new_report = self.new_report() - new_report.add("raw", binary) - self.send_message(new_report) - except KeyError: - self.logger.debug("No Iocs for UUID: %r.', uuid") - - def init(self): - if xmltodict is None: - raise MissingDependencyError("xmltodict") - - if self.dns_name is None: - raise ValueError('No dns name provided.') - if self.request_duration is None: - raise ValueError('No request_duration provided.') - if self.http_username is None: - raise ValueError('No http_username provided.') - if self.http_password is None: - raise ValueError('No http_password provided.') - - # create auth token - self.session = self.http_session() - self.custom_auth_url = f"https://{self.host}/wsapis/v2.0.0/auth/login" - - def process(self): - # get token for request - resp = self.session.post(url=self.custom_auth_url, headers=self.http_header) - if not resp.ok: - raise ValueError('Could not connect to appliance check User/PW. HTTP response status code was %i.' 
% resp.status_code) - # extract token and build auth header - token = resp.headers['X-FeApi-Token'] - http_header = {'X-FeApi-Token': token, 'Accept': 'application/json'} - http_url = f"https://{self.host}/wsapis/v2.0.0/alerts?duration={self.request_duration}" - self.logger.debug("Downloading report from %r.", http_url) - resp = self.session.get(url=http_url, headers=http_header) - self.logger.debug("Report downloaded.") - message = resp.json() - if message['alert'][0]: - new_report = self.new_report() - for alert in message['alert']: - self.logger.debug('Got a new message from product %r with UUID %r.', alert['product'], alert['uuid']) - if alert['product'] == 'EMAIL_MPS' and alert['name'] == 'MALWARE_OBJECT': - uuid = alert['uuid'] - self.xml_processor(uuid, token, new_report, self.dns_name, product="EMAIL_MPS") - if alert['product'] == 'MAS' and alert['name'] == 'MALWARE_OBJECT': - uuid = alert['uuid'] - self.xml_processor(uuid, token, new_report, self.dns_name, product="MAS") - - -BOT = FireeyeCollectorBot diff --git a/intelmq/tests/bots/collectors/fireeye/test_collector.py b/intelmq/tests/bots/collectors/fireeye/test_collector.py deleted file mode 100644 index 79ca8f4a7..000000000 --- a/intelmq/tests/bots/collectors/fireeye/test_collector.py +++ /dev/null @@ -1,54 +0,0 @@ -import unittest -import pathlib -import secrets - -import requests_mock - -import intelmq.lib.test as test -from intelmq.bots.collectors.fireeye.collector_fireeye import FireeyeCollectorBot - -RANDSTR = secrets.token_urlsafe(50) -ASSET_PATH_FIRST = pathlib.Path(__file__).parent / 'first_request.json' -ASSET_PATH_SECOND = pathlib.Path(__file__).parent / 'second_request.xml' -PARAMETERS = {'host': 'myfireeye.local', 'http_username': RANDSTR, 'http_password': RANDSTR, 'logging_level': 'DEBUG', 'request_duration': '24_hours', 'name': 'FireeyeCollector'} - - -def prepare_mocker(mocker): - mocker.post('https://myfireeye.local/wsapis/v2.0.0/auth/login', headers={'X-FeApi-Token': '1234567890'}) - mocker.get('https://myfireeye.local/wsapis/v2.0.0/alerts?duration=24_hours', text=ASSET_PATH_FIRST.read_text()) - mocker.get('https://myfireeye.local/wsapis/v2.0.0/openioc?alert_uuid=1591de22-4926-4124-b3ed-ffff96766295', text=ASSET_PATH_SECOND.read_text()) - - -@test.skip_exotic() -@requests_mock.Mocker() -class TestFireeyeCollectorBot(test.BotTestCase, unittest.TestCase): - """ - Testcases for the Fireeye collector bot - """ - - @classmethod - def set_bot(cls): - cls.bot_reference = FireeyeCollectorBot - - def test_faulty_config(self, mocker): - prepare_mocker(mocker) - params = {'http_username': RANDSTR, 'http_password': RANDSTR, 'logging_level': 'DEBUG', 'request_duration': '24_hours'} - with self.assertRaises(ValueError) as context: - self.run_bot(iterations=1, parameters=params) - exception = context.exception - self.assertEqual(str(exception), 'No host provided.') - - def test_wrong_login(self, mocker): - prepare_mocker(mocker) - mocker.post('https://myfireeye.local/wsapis/v2.0.0/auth/login', status_code=500) - self.run_bot(iterations=1, parameters=PARAMETERS, allowed_error_count=1, allowed_warning_count=1) - self.assertLogMatches('ValueError: Could not connect to appliance check User/PW. 
HTTP response status code was 500.') - - def test_report_send(self, mocker): - prepare_mocker(mocker) - self.run_bot(iterations=1, parameters=PARAMETERS, allowed_warning_count=1) - self.assertAnyLoglineEqual('Processed 1 messages since last logging.', 'INFO') - - -if __name__ == '__main__': # pragma: no cover - unittest.main() From a325159aa8c8c9878b7def34bac23571f41bd367 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 11 May 2021 10:36:20 +0200 Subject: [PATCH 07/45] DOC: bots: add anchors for all sections adds the module name of bots as section anchor adds the module name to the information block if it was missing or incomplete fixes part of certtools/intelmq-api#4 --- docs/user/bots.rst | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/user/bots.rst b/docs/user/bots.rst index 74bf47dd4..4cf24c4e3 100644 --- a/docs/user/bots.rst +++ b/docs/user/bots.rst @@ -502,6 +502,9 @@ Requires the `kafka python library `_. * `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Only used by some bots. +.. _intelmq.bots.collectors.misp.collector: + + .. _intelmq.bots.collectors.misp.collector: MISP Generic @@ -641,6 +644,9 @@ Requires the rsync executable * `temp_directory`: The temporary directory for rsync to use for rsync'd files. Optional. Default: `$VAR_STATE_PATH/rsync_collector`. `$VAR_STATE_PATH` is `/var/run/intelmq/` or `/opt/intelmq/var/run/`. +.. _intelmq.bots.collectors.shadowserver.collector_reports_api: + + .. _intelmq.bots.collectors.shadowserver.collector_reports_api: Shadowserver Reports API @@ -1701,6 +1707,8 @@ It is required to look up the correct configuration. Look at the documentation in the bot's ``_config.py`` file for more information. +.. _intelmq.bots.parsers.shodan.parser: + .. _intelmq.bots.parsers.shodan.parser: Shodan @@ -1856,6 +1864,8 @@ Use this command to create/update the database and reload the bot: The database is fetched from `routeviews.org `_ and licensed under the Creative Commons Attribution 4.0 International license (see the `routeviews FAQ `_). +.. _intelmq.bots.experts.csv_converter.expert: + .. _intelmq.bots.experts.csv_converter.expert: CSV Converter @@ -1924,6 +1934,8 @@ RemoveAffix Remove part of string from string, example: `www.` from domains. +.. _intelmq.bots.experts.domain_suffix.expert: + .. _intelmq.bots.experts.domain_suffix.expert: Domain Suffix @@ -2696,6 +2708,8 @@ RDAP } +.. _intelmq.bots.experts.recordedfuture_iprisk.expert: + .. _intelmq.bots.experts.recordedfuture_iprisk.expert: RecordedFuture IP risk @@ -4113,7 +4127,8 @@ Create the new database (you can ignore all errors since SQLite doesn't know all Then, set the `database` parameter to the `your-db.db` file path. -.. _stomp output bot: + +.. _intelmq.bots.outputs.stomp.output: .. 
_intelmq.bots.outputs.stomp.output: From 2ddb6256c1c85146aaf60182a2e91928dfc42885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikk=20Margus=20M=C3=B6ll?= Date: Tue, 11 May 2021 20:48:22 +0300 Subject: [PATCH 08/45] ENH: Add condition negation to the Sieve DSL --- intelmq/tests/bots/experts/sieve/test_expert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intelmq/tests/bots/experts/sieve/test_expert.py b/intelmq/tests/bots/experts/sieve/test_expert.py index ef6c4e842..6274afb72 100644 --- a/intelmq/tests/bots/experts/sieve/test_expert.py +++ b/intelmq/tests/bots/experts/sieve/test_expert.py @@ -20,7 +20,7 @@ } -@test.skip_exotic() +#@test.skip_exotic() class TestSieveExpertBot(test.BotTestCase, unittest.TestCase): """ A TestCase for SieveExpertBot. From 7476e310c2c6540cde5ba2efd85cb19b9abadc85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikk=20Margus=20M=C3=B6ll?= Date: Tue, 11 May 2021 22:45:46 +0300 Subject: [PATCH 09/45] DOC: Update Sieve bot documentation regarding DSL updates --- docs/user/bots.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/user/bots.rst b/docs/user/bots.rst index 4cf24c4e3..b32236c7c 100644 --- a/docs/user/bots.rst +++ b/docs/user/bots.rst @@ -3000,7 +3000,7 @@ The following operators may be used to match events: * Boolean values can be matched with `==` or `!=` followed by `true` or `false`. Example: ``if extra.has_known_vulns == true { ... }`` - * `:equals` tests for equality between lists, including order. Its result can be inverted by using `! :equals`. Example for checking a hostname-port pair: + * `:equals` tests for equality between lists, including order. Example for checking a hostname-port pair: ``if extra.host_tuple :equals ['dns.google', 53] { ... }`` * `:setequals` tests for set-based equality (ignoring duplicates and value order) between a list of given values. Example for checking for the first nameserver of two domains, regardless of the order they are given in the list: ``if extra.hostnames :setequals ['ns1.example.com', 'ns1.example.mx'] { ... }`` @@ -3014,8 +3014,6 @@ The following operators may be used to match events: * `:supersetof` tests if the list of values from the given key is a superset of the values specified as the argument. Example for matching hosts with at least the IoT and vulnerable tags: ``if extra.tags :supersetof ['iot', 'vulnerable'] { ... }`` - * The results of the list operators (`:equals`, `:setequals`, `:overlaps`, `:subsetof` and `:supersetof`) can be inverted with a prepended exclamation mark, such as `! :overlaps`. Note that in case there is no value with the given key or it is a non-list value, the result will always be false, regardless of negation. The existence of the key can be checked for separately. - * Boolean values can be matched with `==` or `!=` followed by `true` or `false`. Example: ``if extra.has_known_vulns == true { ... 
}`` From 184d8ff17133c4177ecfd43ba986f057d7183c08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikk=20Margus=20M=C3=B6ll?= Date: Tue, 11 May 2021 22:51:41 +0300 Subject: [PATCH 10/45] FIX: skip sieve expert tests --- intelmq/tests/bots/experts/sieve/test_expert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intelmq/tests/bots/experts/sieve/test_expert.py b/intelmq/tests/bots/experts/sieve/test_expert.py index 6274afb72..ef6c4e842 100644 --- a/intelmq/tests/bots/experts/sieve/test_expert.py +++ b/intelmq/tests/bots/experts/sieve/test_expert.py @@ -20,7 +20,7 @@ } -#@test.skip_exotic() +@test.skip_exotic() class TestSieveExpertBot(test.BotTestCase, unittest.TestCase): """ A TestCase for SieveExpertBot. From ad7f6abf976fff6a91fca87a7bc8d106f7ac7d8c Mon Sep 17 00:00:00 2001 From: Sebastian Waldbauer Date: Wed, 12 May 2021 13:34:06 +0200 Subject: [PATCH 11/45] FIX: RDAP checks if entity is valid entity-type As some RDAP implementation may vary, we check if the entity is a valid entity-type. As of the RFC for RDAP only JSON dicts are allowed. Fixes #1942 Signed-off-by: Sebastian Waldbauer --- intelmq/tests/bots/experts/rdap/test_expert.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/intelmq/tests/bots/experts/rdap/test_expert.py b/intelmq/tests/bots/experts/rdap/test_expert.py index 59564c3b3..dea0232fa 100644 --- a/intelmq/tests/bots/experts/rdap/test_expert.py +++ b/intelmq/tests/bots/experts/rdap/test_expert.py @@ -67,7 +67,11 @@ def set_bot(cls): def test(self, mocker): prepare_mocker(mocker) self.input_message = EXAMPLE_INPUT - self.run_bot() + self.run_bot(parameters={ + 'rdap_bootstrapped_servers': { + 'versicherung': 'http://localhost/rdap/v1/', + } + }) self.assertMessageEqual(0, EXAMPLE_OUTPUT) def test_object_validation(self, mocker): From 1f2479325b51c3bbc575379faa62f3fda5a38a86 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 10 May 2021 23:23:46 +0200 Subject: [PATCH 12/45] BUG/DOC: updated feodotracker browse parser confiuration - adapted docs - added news entry - added upgrade config - added upgrade config test - added html table parser test fixes certtools/intelmq#1938 --- intelmq/lib/upgrades.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index 83dd28923..80ad97bd7 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -610,7 +610,7 @@ def v230_feed_changes(configuration, harmonization, dry_run, **kwargs): def v300_bots_file_removal(configuration, harmonization, dry_run, **kwargs): """ - Remove BOTS file + Migrate Abuse.ch Feodotracker Browser feed parsing parameters """ changed = None messages = [] From 96398742360edb1fdee4bafa124c8546cc2add4b Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Tue, 18 May 2021 11:28:19 +0200 Subject: [PATCH 13/45] ENH: add honeypot_brute_force and corresponding tests ... 
and add legacy tag to drone_brute_force which it replaces --- .../parsers/shadowserver/testdata/honeypot_brute_force.csv | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_brute_force.csv diff --git a/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_brute_force.csv b/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_brute_force.csv new file mode 100644 index 000000000..17ff15ee6 --- /dev/null +++ b/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_brute_force.csv @@ -0,0 +1,7 @@ +"timestamp","protocol","src_ip","src_port","src_asn","src_geo","src_region","src_city","src_hostname","src_naics","src_sector","device_vendor","device_type","device_model","dst_ip","dst_port","dst_asn","dst_geo","dst_region","dst_city","dst_hostname","dst_naics","dst_sector","public_source","infection","family","tag","application","version","event_id","service","start_time","end_time","client_version","username","password","payload_url","payload_md5" +"2021-03-27 00:00:00","tcp","141.98.1.2",30123,209588,"NL","NOORD-HOLLAND","AMSTERDAM",,,,,,,"162.250.1.2",22,26832,"CA","QUEBEC","MONTREAL",,,,"CAPRICA-EU","ssh-brute-force",,,"ssh",,,,"2021-03-27T00:00:00.521730Z","2021-03-27T00:00:01.710968Z","b'SSH-2.0-Go'",,,, +"2021-03-27 00:00:00","tcp","5.188.3.4",55690,57172,"NL","NOORD-HOLLAND","AMSTERDAM",,518210,,,,,"162.250.3.4",22,26832,"CA","QUEBEC","MONTREAL",,,,"CAPRICA-EU","ssh-brute-force",,,"ssh",,,,"2021-03-27T00:00:00.520927Z","2021-03-27T00:00:01.670993Z","b'SSH-2.0-Go'",,,, +"2021-03-27 00:00:00","tcp","45.14.5.6",38636,44220,"RO","BIHOR","ORADEA",,,,,,,"82.118.5.6",23,204957,"PL","MAZOWIECKIE","WARSAW",,,,"CAPRICA-EU","telnet-brute-force",,,"telnet",,,,"2021-03-27T00:00:00.781774Z","2021-03-27T00:00:00.857244Z",,,,, +"2021-03-27 00:00:00","tcp","5.188.6.7",56385,49453,"NL","NOORD-HOLLAND","AMSTERDAM",,518210,,,,,"102.16.6.7",22,37054,"MG","ANTANANARIVO","ANTANANARIVO",,,"Communications, Service Provider, and Hosting Service","CAPRICA-EU","ssh-brute-force",,,"ssh",,,,"2021-03-27T00:00:00.163870Z","2021-03-27T00:00:02.896640Z","b'SSH-2.0-Go'",,,, +"2021-03-27 00:00:00","tcp","45.14.7.8",35802,44220,"RO","BIHOR","ORADEA",,,,,,,"82.118.7.8",23,204957,"PL","MAZOWIECKIE","WARSAW",,,,"CAPRICA-EU","telnet-brute-force",,,"telnet",,,,"2021-03-27T00:00:00.781272Z","2021-03-27T00:00:00.856606Z",,,,, +"2021-03-27 00:00:00","tcp","5.188.9.10",33289,49453,"NL","NOORD-HOLLAND","AMSTERDAM",,518210,,,,,"60.234.9.10",22,9790,"NZ","WELLINGTON","LOWER HUTT",,,"Communications, Service Provider, and Hosting Service","CAPRICA-EU","ssh-brute-force",,,"ssh",,,,"2021-03-27T00:00:00.044871Z","2021-03-27T00:00:00.077322Z","b'SSH-2.0-Go'",,,, From 7c79954f711a47cc8d31163bfab60932120d46a5 Mon Sep 17 00:00:00 2001 From: Birger Schacht <72209253+schacht-certat@users.noreply.github.com> Date: Tue, 18 May 2021 20:01:00 +0200 Subject: [PATCH 14/45] ENH: add honeypot_ddos_amp and corresponding tests (#1950) add legacy tag to application_ddos_victim Co-authored-by: Wagner --- intelmq/bots/parsers/shadowserver/_config.py | 2 +- .../parsers/shadowserver/testdata/honeypot_ddos_amp.csv | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_ddos_amp.csv diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 6a4521145..705b6da7a 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ 
b/intelmq/bots/parsers/shadowserver/_config.py @@ -2520,7 +2520,7 @@ def force_base64(value: Optional[str]) -> Optional[str]: ('extra.', 'device_type', validate_to_none), ('extra.', 'device_model', validate_to_none), ('destination.ip', 'dst_ip', validate_ip), - ('destination.port', 'dst_port'), + ('destination.port', 'dst_port'), ('destination.asn', 'dst_asn', invalidate_zero), ('destination.geolocation.cc', 'dst_geo'), ('destination.geolocation.region', 'dst_region'), diff --git a/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_ddos_amp.csv b/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_ddos_amp.csv new file mode 100644 index 000000000..31a069ad6 --- /dev/null +++ b/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_ddos_amp.csv @@ -0,0 +1,6 @@ +"timestamp","protocol","src_ip","src_port","src_asn","src_geo","src_region","src_city","src_hostname","src_naics","src_sector","device_vendor","device_type","device_model","dst_ip","dst_port","dst_asn","dst_geo","dst_region","dst_city","dst_hostname","dst_naics","dst_sector","public_source","infection","family","tag","application","version","event_id","request","count","bytes","end_time" +"2021-03-28 00:00:02",,"107.141.1.2",,7018,"US","CALIFORNIA","VISALIA","107-141-x-x.lightspeed.frsnca.sbcglobal.net",517311,"Communications, Service Provider, and Hosting Service",,,,,389,,,,,,,,"CISPA","ddos-amplification",,,,,,,,,"2021-03-28 00:20:22" +"2021-03-28 00:00:02",,"74.59.3.4",,5769,"CA","QUEBEC","CHICOUTIMI","modemcablex-x-59-74.mc.videotron.ca",517311,"Communications, Service Provider, and Hosting Service",,,,,389,,,,,,,,"CISPA","ddos-amplification",,,,,,,,,"2021-03-28 00:13:50" +"2021-03-28 00:00:02",,"65.131.5.6",,209,"US","WYOMING","CASPER","65-131-x-x.chyn.qwest.net",517311,"Communications, Service Provider, and Hosting Service",,,,,389,,,,,,,,"CISPA","ddos-amplification",,,,,,,,, +"2021-03-28 00:00:02",,"104.162.7.8",,12271,"US","NEW YORK","KINGSTON","cpe-104-162-x-x.hvc.res.rr.com",517311,"Communications, Service Provider, and Hosting Service",,,,,389,,,,,,,,"CISPA","ddos-amplification",,,,,,,,, +"2021-03-28 00:00:02",,"37.120.178.9.10",,197540,"DE","NIEDERSACHSEN","GIFHORN","v22020111328131649.ultrasrv.de",,,,,,,389,,,,,,,,"CISPA","ddos-amplification",,,,,,,,, From 89cec4c9be5e976927b811421948e20a9012cb24 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Tue, 18 May 2021 19:48:43 +0200 Subject: [PATCH 15/45] DOC+ENH: shadowserver support for new feeds add documentation for #1950 and #1952 fix filename detection for the new feeds --- docs/user/bots.rst | 1 - .../parsers/shadowserver/testdata/honeypot_brute_force.csv | 7 ------- .../parsers/shadowserver/testdata/honeypot_ddos_amp.csv | 6 ------ 3 files changed, 14 deletions(-) delete mode 100644 intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_brute_force.csv delete mode 100644 intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_ddos_amp.csv diff --git a/docs/user/bots.rst b/docs/user/bots.rst index b32236c7c..6ab1a2e11 100644 --- a/docs/user/bots.rst +++ b/docs/user/bots.rst @@ -1706,7 +1706,6 @@ It is required to look up the correct configuration. Look at the documentation in the bot's ``_config.py`` file for more information. - .. _intelmq.bots.parsers.shodan.parser: .. 
_intelmq.bots.parsers.shodan.parser: diff --git a/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_brute_force.csv b/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_brute_force.csv deleted file mode 100644 index 17ff15ee6..000000000 --- a/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_brute_force.csv +++ /dev/null @@ -1,7 +0,0 @@ -"timestamp","protocol","src_ip","src_port","src_asn","src_geo","src_region","src_city","src_hostname","src_naics","src_sector","device_vendor","device_type","device_model","dst_ip","dst_port","dst_asn","dst_geo","dst_region","dst_city","dst_hostname","dst_naics","dst_sector","public_source","infection","family","tag","application","version","event_id","service","start_time","end_time","client_version","username","password","payload_url","payload_md5" -"2021-03-27 00:00:00","tcp","141.98.1.2",30123,209588,"NL","NOORD-HOLLAND","AMSTERDAM",,,,,,,"162.250.1.2",22,26832,"CA","QUEBEC","MONTREAL",,,,"CAPRICA-EU","ssh-brute-force",,,"ssh",,,,"2021-03-27T00:00:00.521730Z","2021-03-27T00:00:01.710968Z","b'SSH-2.0-Go'",,,, -"2021-03-27 00:00:00","tcp","5.188.3.4",55690,57172,"NL","NOORD-HOLLAND","AMSTERDAM",,518210,,,,,"162.250.3.4",22,26832,"CA","QUEBEC","MONTREAL",,,,"CAPRICA-EU","ssh-brute-force",,,"ssh",,,,"2021-03-27T00:00:00.520927Z","2021-03-27T00:00:01.670993Z","b'SSH-2.0-Go'",,,, -"2021-03-27 00:00:00","tcp","45.14.5.6",38636,44220,"RO","BIHOR","ORADEA",,,,,,,"82.118.5.6",23,204957,"PL","MAZOWIECKIE","WARSAW",,,,"CAPRICA-EU","telnet-brute-force",,,"telnet",,,,"2021-03-27T00:00:00.781774Z","2021-03-27T00:00:00.857244Z",,,,, -"2021-03-27 00:00:00","tcp","5.188.6.7",56385,49453,"NL","NOORD-HOLLAND","AMSTERDAM",,518210,,,,,"102.16.6.7",22,37054,"MG","ANTANANARIVO","ANTANANARIVO",,,"Communications, Service Provider, and Hosting Service","CAPRICA-EU","ssh-brute-force",,,"ssh",,,,"2021-03-27T00:00:00.163870Z","2021-03-27T00:00:02.896640Z","b'SSH-2.0-Go'",,,, -"2021-03-27 00:00:00","tcp","45.14.7.8",35802,44220,"RO","BIHOR","ORADEA",,,,,,,"82.118.7.8",23,204957,"PL","MAZOWIECKIE","WARSAW",,,,"CAPRICA-EU","telnet-brute-force",,,"telnet",,,,"2021-03-27T00:00:00.781272Z","2021-03-27T00:00:00.856606Z",,,,, -"2021-03-27 00:00:00","tcp","5.188.9.10",33289,49453,"NL","NOORD-HOLLAND","AMSTERDAM",,518210,,,,,"60.234.9.10",22,9790,"NZ","WELLINGTON","LOWER HUTT",,,"Communications, Service Provider, and Hosting Service","CAPRICA-EU","ssh-brute-force",,,"ssh",,,,"2021-03-27T00:00:00.044871Z","2021-03-27T00:00:00.077322Z","b'SSH-2.0-Go'",,,, diff --git a/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_ddos_amp.csv b/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_ddos_amp.csv deleted file mode 100644 index 31a069ad6..000000000 --- a/intelmq/tests/bots/parsers/shadowserver/testdata/honeypot_ddos_amp.csv +++ /dev/null @@ -1,6 +0,0 @@ -"timestamp","protocol","src_ip","src_port","src_asn","src_geo","src_region","src_city","src_hostname","src_naics","src_sector","device_vendor","device_type","device_model","dst_ip","dst_port","dst_asn","dst_geo","dst_region","dst_city","dst_hostname","dst_naics","dst_sector","public_source","infection","family","tag","application","version","event_id","request","count","bytes","end_time" -"2021-03-28 00:00:02",,"107.141.1.2",,7018,"US","CALIFORNIA","VISALIA","107-141-x-x.lightspeed.frsnca.sbcglobal.net",517311,"Communications, Service Provider, and Hosting Service",,,,,389,,,,,,,,"CISPA","ddos-amplification",,,,,,,,,"2021-03-28 00:20:22" -"2021-03-28 
00:00:02",,"74.59.3.4",,5769,"CA","QUEBEC","CHICOUTIMI","modemcablex-x-59-74.mc.videotron.ca",517311,"Communications, Service Provider, and Hosting Service",,,,,389,,,,,,,,"CISPA","ddos-amplification",,,,,,,,,"2021-03-28 00:13:50" -"2021-03-28 00:00:02",,"65.131.5.6",,209,"US","WYOMING","CASPER","65-131-x-x.chyn.qwest.net",517311,"Communications, Service Provider, and Hosting Service",,,,,389,,,,,,,,"CISPA","ddos-amplification",,,,,,,,, -"2021-03-28 00:00:02",,"104.162.7.8",,12271,"US","NEW YORK","KINGSTON","cpe-104-162-x-x.hvc.res.rr.com",517311,"Communications, Service Provider, and Hosting Service",,,,,389,,,,,,,,"CISPA","ddos-amplification",,,,,,,,, -"2021-03-28 00:00:02",,"37.120.178.9.10",,197540,"DE","NIEDERSACHSEN","GIFHORN","v22020111328131649.ultrasrv.de",,,,,,,389,,,,,,,,"CISPA","ddos-amplification",,,,,,,,, From c48716e5479c2cda48e060b0c8284f87f3d135c9 Mon Sep 17 00:00:00 2001 From: Sebastian Waldbauer Date: Mon, 10 May 2021 13:10:44 +0200 Subject: [PATCH 16/45] ENH: Added cache mixin The cache mixin replaces the cache lib and adds the possibility to inherit the mixin in classes. Signed-off-by: Sebastian Waldbauer --- intelmq/lib/mixins/cache.py | 1 - 1 file changed, 1 deletion(-) diff --git a/intelmq/lib/mixins/cache.py b/intelmq/lib/mixins/cache.py index 77f8570d7..d2d87fc6f 100644 --- a/intelmq/lib/mixins/cache.py +++ b/intelmq/lib/mixins/cache.py @@ -10,7 +10,6 @@ import redis import intelmq.lib.utils as utils - class CacheMixin: __redis: redis.Redis = None redis_cache_host: str = "127.0.0.1" From 0810f7d5d96d5f49df9c1eb025227cb71e6ae8ea Mon Sep 17 00:00:00 2001 From: Sebastian Waldbauer Date: Mon, 10 May 2021 13:55:47 +0200 Subject: [PATCH 17/45] FIX: Removed some old cache code Signed-off-by: Sebastian Waldbauer --- intelmq/lib/mixins/cache.py | 1 + 1 file changed, 1 insertion(+) diff --git a/intelmq/lib/mixins/cache.py b/intelmq/lib/mixins/cache.py index d2d87fc6f..77f8570d7 100644 --- a/intelmq/lib/mixins/cache.py +++ b/intelmq/lib/mixins/cache.py @@ -10,6 +10,7 @@ import redis import intelmq.lib.utils as utils + class CacheMixin: __redis: redis.Redis = None redis_cache_host: str = "127.0.0.1" From 3816ef0c9e1b479e9b7d14863b4ae8bda2a88408 Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Tue, 18 May 2021 10:17:33 +0200 Subject: [PATCH 18/45] DOC: add license information to all the files This commit adds license information to a lot of files and adds a .reuse/dep5 file that lists the license information for some folders The commit also changes the main license in setup.cfg from AGPL-3.0-only to AGPL-3.0-or-later because only one file has the AGPL-3.0-only file as license and multiple files have the AGPL-3.0-or-later in the license header. It also removes the cef_logo.png file, as there is no information about the licese anywhere to be found. It is now included directly from the website of the european union. Closes #1633 --- .github/pull_request_template.md | 3 + .../workflows/scripts/ansible-runtime.yaml | 225 ++++++++++++++++++ 2 files changed, 228 insertions(+) create mode 100644 .github/workflows/scripts/ansible-runtime.yaml diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index e635634b8..f4f7fc076 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,3 +1,6 @@ +.. 
+ SPDX-License-Identifier: AGPL-3.0-or-later + # Please read the notes below and replace them with the description of you pull request Make sure you follow the instructions in the [Developer Guide](https://intelmq.readthedocs.io/en/latest/dev/guide.html) - it describes how to run the test suite and which coding rules to follow. diff --git a/.github/workflows/scripts/ansible-runtime.yaml b/.github/workflows/scripts/ansible-runtime.yaml new file mode 100644 index 000000000..0d7010154 --- /dev/null +++ b/.github/workflows/scripts/ansible-runtime.yaml @@ -0,0 +1,225 @@ +# SPDX-FileCopyrightText: 2021 Birger Schacht +# +# SPDX-License-Identifier: CC0-1.0 +cymru-whois-expert: + bot_id: cymru-whois-expert + description: Cymru Whois (IP to ASN) is the bot responsible to add network information + to the events (BGP, ASN, AS Name, Country, etc..). + enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.cymru_whois.expert + name: Cymru Whois + parameters: + destination_queues: + _default: [file-output-queue] + overwrite: true + redis_cache_db: 5 + redis_cache_host: 127.0.0.1 + redis_cache_password: null + redis_cache_port: 6379 + redis_cache_ttl: 86400 + run_mode: continuous +deduplicator-expert: + bot_id: deduplicator-expert + description: Deduplicator is the bot responsible for detection and removal of duplicate + messages. Messages get cached for seconds. If found in the cache, + it is assumed to be a duplicate. + enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.deduplicator.expert + name: Deduplicator + parameters: + destination_queues: + _default: [taxonomy-expert-queue] + filter_keys: raw,time.observation + filter_type: blacklist + redis_cache_db: 6 + redis_cache_host: 127.0.0.1 + redis_cache_port: 6379 + redis_cache_ttl: 86400 + run_mode: continuous +feodo-tracker-browse-collector: + description: Generic URL Fetcher is the bot responsible to get the report from an + URL. + enabled: true + group: Collector + module: intelmq.bots.collectors.http.collector_http + name: URL Fetcher + parameters: + destination_queues: + _default: [feodo-tracker-browse-parser-queue] + extract_files: false + http_password: null + http_url: https://feodotracker.abuse.ch/browse + http_url_formatting: false + http_username: null + name: Feodo Tracker Browse + provider: Abuse.ch + rate_limit: 86400 + ssl_client_certificate: null + run_mode: continuous +feodo-tracker-browse-parser: + description: HTML Table Parser is a bot configurable to parse different html table + data. + enabled: true + group: Parser + module: intelmq.bots.parsers.html_table.parser + name: HTML Table + parameters: + attribute_name: '' + attribute_value: '' + columns: time.source,source.ip,malware.name,status,extra.SBL,source.as_name,source.geolocation.cc + default_url_protocol: http:// + destination_queues: + _default: [deduplicator-expert-queue] + ignore_values: ',,,,Not listed,,' + skip_table_head: true + split_column: '' + split_index: 0 + split_separator: '' + table_index: 0 + time_format: null + type: c2server + run_mode: continuous +file-input: + bod_id: foobar + description: foobar + enabled: true + group: Collectors + module: intelmq.bots.collectors.file.collector_file + name: File Input + parameters: + delete_file: false + destination_queues: + _default: [file-output-queue] + path: /assets + postfix: .txt + run_mode: scheduled +file-output: + bot_id: file-output + description: File is the bot responsible to send events to a file. 
+ enabled: true + group: Output + groupname: outputs + module: intelmq.bots.outputs.file.output + name: File + parameters: {file: /var/lib/intelmq/bots/file-output/events.txt, hierarchical_output: false, + single_key: null} + run_mode: continuous +gethostbyname-1-expert: + bot_id: gethostbyname-1-expert + description: fqdn2ip is the bot responsible to parsing the ip from the fqdn. + enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.gethostbyname.expert + name: Gethostbyname + parameters: + destination_queues: + _default: [cymru-whois-expert-queue] + run_mode: continuous +gethostbyname-2-expert: + bot_id: gethostbyname-2-expert + description: fqdn2ip is the bot responsible to parsing the ip from the fqdn. + enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.gethostbyname.expert + name: Gethostbyname + parameters: + destination_queues: + _default: [cymru-whois-expert-queue] + run_mode: continuous +malc0de-parser: + bot_id: malc0de-parser + description: Malc0de Parser is the bot responsible to parse the IP Blacklist and + either Windows Format or Bind Format reports and sanitize the information. + enabled: true + group: Parser + groupname: parsers + module: intelmq.bots.parsers.malc0de.parser + name: Malc0de + parameters: + destination_queues: + _default: [deduplicator-expert-queue] + run_mode: continuous +malc0de-windows-format-collector: + bot_id: malc0de-windows-format-collector + description: '' + enabled: true + group: Collector + groupname: collectors + module: intelmq.bots.collectors.http.collector_http + name: Malc0de Windows Format + parameters: + destination_queues: + _default: [malc0de-parser-queue] + http_password: null + http_url: https://malc0de.com/bl/BOOT + http_username: null + name: Windows Format + provider: Malc0de + rate_limit: 10800 + ssl_client_certificate: null + run_mode: continuous +spamhaus-drop-collector: + bot_id: spamhaus-drop-collector + description: '' + enabled: true + group: Collector + groupname: collectors + module: intelmq.bots.collectors.http.collector_http + name: Spamhaus Drop + parameters: + destination_queues: + _default: [spamhaus-drop-parser-queue] + http_password: null + http_url: https://www.spamhaus.org/drop/drop.txt + http_username: null + name: Drop + provider: Spamhaus + rate_limit: 3600 + ssl_client_certificate: null + run_mode: continuous +spamhaus-drop-parser: + bot_id: spamhaus-drop-parser + description: Spamhaus Drop Parser is the bot responsible to parse the DROP, EDROP, + DROPv6, and ASN-DROP reports and sanitize the information. + enabled: true + group: Parser + groupname: parsers + module: intelmq.bots.parsers.spamhaus.parser_drop + name: Spamhaus Drop + parameters: + destination_queues: + _default: [deduplicator-expert-queue] + run_mode: continuous +taxonomy-expert: + bot_id: taxonomy-expert + description: Taxonomy is the bot responsible to apply the eCSIRT Taxonomy to all + events. + enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.taxonomy.expert + name: Taxonomy + parameters: + destination_queues: + _default: [url2fqdn-expert-queue] + run_mode: continuous +url2fqdn-expert: + bot_id: url2fqdn-expert + description: url2fqdn is the bot responsible to parsing the fqdn from the url. 
+ enabled: true + group: Expert + groupname: experts + module: intelmq.bots.experts.url2fqdn.expert + name: URL2FQDN + parameters: + destination_queues: + _default: [gethostbyname-1-expert-queue, gethostbyname-2-expert-queue] + load_balance: true + overwrite: false + run_mode: continuous From a8693200fb0842f008f7b73cc155ddc170972528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikk=20Margus=20M=C3=B6ll?= Date: Fri, 21 May 2021 10:23:59 +0300 Subject: [PATCH 19/45] ENH: Split Sieve string/numeric methods into single/multivalue variants --- intelmq/tests/bots/experts/sieve/test_expert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intelmq/tests/bots/experts/sieve/test_expert.py b/intelmq/tests/bots/experts/sieve/test_expert.py index ef6c4e842..6274afb72 100644 --- a/intelmq/tests/bots/experts/sieve/test_expert.py +++ b/intelmq/tests/bots/experts/sieve/test_expert.py @@ -20,7 +20,7 @@ } -@test.skip_exotic() +#@test.skip_exotic() class TestSieveExpertBot(test.BotTestCase, unittest.TestCase): """ A TestCase for SieveExpertBot. From 4b4ac755ce08308ff33471e0ac0d80d2082e5555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikk=20Margus=20M=C3=B6ll?= Date: Fri, 21 May 2021 10:36:46 +0300 Subject: [PATCH 20/45] ENH: Use f-strings in Sieve expert --- intelmq/tests/bots/experts/sieve/test_expert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intelmq/tests/bots/experts/sieve/test_expert.py b/intelmq/tests/bots/experts/sieve/test_expert.py index 6274afb72..ef6c4e842 100644 --- a/intelmq/tests/bots/experts/sieve/test_expert.py +++ b/intelmq/tests/bots/experts/sieve/test_expert.py @@ -20,7 +20,7 @@ } -#@test.skip_exotic() +@test.skip_exotic() class TestSieveExpertBot(test.BotTestCase, unittest.TestCase): """ A TestCase for SieveExpertBot. 
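The Shadowserver patches that follow (event4_ip_spoofer, event4_honeypot_darknet, event46_sinkhole_http, scan_exchange, event46_sinkhole_http_referer) all add mapping dictionaries with the same three-part shape: `required_fields`, `optional_fields` and `constant_fields`, where each tuple is `(intelmq_field, csv_column[, converter])`. A minimal sketch of how such a mapping could be applied to one CSV row is shown below; `apply_mapping`, the simplified `invalidate_zero` and the sample row are illustrative assumptions, not the parser's actual implementation in `intelmq.bots.parsers.shadowserver._config`.

```
# Illustrative sketch only -- not the Shadowserver parser's real code.
from typing import Optional


def invalidate_zero(value: str) -> Optional[int]:
    """Hypothetical converter: return the integer, or None if it is zero or empty."""
    if not value:
        return None
    return int(value) or None


def apply_mapping(mapping: dict, row: dict) -> dict:
    """Build a flat IntelMQ-style event dict from one Shadowserver CSV row."""
    event = dict(mapping.get('constant_fields', {}))
    for entry in mapping['required_fields'] + mapping['optional_fields']:
        intelmq_field, column = entry[0], entry[1]
        converter = entry[2] if len(entry) > 2 else None
        value = row.get(column)
        if value in (None, ''):
            if entry in mapping['required_fields']:
                raise ValueError(f"Required column {column!r} not found in feed.")
            continue
        if converter is not None:
            value = converter(value)
        if value is None:
            continue
        # 'extra.' entries keep the original column name, prefixed with 'extra.'
        key = intelmq_field + column if intelmq_field == 'extra.' else intelmq_field
        event[key] = value
    return event


example_mapping = {
    'required_fields': [('source.ip', 'src_ip')],
    'optional_fields': [('source.asn', 'src_asn', invalidate_zero),
                        ('extra.', 'public_source')],
    'constant_fields': {'classification.taxonomy': 'other'},
}

print(apply_mapping(example_mapping, {'src_ip': '192.0.2.1', 'src_asn': '0',
                                      'public_source': 'example'}))
# {'classification.taxonomy': 'other', 'source.ip': '192.0.2.1', 'extra.public_source': 'example'}
```

The real configs use additional converters (`convert_int`, `validate_to_none`, `add_UTC_to_timestamp`, ...) to normalise timestamps and drop placeholder values; the "Required column ... not found in feed" message mirrors the check exercised by test_broken.py further below.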
From bdec455a30dae0a937d1d0f5c71c581fb72ec862 Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Wed, 19 May 2021 10:15:27 +0200 Subject: [PATCH 21/45] ENH: add event_ip_spoofer shadowserver config and corresponding tests and add legacy tag to shadowserver caida config --- intelmq/bots/parsers/shadowserver/_config.py | 9 +++++++++ .../bots/parsers/shadowserver/test_event4_ip_spoofer.py | 3 +++ 2 files changed, 12 insertions(+) diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 705b6da7a..0538ac2c2 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -2588,6 +2588,7 @@ def force_base64(value: Optional[str]) -> Optional[str]: 'classification.type': 'masquerade', } } +<<<<<<< HEAD # https://www.shadowserver.org/what-we-do/network-reporting/honeypot-darknet-events-report/ event4_honeypot_darknet = { @@ -2969,6 +2970,8 @@ def scan_exchange_identifier(field): 'classification.identifier': 'device-id', } } +======= +>>>>>>> ecef0ea7f (ENH: add event_ip_spoofer shadowserver config and corresponding tests) mapping = ( # feed name, file name, function @@ -3008,11 +3011,17 @@ def scan_exchange_identifier(field): ('Honeypot-Darknet', 'event4_honeypot_darknet', event4_honeypot_darknet), ('Honeypot-HTTP-Scan', 'event4_honeypot_http_scan', honeypot_http_scan), ('ICS-Scanners', 'hp_ics_scan', ics_scanners), +<<<<<<< HEAD ('IPv6-Sinkhole-HTTP-Drone', 'sinkhole6_http', ipv6_sinkhole_http_drone), # legacy (replaced by event46_sinkhole_http) ('IP-Spoofer-Events', 'event4_ip_spoofer', event4_ip_spoofer), ('Microsoft-Sinkhole', 'microsoft_sinkhole', microsoft_sinkhole), # legacy (replaced by event46_sinkhole_http) ('Microsoft-Sinkhole-Events IPv4', 'event4_microsoft_sinkhole', event46_sinkhole), ('Microsoft-Sinkhole-Events-HTTP IPv4', 'event4_microsoft_sinkhole_http', event46_sinkhole_http), +======= + ('IPv6-Sinkhole-HTTP-Drone', 'sinkhole6_http', ipv6_sinkhole_http_drone), + ('IP-Spoofer-Events', 'event4_ip_spoofer', event4_ip_spoofer), + ('Microsoft-Sinkhole', 'microsoft_sinkhole', microsoft_sinkhole), +>>>>>>> ecef0ea7f (ENH: add event_ip_spoofer shadowserver config and corresponding tests) ('NTP-Monitor', 'scan_ntpmonitor', ntp_monitor), ('NTP-Version', 'scan_ntp', ntp_version), ('Open-Chargen', 'scan_chargen', open_chargen), diff --git a/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py b/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py index e993ea534..87316a5d5 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py @@ -1,7 +1,10 @@ +<<<<<<< HEAD # SPDX-FileCopyrightText: 2021 Birger Schacht # # SPDX-License-Identifier: AGPL-3.0-or-later +======= +>>>>>>> ecef0ea7f (ENH: add event_ip_spoofer shadowserver config and corresponding tests) import os import unittest From 0926b38937082ba5c109d2cb772fc0a4e41a1eb1 Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Wed, 19 May 2021 10:18:32 +0200 Subject: [PATCH 22/45] ENH: add event4_honeypot_darknet shadowserver config & tests and add legacy tag to darknet config --- intelmq/bots/parsers/shadowserver/_config.py | 51 ++++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 0538ac2c2..1880dd6db 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ 
b/intelmq/bots/parsers/shadowserver/_config.py @@ -2588,7 +2588,6 @@ def force_base64(value: Optional[str]) -> Optional[str]: 'classification.type': 'masquerade', } } -<<<<<<< HEAD # https://www.shadowserver.org/what-we-do/network-reporting/honeypot-darknet-events-report/ event4_honeypot_darknet = { @@ -2970,8 +2969,48 @@ def scan_exchange_identifier(field): 'classification.identifier': 'device-id', } } -======= ->>>>>>> ecef0ea7f (ENH: add event_ip_spoofer shadowserver config and corresponding tests) + +# https://www.shadowserver.org/what-we-do/network-reporting/honeypot-darknet-events-report/ +event4_honeypot_darknet = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('source.ip', 'src_ip'), + ], + 'optional_fields': [ + ('source.port', 'src_port'), + ('source.asn', 'src_asn', invalidate_zero), + ('source.geolocation.cc', 'src_geo'), + ('source.geolocation.region', 'src_region'), + ('source.geolocation.city', 'src_city'), + ('source.reverse_dns', 'src_hostname'), + ('extra.source.naics', 'src_naics', convert_int), + ('extra.source.sector', 'src_sector', validate_to_none), + ('extra.', 'device_vendor', validate_to_none), + ('extra.', 'device_type', validate_to_none), + ('extra.', 'device_model', validate_to_none), + ('destination.ip', 'dst_ip', validate_ip), + ('destination.port', 'dst_port', convert_int), + ('destination.asn', 'dst_asn', invalidate_zero), + ('destination.geolocation.cc', 'dst_geo'), + ('destination.geolocation.region', 'dst_region'), + ('destination.geolocation.city', 'dst_city'), + ('destination.reverse_dns', 'dst_hostname'), + ('extra.destination.naics', 'dst_naics', invalidate_zero), + ('extra.destination.sector', 'dst_sector', validate_to_none), + ('extra.', 'public_source', validate_to_none), + ('malware.name', 'infection'), + ('extra.', 'family', validate_to_none), + ('classification.identifier', 'tag'), # different values possible in this report + ('extra.', 'application', validate_to_none), + ('extra.', 'version', validate_to_none), + ('extra.', 'event_id', validate_to_none), + ('extra.', 'count', convert_int), + ], + 'constant_fields': { + 'classification.taxonomy': 'other', + 'classification.type': 'other', + }, +} mapping = ( # feed name, file name, function @@ -3011,17 +3050,11 @@ def scan_exchange_identifier(field): ('Honeypot-Darknet', 'event4_honeypot_darknet', event4_honeypot_darknet), ('Honeypot-HTTP-Scan', 'event4_honeypot_http_scan', honeypot_http_scan), ('ICS-Scanners', 'hp_ics_scan', ics_scanners), -<<<<<<< HEAD ('IPv6-Sinkhole-HTTP-Drone', 'sinkhole6_http', ipv6_sinkhole_http_drone), # legacy (replaced by event46_sinkhole_http) ('IP-Spoofer-Events', 'event4_ip_spoofer', event4_ip_spoofer), ('Microsoft-Sinkhole', 'microsoft_sinkhole', microsoft_sinkhole), # legacy (replaced by event46_sinkhole_http) ('Microsoft-Sinkhole-Events IPv4', 'event4_microsoft_sinkhole', event46_sinkhole), ('Microsoft-Sinkhole-Events-HTTP IPv4', 'event4_microsoft_sinkhole_http', event46_sinkhole_http), -======= - ('IPv6-Sinkhole-HTTP-Drone', 'sinkhole6_http', ipv6_sinkhole_http_drone), - ('IP-Spoofer-Events', 'event4_ip_spoofer', event4_ip_spoofer), - ('Microsoft-Sinkhole', 'microsoft_sinkhole', microsoft_sinkhole), ->>>>>>> ecef0ea7f (ENH: add event_ip_spoofer shadowserver config and corresponding tests) ('NTP-Monitor', 'scan_ntpmonitor', ntp_monitor), ('NTP-Version', 'scan_ntp', ntp_version), ('Open-Chargen', 'scan_chargen', open_chargen), From df77cdad094cc441beeb0e548ba42d8ece27f987 Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Thu, 
20 May 2021 10:54:47 +0200 Subject: [PATCH 23/45] ENH: add event46_sinkhole shadowserver config & tests and add legacy tag to the configs it replaces and update changelog and documentation accordingly --- CHANGELOG.md | 4 ++ intelmq/bots/parsers/shadowserver/_config.py | 48 ++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfc646c69..3c78d3205 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -390,7 +390,11 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W - Add support for new field `SourceIpInfo.SourceIpv4Int` (PR#1940 by Sebastian Wagner). - Fix mapping of "ConnectionType" fields, this is not `protocol.application`. Now mapped to `extra.*.connection_type` (PR#1940 by Sebastian Wagner). - `intelmq.bots.parsers.shadowserver._config`: +<<<<<<< HEAD - Add support for the new feeds *Honeypot-Amplification-DDoS-Events*, *Honeypot-Brute-Force-Events*, *Honeypot-Darknet*, *IP-Spoofer-Events*, *Sinkhole-Events*, *Sinkhole-HTTP-Events*, *Vulnerable-Exchange-Server*, *Sinkhole-Events-HTTP-Referer* (PR#1950, PR#1952, PR#1953, PR#1954, PR#1970 by Birger Schacht and Sebastian Wagner, PR#1971 by Mikk Margus Möll). +======= + - Add support for the new feeds *Honeypot-Amplification-DDoS-Events*, *Honeypot-Brute-Force-Events*, *Honeypot-Darknet*, *IP-Spoofer-Events*, *Sinkhole-Events*, *Sinkhole-HTTP-Events* (PR#1950, PR#1952, PR#1953 and PR#1954 by Birger Schacht and Sebastian Wagner). +>>>>>>> 366505cc6 (ENH: add event46_sinkhole shadowserver config & tests) #### Experts - `intelmq.bots.experts.splunk_saved_search.expert`: diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 1880dd6db..94b4fd811 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -3012,6 +3012,54 @@ def scan_exchange_identifier(field): }, } +event46_sinkhole_http = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('source.ip', 'src_ip'), + ('source.port', 'src_port'), + ], + 'optional_fields': [ + ('protocol.transport', 'protocol'), + ('source.asn', 'src_asn', invalidate_zero), + ('source.geolocation.cc', 'src_geo'), + ('source.geolocation.region', 'src_region'), + ('source.geolocation.city', 'src_city'), + ('source.reverse_dns', 'src_hostname'), + ('extra.source.naics', 'src_naics', convert_int), + ('extra.source.sector', 'src_sector', validate_to_none), + ('extra.', 'device_vendor', validate_to_none), + ('extra.', 'device_type', validate_to_none), + ('extra.', 'device_model', validate_to_none), + ('destination.ip', 'dst_ip', validate_ip), + ('destination.port', 'dst_port'), + ('destination.asn', 'dst_asn', invalidate_zero), + ('destination.geolocation.cc', 'dst_geo'), + ('destination.geolocation.region', 'dst_region'), + ('destination.geolocation.city', 'dst_city'), + ('destination.reverse_dns', 'dst_hostname'), + ('extra.destination.naics', 'dst_naics', invalidate_zero), + ('extra.destination.sector', 'dst_sector', validate_to_none), + ('extra.', 'public_source', validate_to_none), + ('malware.name', 'infection'), + ('extra.', 'family', validate_to_none), + ('classification.identifier', 'tag'), + ('extra.', 'application', validate_to_none), + ('extra.', 'version', validate_to_none), + ('extra.', 'event_id', validate_to_none), + ('destination.url', 'http_url', convert_http_host_and_url, True), + ('destination.fqdn', 'http_host', validate_fqdn), + ('extra.', 'http_agent', validate_to_none), + ('extra.', 
'forwarded_by', validate_to_none), + ('extra.', 'ssl_cipher', validate_to_none), + ('extra.', 'http_referer', validate_to_none), + ], + 'constant_fields': { + 'classification.taxonomy': 'malicious code', + 'classification.type': 'infected-system', + 'protocol.application': 'http', + }, +} + mapping = ( # feed name, file name, function ('Accessible-ADB', 'scan_adb', accessible_adb), From cffc303ae4c5d55543b6391d3ed4a72704fc2264 Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Mon, 31 May 2021 08:28:05 +0200 Subject: [PATCH 24/45] FIX: change copyright header syntax in md files Closes #1972 --- .github/pull_request_template.md | 3 ++- CHANGELOG.md | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index f4f7fc076..591dc5b24 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,5 +1,6 @@ -.. + # Please read the notes below and replace them with the description of you pull request diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c78d3205..015fc1358 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ From 0adf0486978a1d21ea7e861b04c716168e38a39d Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Thu, 27 May 2021 08:41:14 +0200 Subject: [PATCH 25/45] ENH: add Shadowserver scan_exchange config & tests See https://www.shadowserver.org/what-we-do/network-reporting/vulnerable-exchange-server-report/ --- intelmq/bots/parsers/shadowserver/_config.py | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 94b4fd811..31ab0054d 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -3060,6 +3060,33 @@ def scan_exchange_identifier(field): }, } +# https://www.shadowserver.org/what-we-do/network-reporting/vulnerable-exchange-server-report/ +scan_exchange = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('source.ip', 'ip'), + ('source.port', 'port'), + ], + 'optional_fields': [ + ('source.reverse_dns', 'hostname'), + ('classification.identifier', 'tag'), + ('source.asn', 'asn', invalidate_zero), + ('source.geolocation.cc', 'geo'), + ('source.geolocation.region', 'region'), + ('source.geolocation.city', 'city'), + ('extra.source.naics', 'naics', convert_int), + ('extra.', 'sic', invalidate_zero), + ('extra.source.sector', 'sector', validate_to_none), + ('extra.', 'version', validate_to_none), + ('extra.', 'servername', validate_to_none), + ], + 'constant_fields': { + 'classification.taxonomy': 'malicious code', + 'classification.type': 'infected-system', + 'protocol.application': 'http', + }, +} + mapping = ( # feed name, file name, function ('Accessible-ADB', 'scan_adb', accessible_adb), From 9aec5f56f097d977baa50f3ba68566802f0b2b2e Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 31 May 2021 11:39:07 +0200 Subject: [PATCH 26/45] ENH+DOC: shadowserver exchange feed fix mapping use compromised type if the data indicates an active webshell plus add testcases add changelog update bots documentation --- CHANGELOG.md | 4 +++ intelmq/bots/parsers/shadowserver/_config.py | 27 +++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 015fc1358..51d088e0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -394,11 +394,15 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W - Add support 
for new field `SourceIpInfo.SourceIpv4Int` (PR#1940 by Sebastian Wagner). - Fix mapping of "ConnectionType" fields, this is not `protocol.application`. Now mapped to `extra.*.connection_type` (PR#1940 by Sebastian Wagner). - `intelmq.bots.parsers.shadowserver._config`: +<<<<<<< HEAD <<<<<<< HEAD - Add support for the new feeds *Honeypot-Amplification-DDoS-Events*, *Honeypot-Brute-Force-Events*, *Honeypot-Darknet*, *IP-Spoofer-Events*, *Sinkhole-Events*, *Sinkhole-HTTP-Events*, *Vulnerable-Exchange-Server*, *Sinkhole-Events-HTTP-Referer* (PR#1950, PR#1952, PR#1953, PR#1954, PR#1970 by Birger Schacht and Sebastian Wagner, PR#1971 by Mikk Margus Möll). ======= - Add support for the new feeds *Honeypot-Amplification-DDoS-Events*, *Honeypot-Brute-Force-Events*, *Honeypot-Darknet*, *IP-Spoofer-Events*, *Sinkhole-Events*, *Sinkhole-HTTP-Events* (PR#1950, PR#1952, PR#1953 and PR#1954 by Birger Schacht and Sebastian Wagner). >>>>>>> 366505cc6 (ENH: add event46_sinkhole shadowserver config & tests) +======= + - Add support for the new feeds *Honeypot-Amplification-DDoS-Events*, *Honeypot-Brute-Force-Events*, *Honeypot-Darknet*, *IP-Spoofer-Events*, *Sinkhole-Events*, *Sinkhole-HTTP-Events*, *Vulnerable-Exchange-Server* (PR#1950, PR#1952, PR#1953, PR#1954, PR#1970 by Birger Schacht and Sebastian Wagner). +>>>>>>> 4d3f4d647 (ENH+DOC: shadowserver exchange feed) #### Experts - `intelmq.bots.experts.splunk_saved_search.expert`: diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 31ab0054d..9a1ad2ae3 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -3060,7 +3060,26 @@ def scan_exchange_identifier(field): }, } + # https://www.shadowserver.org/what-we-do/network-reporting/vulnerable-exchange-server-report/ +def scan_exchange_taxonomy(field): + if field == 'exchange;webshell': + return 'intrusions' + return 'vulnerable' + + +def scan_exchange_type(field): + if field == 'exchange;webshell': + return 'compromised' + return 'infected-system' + + +def scan_exchange_identifier(field): + if field == 'exchange;webshell': + return 'exchange-server-webshell' + return 'vulnerable-exchange-server' + + scan_exchange = { 'required_fields': [ ('time.source', 'timestamp', add_UTC_to_timestamp), @@ -3069,7 +3088,7 @@ def scan_exchange_identifier(field): ], 'optional_fields': [ ('source.reverse_dns', 'hostname'), - ('classification.identifier', 'tag'), + ('extra.', 'tag'), ('source.asn', 'asn', invalidate_zero), ('source.geolocation.cc', 'geo'), ('source.geolocation.region', 'region'), @@ -3079,11 +3098,11 @@ def scan_exchange_identifier(field): ('extra.source.sector', 'sector', validate_to_none), ('extra.', 'version', validate_to_none), ('extra.', 'servername', validate_to_none), + ('classification.taxonomy', 'tag', scan_exchange_taxonomy), + ('classification.type', 'tag', scan_exchange_type), + ('classification.identifier', 'tag', scan_exchange_identifier), ], 'constant_fields': { - 'classification.taxonomy': 'malicious code', - 'classification.type': 'infected-system', - 'protocol.application': 'http', }, } From 8f115c891eaeaa78a7a34a814516743d91ac3def Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikk=20Margus=20M=C3=B6ll?= Date: Thu, 27 May 2021 13:33:30 +0300 Subject: [PATCH 27/45] ENH: support event-sinkhole-http-referer in shadowserver parser --- intelmq/bots/parsers/shadowserver/_config.py | 43 +++++++++++++++++++ .../bots/parsers/shadowserver/test_broken.py | 2 +- 2 files changed, 44 insertions(+), 
1 deletion(-) diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 9a1ad2ae3..b3a4917cf 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -3106,6 +3106,46 @@ def scan_exchange_identifier(field): }, } +event46_sinkhole_http_referer = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('destination.ip', 'dst_ip', validate_ip), + ('destination.port', 'dst_port'), + ], + 'optional_fields': [ + ('extra.', 'http_referer_ip', validate_ip), + ('extra.', 'http_referer_asn', convert_int), + ('extra.', 'http_referer_geo', validate_to_none), + ('extra.', 'http_referer_region', validate_to_none), + ('extra.', 'http_referer_city', validate_to_none), + ('extra.', 'http_referer_hostname', validate_to_none), + ('extra.', 'http_referer_naics', invalidate_zero), + ('extra.', 'http_referer_sector', validate_to_none), + ('destination.asn', 'dst_asn', invalidate_zero), + ('destination.geolocation.cc', 'dst_geo'), + ('destination.geolocation.region', 'dst_region'), + ('destination.geolocation.city', 'dst_city'), + ('destination.reverse_dns', 'dst_hostname'), + ('extra.destination.naics', 'dst_naics', invalidate_zero), + ('extra.destination.sector', 'dst_sector', validate_to_none), + ('extra.', 'public_source', validate_to_none), + ('malware.name', 'infection'), + ('extra.', 'family', validate_to_none), + ('extra.', 'tag', validate_to_none), + ('extra.', 'application', validate_to_none), + ('extra.', 'version', validate_to_none), + ('extra.', 'event_id', validate_to_none), + ('destination.url', 'http_url', convert_http_host_and_url, True), + ('destination.fqdn', 'http_host', validate_fqdn), + ('extra.', 'http_referer', validate_to_none), + ], + 'constant_fields': { + 'classification.identifier': 'sinkhole-http-referer', + 'classification.taxonomy': 'other', + 'classification.type': 'other', + } +} + mapping = ( # feed name, file name, function ('Accessible-ADB', 'scan_adb', accessible_adb), @@ -3187,7 +3227,10 @@ def scan_exchange_identifier(field): ('Sinkhole-Events-HTTP IPv4', 'event4_sinkhole_http', event46_sinkhole_http), ('Sinkhole-Events-HTTP IPv6', 'event6_sinkhole_http', event46_sinkhole_http), ('Sinkhole-HTTP-Drone', 'sinkhole_http_drone', sinkhole_http_drone), # legacy (replaced by event46_sinkhole_http) +<<<<<<< HEAD ('Sinkhole-Events-HTTP-Referer', 'event4_sinkhole_http_referer', event46_sinkhole_http_referer), +======= +>>>>>>> 55f126930 (ENH: support event-sinkhole-http-referer in shadowserver parser) ('Sinkhole-Events-HTTP-Referer IPv4', 'event4_sinkhole_http_referer', event46_sinkhole_http_referer), ('Sinkhole-Events-HTTP-Referer IPv6', 'event6_sinkhole_http_referer', event46_sinkhole_http_referer), ('Spam-URL', 'spam_url', spam_url), diff --git a/intelmq/tests/bots/parsers/shadowserver/test_broken.py b/intelmq/tests/bots/parsers/shadowserver/test_broken.py index 64bd342f3..f4bea7bcf 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_broken.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_broken.py @@ -53,7 +53,7 @@ def test_broken(self): self.assertLogMatches(pattern="Detected report's file name: 'scan_http'.", levelname="DEBUG") self.assertLogMatches(pattern="Failed to parse line.") - self.assertLogMatches(pattern="ValueError: Required column 'timestamp' not found in feed 'Accessible-HTTP'. 
Possible change in data format or misconfiguration.") + self.assertLogMatches(pattern="ValueError: Required column 'timestamp' not found in feed 'Vulnerable-HTTP'. Possible change in data format or misconfiguration.") self.assertLogMatches(pattern=r"Sent 0 events and found 1 problem\(s\)\.", levelname="INFO") From b6dcaf9b8419ad98715fe7a9fae008bfc2edae42 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 31 May 2021 13:45:35 +0200 Subject: [PATCH 28/45] DOC for PR#1971 --- CHANGELOG.md | 8 ++++---- intelmq/bots/parsers/shadowserver/_config.py | 1 + intelmq/tests/bots/parsers/shadowserver/test_broken.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51d088e0b..2f376c65c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,5 @@ @@ -395,6 +391,7 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W - Fix mapping of "ConnectionType" fields, this is not `protocol.application`. Now mapped to `extra.*.connection_type` (PR#1940 by Sebastian Wagner). - `intelmq.bots.parsers.shadowserver._config`: <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD - Add support for the new feeds *Honeypot-Amplification-DDoS-Events*, *Honeypot-Brute-Force-Events*, *Honeypot-Darknet*, *IP-Spoofer-Events*, *Sinkhole-Events*, *Sinkhole-HTTP-Events*, *Vulnerable-Exchange-Server*, *Sinkhole-Events-HTTP-Referer* (PR#1950, PR#1952, PR#1953, PR#1954, PR#1970 by Birger Schacht and Sebastian Wagner, PR#1971 by Mikk Margus Möll). ======= @@ -403,6 +400,9 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W ======= - Add support for the new feeds *Honeypot-Amplification-DDoS-Events*, *Honeypot-Brute-Force-Events*, *Honeypot-Darknet*, *IP-Spoofer-Events*, *Sinkhole-Events*, *Sinkhole-HTTP-Events*, *Vulnerable-Exchange-Server* (PR#1950, PR#1952, PR#1953, PR#1954, PR#1970 by Birger Schacht and Sebastian Wagner). >>>>>>> 4d3f4d647 (ENH+DOC: shadowserver exchange feed) +======= + - Add support for the new feeds *Honeypot-Amplification-DDoS-Events*, *Honeypot-Brute-Force-Events*, *Honeypot-Darknet*, *IP-Spoofer-Events*, *Sinkhole-Events*, *Sinkhole-HTTP-Events*, *Vulnerable-Exchange-Server*, *Sinkhole-Events-HTTP-Referer* (PR#1950, PR#1952, PR#1953, PR#1954, PR#1970 by Birger Schacht and Sebastian Wagner, PR#1971 by Mikk Margus Möll). +>>>>>>> f056ff7d4 (DOC for PR#1971) #### Experts - `intelmq.bots.experts.splunk_saved_search.expert`: diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index b3a4917cf..27b01128d 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -3106,6 +3106,7 @@ def scan_exchange_identifier(field): }, } +# https://www.shadowserver.org/what-we-do/network-reporting/sinkhole-http-referer-events-report/ event46_sinkhole_http_referer = { 'required_fields': [ ('time.source', 'timestamp', add_UTC_to_timestamp), diff --git a/intelmq/tests/bots/parsers/shadowserver/test_broken.py b/intelmq/tests/bots/parsers/shadowserver/test_broken.py index f4bea7bcf..64bd342f3 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_broken.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_broken.py @@ -53,7 +53,7 @@ def test_broken(self): self.assertLogMatches(pattern="Detected report's file name: 'scan_http'.", levelname="DEBUG") self.assertLogMatches(pattern="Failed to parse line.") - self.assertLogMatches(pattern="ValueError: Required column 'timestamp' not found in feed 'Vulnerable-HTTP'. 
Possible change in data format or misconfiguration.") + self.assertLogMatches(pattern="ValueError: Required column 'timestamp' not found in feed 'Accessible-HTTP'. Possible change in data format or misconfiguration.") self.assertLogMatches(pattern=r"Sent 0 events and found 1 problem\(s\)\.", levelname="INFO") From a5941cee36b8a4bea95fd19d7f2ae892a6e32df8 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 31 May 2021 21:14:34 +0200 Subject: [PATCH 29/45] DOC/ENH: shadowserver feed enhance mappings add 4/6 agnostic mapping for `Sinkhole-Events` as well document feeds with IPv4 and IPv6 better and shorter --- intelmq/bots/parsers/shadowserver/_config.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 27b01128d..05bf0da77 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -3228,10 +3228,7 @@ def scan_exchange_identifier(field): ('Sinkhole-Events-HTTP IPv4', 'event4_sinkhole_http', event46_sinkhole_http), ('Sinkhole-Events-HTTP IPv6', 'event6_sinkhole_http', event46_sinkhole_http), ('Sinkhole-HTTP-Drone', 'sinkhole_http_drone', sinkhole_http_drone), # legacy (replaced by event46_sinkhole_http) -<<<<<<< HEAD ('Sinkhole-Events-HTTP-Referer', 'event4_sinkhole_http_referer', event46_sinkhole_http_referer), -======= ->>>>>>> 55f126930 (ENH: support event-sinkhole-http-referer in shadowserver parser) ('Sinkhole-Events-HTTP-Referer IPv4', 'event4_sinkhole_http_referer', event46_sinkhole_http_referer), ('Sinkhole-Events-HTTP-Referer IPv6', 'event6_sinkhole_http_referer', event46_sinkhole_http_referer), ('Spam-URL', 'spam_url', spam_url), From 8e0463bf1ecf1ded3b3cf7df8c6c5929b66ae97a Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Jun 2021 17:28:01 +0200 Subject: [PATCH 30/45] DOC: add changelog entry for PR#1975 --- CHANGELOG.md | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f376c65c..f570c8f30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ CHANGELOG ========== +<<<<<<< HEAD 3.1.0 (unreleased) ------------------ @@ -212,6 +213,9 @@ See [open bug reports](https://github.com/certtools/intelmq/issues?q=is%3Aissue+ 3.0.0 (2021-07-02) +======= +3.0.0 (unreleased) +>>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) ------------------ ### Configuration @@ -222,24 +226,36 @@ See [open bug reports](https://github.com/certtools/intelmq/issues?q=is%3Aissue+ ### Core - `intelmq.lib.harmonization`: +<<<<<<< HEAD - New class `ClassificationTaxonomy` with fixed list of taxonomies and sanitiation (by Sebastian Wagner). - `intelmq.lib.bot`: - Handle `InvalidValue` exceptions upon message retrieval by dumping the message instead of repeating endlessly (#1765, PR#1766 by Filip Pokorný). - Rewrite of the parameter loading and handling, getting rid of the `parameters` member (PR#1729 by Birger Schacht). - The pipeline is now initialized before the call of `init` to allow bots accessing data directly on startup/initialization for cleanup or maintenance tasks (PR#1982 by Sebastian Wagner). +======= + - New class `ClassificationTaxonomy` with fixed list of taxonomies and sanitiation +- `intelmq.lib.bot`: + - Handle `InvalidValue` exceptions upon message retrieval by dumping the message instead of repeating endlessly (#1765, PR#1766 by Filip Pokorný). 
+ - Rewrite of the parameter loading and handling, getting rid of the `parameters` member (PR#1729 by Birger Schacht). +>>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) - `intelmq.lib.exceptions`: - `InvalidValue`: Add optional parameter `object` (PR#1766 by Filip Pokorný). - `intelmq.lib.utils`: - New function `list_all_bots` to list all available/installed bots as replacement for the BOTS file (#368, #552, #644, #757, #1069, #1750, PR#1751 by Sebastian Waldbauer). +<<<<<<< HEAD - New function `get_bots_settings` to return the effective bot parameters, with global parameters applied (PR#1928 by Sebastian Wagner, #1927). - Removed deprecated function `create_request_session_from_bot` (PR#1997 by Sebastian Wagner, #1404). - `parse_relative`: Add support for parsing minutes and seconds (PR#1857 by Sebastian Wagner). +======= + - New function `get_bots_settings` to return the effective bot parameters, with global parameters applied. +>>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) - `intelmq.lib.bot_debugger`: - Set bot's `logging_level` directly in `__init__` before the bot's initialization by changing the default value (by Sebastian Wagner). - Rewrite `load_configuration_patch` by adapting it to the parameter and configuration rewrite (by Sebastian Wagner). - Do not rely on the runtime configuration's `group` setting of bots to determine the required message type of messages given on the command line (PR#1949 by Sebastian Wagner). ### Development +<<<<<<< HEAD - `rewrite_config_files.py`: Removed obsolete BOTS-file-related rewriting functionality (by Sebastian Wagner, #1543). - A GitHub Action that checks for [reuse compliance](https://reuse.software) of all the license and copyright headers was added (PR#1976 by Birger Schacht). - PyYAML is no longer a required dependency for development environments, all calls to it have been replaced by ruamel.yaml (by Sebastian Wagner). @@ -247,11 +263,19 @@ See [open bug reports](https://github.com/certtools/intelmq/issues?q=is%3Aissue+ ### Data Format The IntelMQ Data Harmonization ("DHO") is renamed to IntelMQ Data Format ("IDF"). Internal files remain and work the same as before (PR#1818 by Sebastian Waldbauer, fixes 1810). Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian Wagner, fixes #1409, #1476). +======= +- `rewrite_config_files.py`: Removed obsolete BOTS-file-related rewriting functionality. + +### Data Format +The IntelMQ Data Harmonization ("DHO") is renamed to IntelMQ Data Format ("IDF"). Internal files remain and work the same as before (PR#1818 by Sebastian Waldbauer, fixes 1810). +Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old namings are still supported until at least version 3.0. +>>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) - The taxonomy `abusive content` has been renamed to `abusive-content`. - The taxonomy `information content security` has been renamed to `information-content-security`. - The validation of type `unauthorised-information-access` has been fixed, a bug prevented the use of it. - The validation of type `unauthorised-information-modification` has been fixed, a bug prevented the use of it. - The type `leak` has been renamed to `data-leak`. +<<<<<<< HEAD - The type `dropzone` has been removed. Taxonomy `other` with type `other` and identifier `dropzone` can be used instead. Ongoing discussion in the RSIT WG. - The taxonomy `intrusion attempts` has been renamed to `intrusion-attempts`. 
- For the taxonomy intrusions (PR#1993 by Sebastian Wagner, addresses #1409): @@ -266,6 +290,14 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W - The type `malware` has been integrated into `infected-system` and `malware-distribution`, respectively (PR#1917 by Sebastian Wagner addresses #1409). - The type `ransomware` has been integrated into `infected-system`. - The type `dga domain` has been moved to the taxonomy `other` renamed `dga-domain` (PR#1992 by Sebastian Wagner fixes #1613). +======= +- The taxonomy `intrusion attempts` has been renamed to `intrusion-attempts`. +- The taxonomy `information gathering` has been rename to `information-gathering`. +- The taxonomy `malicious code` has been renamed to `malicious-code`. + - The type `c2server` has been renamed to `c2-server`. + - The type `malware` has been integrated into `infected-system` and `malware-distribution`, respectively. + - The type `ransomware` has been integrated into `infected-system`. +>>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) - For the taxonomy 'availability', the type `misconfiguration` is new. - For the taxonomy 'other', the type `unknown` has been renamed to `undetermined`. - For the taxonomy 'vulnerable': @@ -277,6 +309,7 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W #### Collectors - Remove `intelmq.bots.collectors.xmpp`: one of the dependencies of the bot was deprecated and according to a short survey on the IntelMQ +<<<<<<< HEAD users mailinglist, the bot is not used by anyone. (https://lists.cert.at/pipermail/intelmq-users/2020-October/000177.html, PR#1761 by Birger Schacht, closes #1614). - `intelmq.bots.collectors.mail._lib`: Added parameter `mail_starttls` for STARTTLS in all mail collector bots (PR#1831 by Marius Karotkis, fixes #1128). - Added `intelmq.bots.collectors.fireeye`: A bot that collects indicators from Fireeye MAS appliances (PR#1745 by Christopher Schappelwein). @@ -301,6 +334,21 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W - Added `intelmq.bots.experts.http.expert_content`: A bot that fetches an HTTP resource and checks if it contains a specific string (PR#1811 by Birger Schacht). - Added `intelmq.bots.experts.lookyloo.expert`: A bot that sends requests to a lookyloo instance & adds `screenshot_url` to the event (PR#1844 by Sebastian Waldbauer, fixes #1048). - Added `intelmq.bots.experts.rdap.expert`: A bot that checks the rdap protocol for an abuse contact for a given domain (PR#1881 by Sebastian Waldbauer and Sebastian Wagner). +======= + users mailinglist, the bot is not used by anyone. (https://lists.cert.at/pipermail/intelmq-users/2020-October/000177.html, PR#1761, closes #1614) +- `intelmq.bots.collectors.mail._lib`: Added parameter `mail_starttls` for STARTTLS in all mail collector bots (PR#1831 by Marius Karotkis, fixes #1128). +- Added `intelmq.bots.collectors.fireeye`: A bot that collects indicators from Fireeye MAS appliances (PR#1745 by Christopher Schappelwein). + +#### Parsers +- Added `intelmq.bots.parsers.fireeye`: A bot that parses hashes and URLs from Fireeye MAS indicators (PR#1745 by Christopher Schappelwein). + +#### Experts +- `intelmq.bots.experts.domain_suffix.expert`: Added `--update-database` option to update domain suffix database. +- Added `intelmq.bots.experts.http.expert_status`: A bot that fetches the HTTP Status for a given URI and adds it to the message (PR#1789 by Birger Schacht, fixes #1047 partly). 
+- Added `intelmq.bots.experts.http.expert_content`: A bot that fetches an HTTP resource and checks if it contains a specific string. +- Added `intelmq.bots.experts.lookyloo.expert`: A bot that sends requests to a lookyloo instance & adds `screenshot_url` to the event (PR#1844 by Sebastian Waldbauer, fixes #1048). +- Added `intelmq.bots.experts.rdap.expert`: A bot that checks the rdap protocol for an abuse contact for a given domain. +>>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) - `intelmq.bots.experts.sieve.expert`: - Add operators for comparing lists and sets (PR#1895 by Mikk Margus Möll): - `:equals` @@ -321,6 +369,7 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W - Split string and numeric matches into single- and multivalued variants, with the relevant new operators `:in`, `:containsany` and `:regexin` for string lists, and `:in` for numeric value lists (PR#1957 by Mikk Margus Möll). - Removed the `==` operator for lists, with the previous meaning of `:in`. Have a look at the NEWS.md for more information. - Added `intelmq.bots.experts.uwhoisd`: A bot that fetches the whois entry from a uwhois-instance (PR#1918 by Raphaël Vinot). +<<<<<<< HEAD - Removed deprecated `intelmq.bots.experts.ripencc_abuse_contact.expert`. It was replaced by `intelmq.bots.experts.ripe.expert` and marked as deprecated in 2.0.0.beta1 (PR#1997 by Sebastian Wagner, #1404). - `intelmq.bots.experts.modify.expert`: - Removed compatibility with deprecated configuration format before 1.0.0.dev7 (PR#1997 by Sebastian Wagner, #1404). @@ -332,11 +381,19 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W users mailinglist, the bot is not used by anyone. (https://lists.cert.at/pipermail/intelmq-users/2020-October/000177.html, PR#1761 by Birger Schacht, closes #1614) - `intelmq.bots.outputs.smtp`: Add more debug logging (PR#1949 by Sebastian Wagner). - Added new bot `intelmq.bots.outputs.templated_smtp` (PR#1901 by Karl-Johan Karlsson). +======= + +#### Outputs +- Remove `intelmq.bots.outputs.xmpp`: one of the dependencies of the bot was deprecated and according to a short survey on the IntelMQ + users mailinglist, the bot is not used by anyone. (https://lists.cert.at/pipermail/intelmq-users/2020-October/000177.html, PR#1761, closes #1614) +- `intelmq.bots.outputs.smtp`: Add more debug logging (PR#1949 by Sebastian Wagner). +>>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) ### Documentation - Updated user and developer documentation to reflect the removal of the BOTS file (PR#1780 by Birger Schacht). - Bots documentation: - Added anchors to all bot sections derived from the module names for easier linking (PR#1943 by Sebastian Wagner fixes part of certtools/intelmq-api#4). +<<<<<<< HEAD - License and copyright information was added to all the bots (PR#1976 by Birger Schacht). - Added documentation on the EventDB (PR#1955 by Birger Schacht, PR#1985 by Sebastian Wagner). - Added TimescaleDB for time-series documentation (PR#1990 by Sebastian Waldbauer). @@ -347,12 +404,18 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W ### Packaging - Docker images tagged with `certat/intelmq-full:develop` are built and published on every push to the develop branch (PR#1753 by Sebastian Waldbauer). - Adapt packaging to IntelMQ 3.0 changes: ruamel.yaml dependency, changed configuration, updated database-update scripts (by Birger Schacht and Sebastian Wagner). 
+======= + +### Packaging +- Docker images tagged with `certat/intelmq-full:develop` are built and published on every push to the develop branch (PR#1753 by Sebastian Waldbauer). +>>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) ### Tests - `intelmq.tests.lib.test_bot`: - Add test case for a raised `InvalidValue` exception upon message retrieval (#1765, PR#1766 by Filip Pokorný and Sebastian Wagner). - `intelmq.lib.test`: - Compare content of the `output` field as dictionaries, not as string in `assertMessageEqual` (PR#1975 by Karl-Johan Karlsson). +<<<<<<< HEAD - Support multiple calls to `run_bot` from test cases (PR#1989 by Sebastian Wagner). - Split `prepare_source_queue` out of `prepare_bot`. - Added new optional parameter `stop_bot` to `run_bot`. @@ -375,6 +438,14 @@ Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian W ### Known issues - ParserBot: erroneous raw line recovery in error handling (#1850). - ruamel.yaml loader and dumper: human readability bug / support for comments (#2003). +======= + +### Tools + +### Contrib + +### Known issues +>>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) 2.3.3 (2021-05-31) From 9754252d40659917a10e63e6d1c383ddd772ceca Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Mon, 7 Jun 2021 11:11:22 +0200 Subject: [PATCH 31/45] DOC: azure collector: document minimum azure version --- CHANGELOG.md | 71 ------------------- .../collectors/microsoft/collector_azure.py | 4 ++ 2 files changed, 4 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f570c8f30..2f376c65c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,6 @@ CHANGELOG ========== -<<<<<<< HEAD 3.1.0 (unreleased) ------------------ @@ -213,9 +212,6 @@ See [open bug reports](https://github.com/certtools/intelmq/issues?q=is%3Aissue+ 3.0.0 (2021-07-02) -======= -3.0.0 (unreleased) ->>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) ------------------ ### Configuration @@ -226,36 +222,24 @@ See [open bug reports](https://github.com/certtools/intelmq/issues?q=is%3Aissue+ ### Core - `intelmq.lib.harmonization`: -<<<<<<< HEAD - New class `ClassificationTaxonomy` with fixed list of taxonomies and sanitiation (by Sebastian Wagner). - `intelmq.lib.bot`: - Handle `InvalidValue` exceptions upon message retrieval by dumping the message instead of repeating endlessly (#1765, PR#1766 by Filip Pokorný). - Rewrite of the parameter loading and handling, getting rid of the `parameters` member (PR#1729 by Birger Schacht). - The pipeline is now initialized before the call of `init` to allow bots accessing data directly on startup/initialization for cleanup or maintenance tasks (PR#1982 by Sebastian Wagner). -======= - - New class `ClassificationTaxonomy` with fixed list of taxonomies and sanitiation -- `intelmq.lib.bot`: - - Handle `InvalidValue` exceptions upon message retrieval by dumping the message instead of repeating endlessly (#1765, PR#1766 by Filip Pokorný). - - Rewrite of the parameter loading and handling, getting rid of the `parameters` member (PR#1729 by Birger Schacht). ->>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) - `intelmq.lib.exceptions`: - `InvalidValue`: Add optional parameter `object` (PR#1766 by Filip Pokorný). - `intelmq.lib.utils`: - New function `list_all_bots` to list all available/installed bots as replacement for the BOTS file (#368, #552, #644, #757, #1069, #1750, PR#1751 by Sebastian Waldbauer). 
-<<<<<<< HEAD - New function `get_bots_settings` to return the effective bot parameters, with global parameters applied (PR#1928 by Sebastian Wagner, #1927). - Removed deprecated function `create_request_session_from_bot` (PR#1997 by Sebastian Wagner, #1404). - `parse_relative`: Add support for parsing minutes and seconds (PR#1857 by Sebastian Wagner). -======= - - New function `get_bots_settings` to return the effective bot parameters, with global parameters applied. ->>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) - `intelmq.lib.bot_debugger`: - Set bot's `logging_level` directly in `__init__` before the bot's initialization by changing the default value (by Sebastian Wagner). - Rewrite `load_configuration_patch` by adapting it to the parameter and configuration rewrite (by Sebastian Wagner). - Do not rely on the runtime configuration's `group` setting of bots to determine the required message type of messages given on the command line (PR#1949 by Sebastian Wagner). ### Development -<<<<<<< HEAD - `rewrite_config_files.py`: Removed obsolete BOTS-file-related rewriting functionality (by Sebastian Wagner, #1543). - A GitHub Action that checks for [reuse compliance](https://reuse.software) of all the license and copyright headers was added (PR#1976 by Birger Schacht). - PyYAML is no longer a required dependency for development environments, all calls to it have been replaced by ruamel.yaml (by Sebastian Wagner). @@ -263,19 +247,11 @@ See [open bug reports](https://github.com/certtools/intelmq/issues?q=is%3Aissue+ ### Data Format The IntelMQ Data Harmonization ("DHO") is renamed to IntelMQ Data Format ("IDF"). Internal files remain and work the same as before (PR#1818 by Sebastian Waldbauer, fixes 1810). Update allowed classification fields to version 1.3 (2021-05-18) (by Sebastian Wagner, fixes #1409, #1476). -======= -- `rewrite_config_files.py`: Removed obsolete BOTS-file-related rewriting functionality. - -### Data Format -The IntelMQ Data Harmonization ("DHO") is renamed to IntelMQ Data Format ("IDF"). Internal files remain and work the same as before (PR#1818 by Sebastian Waldbauer, fixes 1810). -Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old namings are still supported until at least version 3.0. ->>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) - The taxonomy `abusive content` has been renamed to `abusive-content`. - The taxonomy `information content security` has been renamed to `information-content-security`. - The validation of type `unauthorised-information-access` has been fixed, a bug prevented the use of it. - The validation of type `unauthorised-information-modification` has been fixed, a bug prevented the use of it. - The type `leak` has been renamed to `data-leak`. -<<<<<<< HEAD - The type `dropzone` has been removed. Taxonomy `other` with type `other` and identifier `dropzone` can be used instead. Ongoing discussion in the RSIT WG. - The taxonomy `intrusion attempts` has been renamed to `intrusion-attempts`. - For the taxonomy intrusions (PR#1993 by Sebastian Wagner, addresses #1409): @@ -290,14 +266,6 @@ Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old n - The type `malware` has been integrated into `infected-system` and `malware-distribution`, respectively (PR#1917 by Sebastian Wagner addresses #1409). - The type `ransomware` has been integrated into `infected-system`. - The type `dga domain` has been moved to the taxonomy `other` renamed `dga-domain` (PR#1992 by Sebastian Wagner fixes #1613). 
-======= -- The taxonomy `intrusion attempts` has been renamed to `intrusion-attempts`. -- The taxonomy `information gathering` has been rename to `information-gathering`. -- The taxonomy `malicious code` has been renamed to `malicious-code`. - - The type `c2server` has been renamed to `c2-server`. - - The type `malware` has been integrated into `infected-system` and `malware-distribution`, respectively. - - The type `ransomware` has been integrated into `infected-system`. ->>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) - For the taxonomy 'availability', the type `misconfiguration` is new. - For the taxonomy 'other', the type `unknown` has been renamed to `undetermined`. - For the taxonomy 'vulnerable': @@ -309,7 +277,6 @@ Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old n #### Collectors - Remove `intelmq.bots.collectors.xmpp`: one of the dependencies of the bot was deprecated and according to a short survey on the IntelMQ -<<<<<<< HEAD users mailinglist, the bot is not used by anyone. (https://lists.cert.at/pipermail/intelmq-users/2020-October/000177.html, PR#1761 by Birger Schacht, closes #1614). - `intelmq.bots.collectors.mail._lib`: Added parameter `mail_starttls` for STARTTLS in all mail collector bots (PR#1831 by Marius Karotkis, fixes #1128). - Added `intelmq.bots.collectors.fireeye`: A bot that collects indicators from Fireeye MAS appliances (PR#1745 by Christopher Schappelwein). @@ -334,21 +301,6 @@ Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old n - Added `intelmq.bots.experts.http.expert_content`: A bot that fetches an HTTP resource and checks if it contains a specific string (PR#1811 by Birger Schacht). - Added `intelmq.bots.experts.lookyloo.expert`: A bot that sends requests to a lookyloo instance & adds `screenshot_url` to the event (PR#1844 by Sebastian Waldbauer, fixes #1048). - Added `intelmq.bots.experts.rdap.expert`: A bot that checks the rdap protocol for an abuse contact for a given domain (PR#1881 by Sebastian Waldbauer and Sebastian Wagner). -======= - users mailinglist, the bot is not used by anyone. (https://lists.cert.at/pipermail/intelmq-users/2020-October/000177.html, PR#1761, closes #1614) -- `intelmq.bots.collectors.mail._lib`: Added parameter `mail_starttls` for STARTTLS in all mail collector bots (PR#1831 by Marius Karotkis, fixes #1128). -- Added `intelmq.bots.collectors.fireeye`: A bot that collects indicators from Fireeye MAS appliances (PR#1745 by Christopher Schappelwein). - -#### Parsers -- Added `intelmq.bots.parsers.fireeye`: A bot that parses hashes and URLs from Fireeye MAS indicators (PR#1745 by Christopher Schappelwein). - -#### Experts -- `intelmq.bots.experts.domain_suffix.expert`: Added `--update-database` option to update domain suffix database. -- Added `intelmq.bots.experts.http.expert_status`: A bot that fetches the HTTP Status for a given URI and adds it to the message (PR#1789 by Birger Schacht, fixes #1047 partly). -- Added `intelmq.bots.experts.http.expert_content`: A bot that fetches an HTTP resource and checks if it contains a specific string. -- Added `intelmq.bots.experts.lookyloo.expert`: A bot that sends requests to a lookyloo instance & adds `screenshot_url` to the event (PR#1844 by Sebastian Waldbauer, fixes #1048). -- Added `intelmq.bots.experts.rdap.expert`: A bot that checks the rdap protocol for an abuse contact for a given domain. 
->>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) - `intelmq.bots.experts.sieve.expert`: - Add operators for comparing lists and sets (PR#1895 by Mikk Margus Möll): - `:equals` @@ -369,7 +321,6 @@ Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old n - Split string and numeric matches into single- and multivalued variants, with the relevant new operators `:in`, `:containsany` and `:regexin` for string lists, and `:in` for numeric value lists (PR#1957 by Mikk Margus Möll). - Removed the `==` operator for lists, with the previous meaning of `:in`. Have a look at the NEWS.md for more information. - Added `intelmq.bots.experts.uwhoisd`: A bot that fetches the whois entry from a uwhois-instance (PR#1918 by Raphaël Vinot). -<<<<<<< HEAD - Removed deprecated `intelmq.bots.experts.ripencc_abuse_contact.expert`. It was replaced by `intelmq.bots.experts.ripe.expert` and marked as deprecated in 2.0.0.beta1 (PR#1997 by Sebastian Wagner, #1404). - `intelmq.bots.experts.modify.expert`: - Removed compatibility with deprecated configuration format before 1.0.0.dev7 (PR#1997 by Sebastian Wagner, #1404). @@ -381,19 +332,11 @@ Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old n users mailinglist, the bot is not used by anyone. (https://lists.cert.at/pipermail/intelmq-users/2020-October/000177.html, PR#1761 by Birger Schacht, closes #1614) - `intelmq.bots.outputs.smtp`: Add more debug logging (PR#1949 by Sebastian Wagner). - Added new bot `intelmq.bots.outputs.templated_smtp` (PR#1901 by Karl-Johan Karlsson). -======= - -#### Outputs -- Remove `intelmq.bots.outputs.xmpp`: one of the dependencies of the bot was deprecated and according to a short survey on the IntelMQ - users mailinglist, the bot is not used by anyone. (https://lists.cert.at/pipermail/intelmq-users/2020-October/000177.html, PR#1761, closes #1614) -- `intelmq.bots.outputs.smtp`: Add more debug logging (PR#1949 by Sebastian Wagner). ->>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) ### Documentation - Updated user and developer documentation to reflect the removal of the BOTS file (PR#1780 by Birger Schacht). - Bots documentation: - Added anchors to all bot sections derived from the module names for easier linking (PR#1943 by Sebastian Wagner fixes part of certtools/intelmq-api#4). -<<<<<<< HEAD - License and copyright information was added to all the bots (PR#1976 by Birger Schacht). - Added documentation on the EventDB (PR#1955 by Birger Schacht, PR#1985 by Sebastian Wagner). - Added TimescaleDB for time-series documentation (PR#1990 by Sebastian Waldbauer). @@ -404,18 +347,12 @@ Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old n ### Packaging - Docker images tagged with `certat/intelmq-full:develop` are built and published on every push to the develop branch (PR#1753 by Sebastian Waldbauer). - Adapt packaging to IntelMQ 3.0 changes: ruamel.yaml dependency, changed configuration, updated database-update scripts (by Birger Schacht and Sebastian Wagner). -======= - -### Packaging -- Docker images tagged with `certat/intelmq-full:develop` are built and published on every push to the develop branch (PR#1753 by Sebastian Waldbauer). ->>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) ### Tests - `intelmq.tests.lib.test_bot`: - Add test case for a raised `InvalidValue` exception upon message retrieval (#1765, PR#1766 by Filip Pokorný and Sebastian Wagner). 
- `intelmq.lib.test`: - Compare content of the `output` field as dictionaries, not as string in `assertMessageEqual` (PR#1975 by Karl-Johan Karlsson). -<<<<<<< HEAD - Support multiple calls to `run_bot` from test cases (PR#1989 by Sebastian Wagner). - Split `prepare_source_queue` out of `prepare_bot`. - Added new optional parameter `stop_bot` to `run_bot`. @@ -438,14 +375,6 @@ Update allowed classification fields to 2020-01-28 version (#1409, #1476). Old n ### Known issues - ParserBot: erroneous raw line recovery in error handling (#1850). - ruamel.yaml loader and dumper: human readability bug / support for comments (#2003). -======= - -### Tools - -### Contrib - -### Known issues ->>>>>>> d7b588ac0 (DOC: add changelog entry for PR#1975) 2.3.3 (2021-05-31) diff --git a/intelmq/bots/collectors/microsoft/collector_azure.py b/intelmq/bots/collectors/microsoft/collector_azure.py index 44e567dfe..9de529ab5 100644 --- a/intelmq/bots/collectors/microsoft/collector_azure.py +++ b/intelmq/bots/collectors/microsoft/collector_azure.py @@ -36,7 +36,11 @@ class MicrosoftAzureCollectorBot(CollectorBot, CacheMixin): def init(self): if ContainerClient is None or create_configuration is None: +<<<<<<< HEAD raise MissingDependencyError("azure-storage-blob", version='>=12.0.0') +======= + raise MissingDependencyError("azure.storage", version='>=12.0.0') +>>>>>>> c78494bb6 (DOC: azure collector: document minimum azure version) self.config = create_configuration(storage_sdk='blob') if hasattr(self, 'https_proxy'): From c7f9cd27b569bfb6fac0c1ff64d6712409a0b4ce Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Tue, 1 Jun 2021 11:43:11 +0200 Subject: [PATCH 32/45] DOC: Document the licenses of all the files This commit adds a license header or a license file to most of the files, or documents the license in the .reuse/dep5 license file. Some of the process was automated, first by listing all the files that are not reuse lint compliant: > reuse lint > ../reuse.lst This list was then modified to remove metainformation and only list filenames. Also a couple of filenames that need manual modification were removed. Then using git and reuse: > for file in `cat ../reuse.lst`; do year=`git log --reverse --pretty="format:%ai" $file | head -1 | cut -d "-" -f 1`; author=`git log --reverse --pretty="format:%an" $file|head -1`; reuse addheader --copyright="$author" --year="$year" --license="AGPL-3.0-or-later" --skip-unrecognised $file; done Then the same process was repeated for files reuse does not recognize, like csv and json files or REQUIREMENTS.txt files. 
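Files that cannot or should not carry an embedded header get a separate `<filename>.license` companion file with the same SPDX tags instead, as the new `update-asn-data.license` and similar files added below show.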
--- .github/pull_request_template.md | 4 -- .../asn_lookup/update-asn-data.license | 2 + .../maxmind_geoip/update-geoip-data.license | 2 + .../update-rfiprisk-data.license | 2 + .../experts/ripencc_abuse_contact/expert.py | 19 ++++++ .../tor_nodes/update-tor-nodes.license | 2 + intelmq/bots/outputs/postgresql/output.py | 25 +++++++ intelmq/bots/parsers/malwaredomains/parser.py | 66 +++++++++++++++++++ .../experts/modify/old_format.conf.license | 2 + .../malwaredomains/domains.txt.license | 2 + .../parsers/malwaredomains/test_parser.py | 56 ++++++++++++++++ .../shadowserver/test_event4_ip_spoofer.py | 4 -- 12 files changed, 178 insertions(+), 8 deletions(-) create mode 100644 intelmq/bots/experts/asn_lookup/update-asn-data.license create mode 100644 intelmq/bots/experts/maxmind_geoip/update-geoip-data.license create mode 100644 intelmq/bots/experts/recordedfuture_iprisk/update-rfiprisk-data.license create mode 100644 intelmq/bots/experts/ripencc_abuse_contact/expert.py create mode 100644 intelmq/bots/experts/tor_nodes/update-tor-nodes.license create mode 100644 intelmq/bots/outputs/postgresql/output.py create mode 100644 intelmq/bots/parsers/malwaredomains/parser.py create mode 100644 intelmq/tests/bots/experts/modify/old_format.conf.license create mode 100644 intelmq/tests/bots/parsers/malwaredomains/domains.txt.license create mode 100644 intelmq/tests/bots/parsers/malwaredomains/test_parser.py diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 591dc5b24..e635634b8 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,7 +1,3 @@ - - # Please read the notes below and replace them with the description of you pull request Make sure you follow the instructions in the [Developer Guide](https://intelmq.readthedocs.io/en/latest/dev/guide.html) - it describes how to run the test suite and which coding rules to follow. 
diff --git a/intelmq/bots/experts/asn_lookup/update-asn-data.license b/intelmq/bots/experts/asn_lookup/update-asn-data.license new file mode 100644 index 000000000..8fd09f5e0 --- /dev/null +++ b/intelmq/bots/experts/asn_lookup/update-asn-data.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: 2016 Sascha Wilde +SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/bots/experts/maxmind_geoip/update-geoip-data.license b/intelmq/bots/experts/maxmind_geoip/update-geoip-data.license new file mode 100644 index 000000000..8fd09f5e0 --- /dev/null +++ b/intelmq/bots/experts/maxmind_geoip/update-geoip-data.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: 2016 Sascha Wilde +SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/bots/experts/recordedfuture_iprisk/update-rfiprisk-data.license b/intelmq/bots/experts/recordedfuture_iprisk/update-rfiprisk-data.license new file mode 100644 index 000000000..42e75399f --- /dev/null +++ b/intelmq/bots/experts/recordedfuture_iprisk/update-rfiprisk-data.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: 2018 olekristoffer +SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/bots/experts/ripencc_abuse_contact/expert.py b/intelmq/bots/experts/ripencc_abuse_contact/expert.py new file mode 100644 index 000000000..c59330e1d --- /dev/null +++ b/intelmq/bots/experts/ripencc_abuse_contact/expert.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: 2015 National CyberSecurity Center +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +from ..ripe.expert import RIPEExpertBot + + +class RIPENCCExpertDeprecatedBot(RIPEExpertBot): + + def init(self): + self.logger.warning("The parser 'intelmq.bots.experts.ripencc_abuse_contact" + ".expert has been renamed to 'intelmq.bots." + "experts.ripe.expert'. This compatibility module " + "will be removed in version 3.0.") + super().init() + + +BOT = RIPENCCExpertDeprecatedBot diff --git a/intelmq/bots/experts/tor_nodes/update-tor-nodes.license b/intelmq/bots/experts/tor_nodes/update-tor-nodes.license new file mode 100644 index 000000000..8fd09f5e0 --- /dev/null +++ b/intelmq/bots/experts/tor_nodes/update-tor-nodes.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: 2016 Sascha Wilde +SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/bots/outputs/postgresql/output.py b/intelmq/bots/outputs/postgresql/output.py new file mode 100644 index 000000000..73e82aca6 --- /dev/null +++ b/intelmq/bots/outputs/postgresql/output.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: 2015 National CyberSecurity Center +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +""" +Compatibility shim +""" + +from intelmq.bots.outputs.sql.output import SQLOutputBot + + +class PostgreSQLOutputBot(SQLOutputBot): + engine = 'postgresql' + + def init(self): + self.logger.warning("The output bot 'intelmq.bots.outputs.postgresql.output' " + "is deprecated and replaced by " + "'intelmq.bots.outputs.sql.output' with the parameter " + "'engine' = 'postgresql'. 
" + "The fallback compatibility will be removed in version 3.0.") + super().init() + + +BOT = PostgreSQLOutputBot diff --git a/intelmq/bots/parsers/malwaredomains/parser.py b/intelmq/bots/parsers/malwaredomains/parser.py new file mode 100644 index 000000000..c5b90eb81 --- /dev/null +++ b/intelmq/bots/parsers/malwaredomains/parser.py @@ -0,0 +1,66 @@ +# SPDX-FileCopyrightText: 2015 National CyberSecurity Center +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +""" +The descriptions give a hint about what the entry is about and is very mixed. +Most prominent description is "phishing", most of them are malware names. +More types could be mapped better, only the most obvious ones are done currently. +""" +import datetime + +from intelmq.lib import utils +from intelmq.lib.bot import Bot + + +class MalwareDomainsParserBot(Bot): + """Parse the Malware Domains feed""" + + def is_valid_date(self, strd): + try: + datetime.datetime.strptime(strd, '%Y%m%d') + return True + except Exception: + return False + + def process(self): + report = self.receive_message() + + raw_report = utils.base64_decode(report.get("raw")) + + for row in raw_report.splitlines(): + row = row.rstrip() + + if row.startswith("#") or len(row) == 0: + continue + + values = row.split('\t')[1:] + + event = self.new_event(report) + + event.add('source.fqdn', values[1]) + if values[2] == 'phishing': + event.add('classification.identifier', values[2]) + event.add('classification.type', 'phishing') + elif values[2] == 'C&C': + event.add('classification.identifier', values[2]) + event.add('classification.type', 'c2server') + else: + event.add('classification.identifier', values[2]) + event.add('classification.type', 'malware-distribution') + event.add('event_description.text', values[2]) + + for i in range(4, len(values)): + if self.is_valid_date(values[i]): + event.add('time.source', # times are GMT, verified via email + values[i] + "T00:00:00+00:00", overwrite=True) + break + + event.add('raw', row) + + self.send_message(event) + self.acknowledge_message() + + +BOT = MalwareDomainsParserBot diff --git a/intelmq/tests/bots/experts/modify/old_format.conf.license b/intelmq/tests/bots/experts/modify/old_format.conf.license new file mode 100644 index 000000000..f0b62ad2d --- /dev/null +++ b/intelmq/tests/bots/experts/modify/old_format.conf.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: 2016 Sebastian Wagner +SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/tests/bots/parsers/malwaredomains/domains.txt.license b/intelmq/tests/bots/parsers/malwaredomains/domains.txt.license new file mode 100644 index 000000000..f0b62ad2d --- /dev/null +++ b/intelmq/tests/bots/parsers/malwaredomains/domains.txt.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: 2016 Sebastian Wagner +SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/tests/bots/parsers/malwaredomains/test_parser.py b/intelmq/tests/bots/parsers/malwaredomains/test_parser.py new file mode 100644 index 000000000..d6ba00605 --- /dev/null +++ b/intelmq/tests/bots/parsers/malwaredomains/test_parser.py @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: 2015 Sebastian Wagner +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +import base64 +import os +import unittest + +import intelmq.lib.test as test +from intelmq.bots.parsers.malwaredomains.parser import MalwareDomainsParserBot + +with open(os.path.join(os.path.dirname(__file__), 'domains.txt'), 'rb') as fh: + RAW = base64.b64encode(fh.read()).decode() + +OUTPUT1 = {'__type': 
'Event', + 'classification.type': 'phishing', + 'event_description.text': 'phishing', + 'classification.identifier': 'phishing', + 'raw': 'CQlleGFtcGxlLmNvbQlwaGlzaGluZwlvcGVucGhpc2guY29tCTIwMTYwNTI3CTIwMTYwMTA4', + 'source.fqdn': 'example.com', + 'time.source': '2016-05-27T00:00:00+00:00'} +OUTPUT2 = {'__type': 'Event', + 'classification.type': 'phishing', + 'event_description.text': 'phishing', + 'classification.identifier': 'phishing', + 'raw': 'CQlleGFtcGxlLmludmFsaWQJcGhpc2hpbmcJb3BlbnBoaXNoLmNvbQkyMDE2MDUyNwkyMDE2MDEwOA==', + 'source.fqdn': 'example.invalid', + 'time.source': '2016-05-27T00:00:00+00:00'} +OUTPUT3 = {'__type': 'Event', + 'classification.type': 'c2-server', + 'event_description.text': 'C&C', + 'classification.identifier': 'C&C', + 'raw': 'CQlleGFtcGxlLm5ldAlDJkMJc291cmNlLmV4YW1wbGUuY29tCTIwMTcxMjAxCTIwMTYwNzE5CTIwMTYwMzEw', + 'source.fqdn': 'example.net', + 'time.source': '2017-12-01T00:00:00+00:00'} + + +class TestMalwareDomainsParserBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for MalwareDomainsParserBot. + """ + + @classmethod + def set_bot(cls): + cls.bot_reference = MalwareDomainsParserBot + cls.default_input_message = {'__type': 'Report', 'raw': RAW} + + def test_event(self): + self.run_bot() + self.assertMessageEqual(0, OUTPUT1) + self.assertMessageEqual(1, OUTPUT2) + self.assertMessageEqual(2, OUTPUT3) + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py b/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py index 87316a5d5..462746f65 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_event4_ip_spoofer.py @@ -1,10 +1,6 @@ -<<<<<<< HEAD # SPDX-FileCopyrightText: 2021 Birger Schacht # # SPDX-License-Identifier: AGPL-3.0-or-later - -======= ->>>>>>> ecef0ea7f (ENH: add event_ip_spoofer shadowserver config and corresponding tests) import os import unittest From 1443ebfad2f64ada5f91c504b9c383aedb5ffa0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikk=20Margus=20M=C3=B6ll?= Date: Fri, 21 May 2021 14:02:18 +0300 Subject: [PATCH 33/45] ENH: tweaks to shadowserver config --- intelmq/bots/parsers/shadowserver/_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 05bf0da77..e150b4eef 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -110,7 +110,6 @@ def convert_bool(value: str) -> Optional[bool]: def validate_to_none(value: str) -> Optional[str]: return None if (not value or value in {'0', 'unknown'}) else value - def convert_int(value: str) -> Optional[int]: """ Returns an int or None for empty strings. 
""" return int(value) if value else None From 30eeb56acfb23b4cc94d3f002b8b920482d595df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikk=20Margus=20M=C3=B6ll?= Date: Thu, 27 May 2021 13:33:30 +0300 Subject: [PATCH 34/45] ENH: support event-sinkhole-http-referer in shadowserver parser --- intelmq/bots/parsers/shadowserver/_config.py | 47 ------------------- .../bots/parsers/shadowserver/test_broken.py | 2 +- .../test_event4_sinkhole_http_referer.py | 3 -- 3 files changed, 1 insertion(+), 51 deletions(-) diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index e150b4eef..2e45ea5aa 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -3059,53 +3059,6 @@ def scan_exchange_identifier(field): }, } - -# https://www.shadowserver.org/what-we-do/network-reporting/vulnerable-exchange-server-report/ -def scan_exchange_taxonomy(field): - if field == 'exchange;webshell': - return 'intrusions' - return 'vulnerable' - - -def scan_exchange_type(field): - if field == 'exchange;webshell': - return 'compromised' - return 'infected-system' - - -def scan_exchange_identifier(field): - if field == 'exchange;webshell': - return 'exchange-server-webshell' - return 'vulnerable-exchange-server' - - -scan_exchange = { - 'required_fields': [ - ('time.source', 'timestamp', add_UTC_to_timestamp), - ('source.ip', 'ip'), - ('source.port', 'port'), - ], - 'optional_fields': [ - ('source.reverse_dns', 'hostname'), - ('extra.', 'tag'), - ('source.asn', 'asn', invalidate_zero), - ('source.geolocation.cc', 'geo'), - ('source.geolocation.region', 'region'), - ('source.geolocation.city', 'city'), - ('extra.source.naics', 'naics', convert_int), - ('extra.', 'sic', invalidate_zero), - ('extra.source.sector', 'sector', validate_to_none), - ('extra.', 'version', validate_to_none), - ('extra.', 'servername', validate_to_none), - ('classification.taxonomy', 'tag', scan_exchange_taxonomy), - ('classification.type', 'tag', scan_exchange_type), - ('classification.identifier', 'tag', scan_exchange_identifier), - ], - 'constant_fields': { - }, -} - -# https://www.shadowserver.org/what-we-do/network-reporting/sinkhole-http-referer-events-report/ event46_sinkhole_http_referer = { 'required_fields': [ ('time.source', 'timestamp', add_UTC_to_timestamp), diff --git a/intelmq/tests/bots/parsers/shadowserver/test_broken.py b/intelmq/tests/bots/parsers/shadowserver/test_broken.py index 64bd342f3..f4bea7bcf 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_broken.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_broken.py @@ -53,7 +53,7 @@ def test_broken(self): self.assertLogMatches(pattern="Detected report's file name: 'scan_http'.", levelname="DEBUG") self.assertLogMatches(pattern="Failed to parse line.") - self.assertLogMatches(pattern="ValueError: Required column 'timestamp' not found in feed 'Accessible-HTTP'. Possible change in data format or misconfiguration.") + self.assertLogMatches(pattern="ValueError: Required column 'timestamp' not found in feed 'Vulnerable-HTTP'. 
Possible change in data format or misconfiguration.") self.assertLogMatches(pattern=r"Sent 0 events and found 1 problem\(s\)\.", levelname="INFO") diff --git a/intelmq/tests/bots/parsers/shadowserver/test_event4_sinkhole_http_referer.py b/intelmq/tests/bots/parsers/shadowserver/test_event4_sinkhole_http_referer.py index 0e3e3cf8a..62420cdc1 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_event4_sinkhole_http_referer.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_event4_sinkhole_http_referer.py @@ -1,6 +1,3 @@ -# SPDX-FileCopyrightText: 2021 Mikk Margus Möll -# -# SPDX-License-Identifier: AGPL-3.0-or-later # -*- coding: utf-8 -*- import os From 9b0524232464ed9ae239c8ad536adb5a36b7d6df Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 10 Jun 2021 10:11:29 +0200 Subject: [PATCH 35/45] DOC: eventdb: describe the events table itself --- docs/user/eventdb.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/user/eventdb.rst b/docs/user/eventdb.rst index 971094e6c..6605630d7 100644 --- a/docs/user/eventdb.rst +++ b/docs/user/eventdb.rst @@ -25,6 +25,21 @@ Having an `events` table as outlined in the SQL file, IntelMQ's :ref:`intelmq.bo This events table is the core of the so-called EventDB and also required by all other sections of this document. +----------------------- +The events table itself +----------------------- + +IntelMQ comes with the ``intelmq_psql_initdb`` command line tool. It creates an SQL file containing: + +- A ``CREATE TABLE events`` statement with all valid IntelMQ fields as columns and correct types +- Several indexes as examples for a good read & search performance + +All elements of this SQL file can be adapted and extended before running the SQL file against a database, especially the indexes. + +Having an `events` table as outlined in the SQL file, IntelMQ's PostgreSQL Output bot can write all received events into this database table. + +This events table is the core of the so-called EventDB and also required by all other sections of this document. 
+ ----------------- EventDB Utilities ----------------- From 8c21411ef24a54dbde6f297aeb9abed07f27c435 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 17 Jun 2021 10:15:10 +0200 Subject: [PATCH 36/45] harm: rename compromised to system-compromise match with RSIT in the taxonomy intrusions: compromised -> system-compromise unauthorized-command -> system-compromise unauthorized-login -> system-compromise adapt bots depending on the name add changelog and news entries, including SQL update statements --- intelmq/bots/parsers/shadowserver/_config.py | 47 ++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/intelmq/bots/parsers/shadowserver/_config.py b/intelmq/bots/parsers/shadowserver/_config.py index 2e45ea5aa..01df0262f 100644 --- a/intelmq/bots/parsers/shadowserver/_config.py +++ b/intelmq/bots/parsers/shadowserver/_config.py @@ -3059,6 +3059,53 @@ def scan_exchange_identifier(field): }, } + +# https://www.shadowserver.org/what-we-do/network-reporting/vulnerable-exchange-server-report/ +def scan_exchange_taxonomy(field): + if field == 'exchange;webshell': + return 'intrusions' + return 'vulnerable' + + +def scan_exchange_type(field): + if field == 'exchange;webshell': + return 'system-compromise' + return 'infected-system' + + +def scan_exchange_identifier(field): + if field == 'exchange;webshell': + return 'exchange-server-webshell' + return 'vulnerable-exchange-server' + + +scan_exchange = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('source.ip', 'ip'), + ('source.port', 'port'), + ], + 'optional_fields': [ + ('source.reverse_dns', 'hostname'), + ('extra.', 'tag'), + ('source.asn', 'asn', invalidate_zero), + ('source.geolocation.cc', 'geo'), + ('source.geolocation.region', 'region'), + ('source.geolocation.city', 'city'), + ('extra.source.naics', 'naics', convert_int), + ('extra.', 'sic', invalidate_zero), + ('extra.source.sector', 'sector', validate_to_none), + ('extra.', 'version', validate_to_none), + ('extra.', 'servername', validate_to_none), + ('classification.taxonomy', 'tag', scan_exchange_taxonomy), + ('classification.type', 'tag', scan_exchange_type), + ('classification.identifier', 'tag', scan_exchange_identifier), + ], + 'constant_fields': { + }, +} + +# https://www.shadowserver.org/what-we-do/network-reporting/sinkhole-http-referer-events-report/ event46_sinkhole_http_referer = { 'required_fields': [ ('time.source', 'timestamp', add_UTC_to_timestamp), From f64c422bb15de706ffd977e65dc5ee19e4f92d6c Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 17 Jun 2021 12:30:01 +0200 Subject: [PATCH 37/45] harm: remove type defacement merged into information-content-security > unauthorised-information-modification adapt bots depending on the name add changelog and news entries, including SQL update statements --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index df7ccb51c..d14cdbbed 100644 --- a/NEWS.md +++ b/NEWS.md @@ -190,6 +190,9 @@ UPDATE events UPDATE events SET "classification.taxonomy" = 'information-content-security', "classification.type" = 'unauthorised-information-modification' WHERE "classification.taxonomy" = 'intrusions', "classification.type" = 'defacement' +UPDATE events + SET "classification.taxonomy" = 'information-content-security', "classification.type" = 'unauthorised-information-modification' + WHERE "classification.taxonomy" = 'intrusions', "classification.type" = 'defacement' UPDATE events SET "classification.taxonomy" = 'malicious-code' WHERE 
"classification.taxonomy" = 'malicious code'; From 75acec4b350d936efdec94d8b9aca237ce3c2067 Mon Sep 17 00:00:00 2001 From: Karl-Johan Karlsson Date: Fri, 18 Jun 2021 09:59:28 +0200 Subject: [PATCH 38/45] DOC: Document templated SMTP output bot --- docs/user/bots.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/user/bots.rst b/docs/user/bots.rst index 6ab1a2e11..336e164f9 100644 --- a/docs/user/bots.rst +++ b/docs/user/bots.rst @@ -4256,6 +4256,7 @@ Templates are in Jinja2 format with the event provided in the variable "event". See the Jinja2 documentation at https://jinja.palletsprojects.com/ . +<<<<<<< HEAD As an extension to the Jinja2 environment, the function "from_json" is available for parsing JSON strings into Python structures. This is useful if you want to handle complicated structures in the "output" @@ -4267,6 +4268,8 @@ line like:: and can then use "output" as a regular Python object in the rest of the template. +======= +>>>>>>> 87651101c (DOC: Document templated SMTP output bot) Attachments are template strings, especially useful for sending structured data. E.g. to send a JSON document including "malware.name" and all other fields starting with "source.":: From 37acdeaed48f02c95a5215fd0cd9d908b1e5ac15 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 17 Jun 2021 18:43:33 +0200 Subject: [PATCH 39/45] DEP: rmeove deprecated ripencc expert was renamed and marked as deprecated in 2.0.0.beta1 certtools/intelmq#1404 --- .../experts/ripencc_abuse_contact/expert.py | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 intelmq/bots/experts/ripencc_abuse_contact/expert.py diff --git a/intelmq/bots/experts/ripencc_abuse_contact/expert.py b/intelmq/bots/experts/ripencc_abuse_contact/expert.py deleted file mode 100644 index c59330e1d..000000000 --- a/intelmq/bots/experts/ripencc_abuse_contact/expert.py +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-FileCopyrightText: 2015 National CyberSecurity Center -# -# SPDX-License-Identifier: AGPL-3.0-or-later - -# -*- coding: utf-8 -*- -from ..ripe.expert import RIPEExpertBot - - -class RIPENCCExpertDeprecatedBot(RIPEExpertBot): - - def init(self): - self.logger.warning("The parser 'intelmq.bots.experts.ripencc_abuse_contact" - ".expert has been renamed to 'intelmq.bots." - "experts.ripe.expert'. This compatibility module " - "will be removed in version 3.0.") - super().init() - - -BOT = RIPENCCExpertDeprecatedBot From 2ca649e0b3984f84e698fe388cac572039eae485 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 17 Jun 2021 19:00:08 +0200 Subject: [PATCH 40/45] DEP: modify expert: remove compat with old format Compatibility with the deprecated configuration format (before 1.0.0.dev7) was removed. 
certtools/intelmq#1404 --- intelmq/tests/bots/experts/modify/old_format.conf.license | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 intelmq/tests/bots/experts/modify/old_format.conf.license diff --git a/intelmq/tests/bots/experts/modify/old_format.conf.license b/intelmq/tests/bots/experts/modify/old_format.conf.license deleted file mode 100644 index f0b62ad2d..000000000 --- a/intelmq/tests/bots/experts/modify/old_format.conf.license +++ /dev/null @@ -1,2 +0,0 @@ -SPDX-FileCopyrightText: 2016 Sebastian Wagner -SPDX-License-Identifier: AGPL-3.0-or-later From ac2ae406f54b27b1bdd21de8e3da68acc733ca97 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 17 Jun 2021 19:20:20 +0200 Subject: [PATCH 41/45] DEP: remove deprecated database update scripts The deprecated shell scripts - `update-asn-data` - `update-geoip-data` - `update-tor-nodes` - `update-rfiprisk-data` have been removed in favor of the built-in update-mechanisms (see the bots' documentation). A crontab file for calling all new update command can be found in `contrib/cron-jobs/intelmq-update-database`. certtools/intelmq#1404 --- intelmq/bots/experts/asn_lookup/update-asn-data.license | 2 -- intelmq/bots/experts/maxmind_geoip/update-geoip-data.license | 2 -- .../experts/recordedfuture_iprisk/update-rfiprisk-data.license | 2 -- intelmq/bots/experts/tor_nodes/update-tor-nodes.license | 2 -- 4 files changed, 8 deletions(-) delete mode 100644 intelmq/bots/experts/asn_lookup/update-asn-data.license delete mode 100644 intelmq/bots/experts/maxmind_geoip/update-geoip-data.license delete mode 100644 intelmq/bots/experts/recordedfuture_iprisk/update-rfiprisk-data.license delete mode 100644 intelmq/bots/experts/tor_nodes/update-tor-nodes.license diff --git a/intelmq/bots/experts/asn_lookup/update-asn-data.license b/intelmq/bots/experts/asn_lookup/update-asn-data.license deleted file mode 100644 index 8fd09f5e0..000000000 --- a/intelmq/bots/experts/asn_lookup/update-asn-data.license +++ /dev/null @@ -1,2 +0,0 @@ -SPDX-FileCopyrightText: 2016 Sascha Wilde -SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/bots/experts/maxmind_geoip/update-geoip-data.license b/intelmq/bots/experts/maxmind_geoip/update-geoip-data.license deleted file mode 100644 index 8fd09f5e0..000000000 --- a/intelmq/bots/experts/maxmind_geoip/update-geoip-data.license +++ /dev/null @@ -1,2 +0,0 @@ -SPDX-FileCopyrightText: 2016 Sascha Wilde -SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/bots/experts/recordedfuture_iprisk/update-rfiprisk-data.license b/intelmq/bots/experts/recordedfuture_iprisk/update-rfiprisk-data.license deleted file mode 100644 index 42e75399f..000000000 --- a/intelmq/bots/experts/recordedfuture_iprisk/update-rfiprisk-data.license +++ /dev/null @@ -1,2 +0,0 @@ -SPDX-FileCopyrightText: 2018 olekristoffer -SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/intelmq/bots/experts/tor_nodes/update-tor-nodes.license b/intelmq/bots/experts/tor_nodes/update-tor-nodes.license deleted file mode 100644 index 8fd09f5e0..000000000 --- a/intelmq/bots/experts/tor_nodes/update-tor-nodes.license +++ /dev/null @@ -1,2 +0,0 @@ -SPDX-FileCopyrightText: 2016 Sascha Wilde -SPDX-License-Identifier: AGPL-3.0-or-later From b55afef1a61de925cf48a24568bf60a97075e759 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Wed, 16 Jun 2021 19:55:32 +0200 Subject: [PATCH 42/45] DOC: n6: add more illustrations add two n6 images directly to the repository, as they are not displayed on readthedocs otherwise: The other websites hosting the images 
block loading images if the referer does not match a whitelist. We can't add a noreferrer HTML attribute in rst either. The remaining option is to add the files, which only implies adding the licensing information and the AGPL-3.0 license text as well. Add two illustrations of the flow from n6 to IntelMQ and vice versa, own work. Some textual improvements in the document itself. --- docs/user/bots.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user/bots.rst b/docs/user/bots.rst index 336e164f9..619af470c 100644 --- a/docs/user/bots.rst +++ b/docs/user/bots.rst @@ -4124,6 +4124,7 @@ Create the new database (you can ignore all errors since SQLite doesn't know all Then, set the `database` parameter to the `your-db.db` file path. +.. _stomp output bot: .. _intelmq.bots.outputs.stomp.output: From 94fa7f8bf1326f2911e5cd9792bdf5f48a58eaa0 Mon Sep 17 00:00:00 2001 From: Sebastian Waldbauer Date: Mon, 21 Jun 2021 10:09:24 +0200 Subject: [PATCH 43/45] ENH: Aggregate expert The Aggregate Expert can be used to aggregate events within a given timespan; aggregated events below a given threshold are dropped. Signed-off-by: Sebastian Waldbauer --- docs/user/bots.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/user/bots.rst b/docs/user/bots.rst index 619af470c..6015dc6eb 100644 --- a/docs/user/bots.rst +++ b/docs/user/bots.rst @@ -1795,7 +1795,6 @@ Aggregate **Configuration Parameters** * **Cache parameters** (see in section :ref:`common-parameters`) - * TTL is not used, using it would result in data loss. * **fields** Given fields which are used to aggregate like `classification.type, classification.identifier` * **threshold** If the aggregated event is lower than the given threshold after the timespan, the event will get dropped. @@ -4257,7 +4256,6 @@ Templates are in Jinja2 format with the event provided in the variable "event". See the Jinja2 documentation at https://jinja.palletsprojects.com/ . -<<<<<<< HEAD As an extension to the Jinja2 environment, the function "from_json" is available for parsing JSON strings into Python structures. This is useful if you want to handle complicated structures in the "output" @@ -4269,8 +4267,6 @@ line like:: and can then use "output" as a regular Python object in the rest of the template. -======= ->>>>>>> 87651101c (DOC: Document templated SMTP output bot) Attachments are template strings, especially useful for sending structured data. E.g. to send a JSON document including "malware.name" and all other fields starting with "source.":: From e97db4179894644b7e7e71edc1d91a5be5b75274 Mon Sep 17 00:00:00 2001 From: Sebastian Waldbauer Date: Thu, 24 Jun 2021 13:25:45 +0200 Subject: [PATCH 44/45] [ENH] Using msgpack instead of json Using msgpack instead of json results in faster (de)serialization and less memory usage. Redis can also handle msgpack within its Lua API, e.g. https://github.com/kengonakajima/lua-msgpack-native.
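For illustration, a minimal sketch of the kind of round-trip comparison behind the benchmark below; the event fields are invented for this example and the absolute numbers depend on the machine and on the installed msgpack version:

    import json
    import timeit

    import msgpack  # assumes the msgpack package from PyPI is installed

    # A made-up event, similar in shape to an IntelMQ event dictionary.
    event = {
        "__type": "Event",
        "feed.name": "Example",
        "feed.url": "https://example.com/",
        "source.ip": "192.0.2.1",
        "classification.type": "c2-server",
        "raw": "bG9yZW0gaXBzdW0=",
    }

    json_blob = json.dumps(event).encode()
    msgpack_blob = msgpack.packb(event)
    print("sizes: json=%d bytes, msgpack=%d bytes" % (len(json_blob), len(msgpack_blob)))

    # Round-trip timings; lower is better.
    print("json round-trip:   ", timeit.timeit(lambda: json.loads(json.dumps(event)), number=100000))
    print("msgpack round-trip:", timeit.timeit(lambda: msgpack.unpackb(msgpack.packb(event), raw=False), number=100000))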
====== Benchmark ======= JSON median size: 387 MSGPACK median size: 329 ------------------------ Diff: 16.20% JSON * Serialize: 39286 * Deserialize: 30713 MSGPACK * Serialize: 23483 * Deserialize: 12602 --------------------- DIFF * Serialize: 50.35% * Deserialize: 83.62% Data extracted from spamhaus-collector Measurements based on deduplicator-expert 460 events in total process by deducplicator-expert Signed-off-by: Sebastian Waldbauer --- debian/control | 2 + intelmq/bots/parsers/json/parser.py | 4 +- intelmq/lib/bot.py | 5 +- intelmq/lib/exceptions.py | 9 ++++ intelmq/lib/message.py | 49 +++++++++++++------ intelmq/lib/pipeline.py | 4 +- intelmq/lib/test.py | 24 ++++----- .../bots/collectors/tcp/test_collector.py | 5 +- .../bots/experts/cymru_whois/test_expert.py | 4 +- .../tests/bots/experts/idea/test_expert.py | 8 +-- .../tests/bots/parsers/json/test_parser.py | 3 +- intelmq/tests/lib/test_bot.py | 4 +- intelmq/tests/lib/test_message.py | 28 +++++++---- intelmq/tests/lib/test_pipeline.py | 13 ++--- setup.py | 1 + 15 files changed, 106 insertions(+), 57 deletions(-) diff --git a/debian/control b/debian/control index 6ba1f737d..227fcacf0 100644 --- a/debian/control +++ b/debian/control @@ -20,6 +20,7 @@ Build-Depends: debhelper (>= 4.1.16), python3-sphinx-rtd-theme, python3-termstyle, python3-tz, + python3-msgpack, quilt, rsync, safe-rm @@ -41,6 +42,7 @@ Depends: bash-completion, python3-ruamel.yaml, python3-termstyle (>= 0.1.10), python3-tz, + python3-msgpack, redis-server, systemd, ${misc:Depends}, diff --git a/intelmq/bots/parsers/json/parser.py b/intelmq/bots/parsers/json/parser.py index f66a5a741..625ebe060 100644 --- a/intelmq/bots/parsers/json/parser.py +++ b/intelmq/bots/parsers/json/parser.py @@ -28,7 +28,9 @@ def process(self): for line in lines: new_event = MessageFactory.unserialize(line, harmonization=self.harmonization, - default_type='Event') + default_type='Event', + use_packer="json") + event = self.new_event(report) event.update(new_event) if 'raw' not in event: diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index caed55de2..6f92b00cc 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -16,6 +16,7 @@ import inspect import io import json +import msgpack import logging import os import re @@ -329,8 +330,8 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, self.logger.error('Pipeline failed.') self.__disconnect_pipelines() - except exceptions.DecodingError as exc: - self.logger.exception('Could not decode message from pipeline. No retries useful.') + except exceptions.UnserializationError as exc: + self.logger.exception('Could not unserialize message from pipeline. No retries useful.') # ensure that we do not re-process the faulty message self.__error_retries_counter = self.error_max_retries + 1 diff --git a/intelmq/lib/exceptions.py b/intelmq/lib/exceptions.py index 5c8230d8d..659134c9e 100644 --- a/intelmq/lib/exceptions.py +++ b/intelmq/lib/exceptions.py @@ -167,3 +167,12 @@ def __init__(self, encodings=None, exception: UnicodeDecodeError = None, suffixes.append('with reason %r' % exception.reason) suffix = (' ' + ' '.join(suffixes)) if suffixes else '' super().__init__("Could not decode string%s." % suffix) + + +class UnserializationError(IntelMQException, ValueError): + """ + Unrecoverable error during message unserialization + """ + def __init__(self, exception: Exception = None, object: bytes = None): + self.object = object + super().__init__("Could not unserialize message%s." 
% exception) diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index 69137209a..89e2b3467 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -14,6 +14,7 @@ import warnings from collections import defaultdict from typing import Any, Dict, Iterable, Optional, Sequence, Union +import msgpack import intelmq.lib.exceptions as exceptions import intelmq.lib.harmonization @@ -60,8 +61,8 @@ def from_dict(message: dict, harmonization=None, return class_reference(message, auto=True, harmonization=harmonization) @staticmethod - def unserialize(raw_message: str, harmonization: dict = None, - default_type: Optional[str] = None) -> dict: + def unserialize(raw_message: bytes, harmonization: dict = None, + default_type: Optional[str] = None, use_packer: str = "msgpack") -> dict: """ Takes JSON-encoded Message object, returns instance of correct class. @@ -74,12 +75,12 @@ def unserialize(raw_message: str, harmonization: dict = None, MessageFactory.from_dict MessageFactory.serialize """ - message = Message.unserialize(raw_message) + message = Message.unserialize(raw_message, use_packer=use_packer) return MessageFactory.from_dict(message, harmonization=harmonization, default_type=default_type) @staticmethod - def serialize(message): + def serialize(message) -> bytes: """ Takes instance of message-derived class and makes JSON-encoded Message. @@ -127,7 +128,7 @@ def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, elif isinstance(message, tuple): self.iterable = dict(message) else: - raise ValueError("Type %r of message can't be handled, must be dict or tuple.", type(message)) + raise ValueError("Type %r of message can't be handled, must be dict or tuple." % type(message)) for key, value in self.iterable.items(): if not self.add(key, value, sanitize=False, raise_failure=False): self.add(key, value, sanitize=True) @@ -310,18 +311,32 @@ def deep_copy(self): harmonization={self.__class__.__name__.lower(): self.harmonization_config}) def __str__(self): - return self.serialize() + return self.serialize(use_packer="json") - def serialize(self): - self['__type'] = self.__class__.__name__ - json_dump = utils.decode(json.dumps(self)) - del self['__type'] - return json_dump + def serialize(self, use_packer: str = "msgpack"): + delete_type = False + if '__type' not in self: + delete_type = True + self['__type'] = self.__class__.__name__ + + if use_packer == "json": + packed = json.dumps(self) + else: + packed = msgpack.packb(self) + + if delete_type: + del self['__type'] + return packed @staticmethod - def unserialize(message_string: str): - message = json.loads(message_string) - return message + def unserialize(message: bytes, use_packer: str = "msgpack"): + try: + if use_packer == "json": + return json.loads(message) + else: + return msgpack.unpackb(message, raw=False) + except Exception as exc: + raise exceptions.UnserializationError(exception=exc, object=message) def __is_valid_key(self, key: str): try: @@ -470,7 +485,7 @@ def to_dict(self, hierarchical: bool = False, with_type: bool = False, json_dict_fp = json_dict_fp[subkey] for key, value in jsondicts.items(): - new_dict[key] = json.dumps(value, ensure_ascii=False) + new_dict[key] = json.dumps(value) return new_dict @@ -478,6 +493,10 @@ def to_json(self, hierarchical=False, with_type=False, jsondict_as_string=False) json_dict = self.to_dict(hierarchical=hierarchical, with_type=with_type) return json.dumps(json_dict, ensure_ascii=False, sort_keys=True) + def to_msgpack(self, hierarchical=False, with_type=False): + 
msgpack_dict = self.to_dict(hierarchical=hierarchical, with_type=with_type) + return msgpack.packb(msgpack_dict) + def __eq__(self, other: dict) -> bool: """ Wrapper is necessary as we have additional members diff --git a/intelmq/lib/pipeline.py b/intelmq/lib/pipeline.py index ba0bf4699..62f8cd3b7 100644 --- a/intelmq/lib/pipeline.py +++ b/intelmq/lib/pipeline.py @@ -125,14 +125,14 @@ def send(self, message: str, path: str = "_default", path_permissive: bool = False): raise NotImplementedError - def receive(self) -> str: + def receive(self) -> bytes: if self._has_message: raise exceptions.PipelineError("There's already a message, first " "acknowledge the existing one.") retval = self._receive() self._has_message = True - return utils.decode(retval) + return retval def _receive(self) -> bytes: raise NotImplementedError diff --git a/intelmq/lib/test.py b/intelmq/lib/test.py index 301ce7038..5be469aad 100644 --- a/intelmq/lib/test.py +++ b/intelmq/lib/test.py @@ -12,6 +12,7 @@ import io import inspect import json +import msgpack import os import re import unittest @@ -158,8 +159,7 @@ def setUpClass(cls): elif cls.bot_type != 'collector' and cls.default_input_message == '': cls.default_input_message = {'__type': 'Event'} if type(cls.default_input_message) is dict: - cls.default_input_message = \ - utils.decode(json.dumps(cls.default_input_message)) + cls.default_input_message = msgpack.packb(cls.default_input_message) if cls.use_cache and not os.environ.get('INTELMQ_SKIP_REDIS'): password = os.environ.get('INTELMQ_TEST_REDIS_PASSWORD') or \ @@ -176,10 +176,10 @@ def setUpClass(cls): harmonization = utils.load_configuration(pkg_resources.resource_filename('intelmq', 'etc/harmonization.conf')) - def new_report(self, auto=False, examples=False): + def new_report(self, auto=False, examples=False) -> message.Report: return message.Report(harmonization=self.harmonization, auto=auto) - def new_event(self): + def new_event(self) -> message.Event: return message.Event(harmonization=self.harmonization) def get_mocked_logger(self, logger): @@ -247,7 +247,7 @@ def prepare_source_queue(self): self.input_queue = [] for msg in self.input_message: if type(msg) is dict: - self.input_queue.append(json.dumps(msg)) + self.input_queue.append(message.MessageFactory.serialize(msg)) elif issubclass(type(msg), message.Message): self.input_queue.append(msg.serialize()) else: @@ -331,8 +331,8 @@ def run_bot(self, iterations: int = 1, error_on_pipeline: bool = False, """ Test if report has required fields. """ if self.bot_type == 'collector': - for report_json in self.get_output_queue(): - report = message.MessageFactory.unserialize(report_json, + for report_data in self.get_output_queue(): + report = message.MessageFactory.unserialize(report_data, harmonization=self.harmonization) self.assertIsInstance(report, message.Report) self.assertIn('raw', report) @@ -340,8 +340,8 @@ def run_bot(self, iterations: int = 1, error_on_pipeline: bool = False, """ Test if event has required fields. """ if self.bot_type == 'parser': - for event_json in self.get_output_queue(): - event = message.MessageFactory.unserialize(event_json, + for event_data in self.get_output_queue(): + event = message.MessageFactory.unserialize(event_data, harmonization=self.harmonization) self.assertIsInstance(event, message.Event) self.assertIn('classification.type', event) @@ -408,7 +408,7 @@ def get_output_queue(self, path="_default"): """Getter for items in the output queues of this bot. 
Use in TestCase scenarios If there is multiple queues in named queue group, we return all the items chained. """ - return [utils.decode(text) for text in chain(*[self.pipe.state[x] for x in self.pipe.destination_queues[path]])] + return [text for text in chain(*[self.pipe.state[x] for x in self.pipe.destination_queues[path]])] # return [utils.decode(text) for text in self.pipe.state["%s-output" % self.bot_id]] def test_bot_name(self, *args, **kwargs): @@ -539,9 +539,9 @@ def assertMessageEqual(self, queue_pos, expected_msg, compare_raw=True, path="_d given queue position. """ event = self.get_output_queue(path=path)[queue_pos] - self.assertIsInstance(event, str) + self.assertIsInstance(event, bytes) - event_dict = json.loads(event) + event_dict = msgpack.unpackb(event, raw=False) if isinstance(expected_msg, (message.Event, message.Report)): expected = expected_msg.to_dict(with_type=True) else: diff --git a/intelmq/tests/bots/collectors/tcp/test_collector.py b/intelmq/tests/bots/collectors/tcp/test_collector.py index 117dd9316..f8e7724fc 100644 --- a/intelmq/tests/bots/collectors/tcp/test_collector.py +++ b/intelmq/tests/bots/collectors/tcp/test_collector.py @@ -126,7 +126,10 @@ def test_intelmq_exchange(self): for i, msg in enumerate(self.get_output_queue()): report = MessageFactory.unserialize(msg, harmonization=self.harmonization, default_type='Event') - output = MessageFactory.unserialize(utils.base64_decode(report["raw"]), harmonization=self.harmonization, default_type='Event') + output = MessageFactory.unserialize(utils.base64_decode(report["raw"]), + harmonization=self.harmonization, + default_type='Event', + use_packer="json") self.assertDictEqual(output, INPUT1) del report['time.observation'] diff --git a/intelmq/tests/bots/experts/cymru_whois/test_expert.py b/intelmq/tests/bots/experts/cymru_whois/test_expert.py index 8d109e694..70343e701 100644 --- a/intelmq/tests/bots/experts/cymru_whois/test_expert.py +++ b/intelmq/tests/bots/experts/cymru_whois/test_expert.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # -*- coding: utf-8 -*- -import json +import msgpack import unittest import intelmq.lib.test as test @@ -93,7 +93,7 @@ def test_6to4_result(self): """ self.input_message = EXAMPLE_6TO4_INPUT self.run_bot() - actual = json.loads(self.get_output_queue()[0]) + actual = msgpack.loads(self.get_output_queue()[0]) self.assertDictContainsSubset(EXAMPLE_6TO4_INPUT, actual) self.assertIn("source.asn", actual) self.assertIn("source.as_name", actual) diff --git a/intelmq/tests/bots/experts/idea/test_expert.py b/intelmq/tests/bots/experts/idea/test_expert.py index 53c1f392d..5a02ef7fe 100644 --- a/intelmq/tests/bots/experts/idea/test_expert.py +++ b/intelmq/tests/bots/experts/idea/test_expert.py @@ -5,8 +5,10 @@ # -*- coding: utf-8 -*- import unittest import json +import msgpack import intelmq.lib.test as test +from intelmq.lib.message import MessageFactory from intelmq.bots.experts.idea.expert import IdeaExpertBot from intelmq.lib.harmonization import ClassificationType @@ -86,10 +88,10 @@ def test_conversion(self): # The ID in the generated Idea event is random, so we have to extract # the data from the "output" field and compare after removing ID's event = self.get_output_queue()[0] - self.assertIsInstance(event, str) - event_dict = json.loads(event) + self.assertIsInstance(event, bytes) + event_dict = MessageFactory.unserialize(event) self.assertIsInstance(event_dict, dict) - self.assertTrue("output" in event_dict) + self.assertTrue(b"output" in event_dict) idea_event = 
json.loads(event_dict["output"]) self.assertIsInstance(idea_event, dict) del TEST_OUTPUT1["ID"] diff --git a/intelmq/tests/bots/parsers/json/test_parser.py b/intelmq/tests/bots/parsers/json/test_parser.py index c18d18dd0..2c83658ed 100644 --- a/intelmq/tests/bots/parsers/json/test_parser.py +++ b/intelmq/tests/bots/parsers/json/test_parser.py @@ -6,6 +6,8 @@ import base64 import os import unittest +import json +import msgpack import intelmq.lib.test as test from intelmq.bots.parsers.json.parser import JSONParserBot @@ -51,7 +53,6 @@ NO_DEFAULT_EVENT = MULTILINE_EVENTS[1].copy() NO_DEFAULT_EVENT['raw'] = base64.b64encode(b'{"source.ip": "127.0.0.2", "classification.type": "c2-server"}\n').decode() - class TestJSONParserBot(test.BotTestCase, unittest.TestCase): """ A TestCase for a MalwareDomainListParserBot. diff --git a/intelmq/tests/lib/test_bot.py b/intelmq/tests/lib/test_bot.py index b7b16192a..b8c71ec81 100644 --- a/intelmq/tests/lib/test_bot.py +++ b/intelmq/tests/lib/test_bot.py @@ -61,7 +61,7 @@ def test_encoding_error_on_input_message(self): """ self.input_message = b'foo\xc9bar' self.run_bot(iterations=1, allowed_error_count=1) - self.assertLogMatches(r'.*intelmq\.lib\.exceptions\.DecodingError:.*') + self.assertLogMatches(r'.*intelmq\.lib\.exceptions\.UnserializationError:.*') self.assertEqual(self.pipe.state['test-bot-input-internal'], []) self.assertEqual(self.pipe.state['test-bot-input'], []) self.assertEqual(self.pipe.state['test-bot-output'], []) @@ -71,7 +71,7 @@ def test_invalid_value_on_input_message(self): Test if the bot is dumping / not retrying a message which is impossible to parse. https://github.com/certtools/intelmq/issues/1765 """ - self.input_message = b'{"source.asn": 0, "__type": "Event"}' + self.input_message = {"source.asn": 0, "__type": "Event"} self.run_bot(iterations=1, allowed_error_count=1) self.assertLogMatches(r'.*intelmq\.lib\.exceptions\.InvalidValue:.*') self.assertEqual(self.pipe.state['test-bot-input-internal'], []) diff --git a/intelmq/tests/lib/test_message.py b/intelmq/tests/lib/test_message.py index 9f9c2ddb4..e9809cad5 100644 --- a/intelmq/tests/lib/test_message.py +++ b/intelmq/tests/lib/test_message.py @@ -11,6 +11,7 @@ but has a valid Harmonization configuration. """ import json +import msgpack import unittest import pkg_resources @@ -158,12 +159,12 @@ def test_event_ne_different_config(self): def test_invalid_type(self): """ Test if Message raises InvalidArgument for invalid type. """ with self.assertRaises(exceptions.InvalidArgument): - message.MessageFactory.unserialize('{"__type": "Message"}', harmonization=HARM) + message.MessageFactory.unserialize(msgpack.dumps({"__type": "Message"}), harmonization=HARM) def test_invalid_type2(self): """ Test if MessageFactory raises InvalidArgument for invalid type. """ with self.assertRaises(exceptions.InvalidArgument): - message.MessageFactory.unserialize('{"__type": "Invalid"}', harmonization=HARM) + message.MessageFactory.unserialize(msgpack.dumps({"__type": "Invalid"}), harmonization=HARM) def test_report_invalid_key(self): """ Test if report raises InvalidKey for invalid key in add(). 
""" @@ -365,10 +366,8 @@ def test_factory_serialize(self): report.add('feed.url', URL_SANE) report.add('raw', LOREM_BASE64, sanitize=False) actual = message.MessageFactory.serialize(report) - expected = ('{"raw": "bG9yZW0gaXBzdW0=", "__type": "Report", "feed.url' - '": "https://example.com/", "feed.name": "Example"}') - self.assertDictEqual(json.loads(expected), - json.loads(actual)) + expected = (b'\x84\xa9feed.name\xa7Example\xa8feed.url\xb4https://example.com/\xa3raw\xb0bG9yZW0gaXBzdW0=\xa6__type\xa6Report') + self.assertDictEqual(msgpack.unpackb(expected), msgpack.unpackb(actual)) def test_deep_copy_content(self): """ Test if deep_copy does return the same items. """ @@ -518,22 +517,31 @@ def test_event_json_hierarchical(self): '{"observation": "2015-01-01T13:37:00+00:00"}}') self.assertDictEqual(json.loads(expected), json.loads(actual)) + def test_event_msgpack(self): + """ Test event to_msgpack """ + event = self.new_event() + event = self.add_event_examples(event) + actual = event.to_msgpack() + self.assertIsInstance(actual, bytes) + excepted = (b'\x84\xa9feed.name\xa7Example\xa8feed.url\xb4https://example.com/\xa3raw\xb0bG9yZW0gaXBzdW0=\xb0time.observation\xb92015-01-01T13:37:00+00:00') + self.assertDictEqual(msgpack.unpackb(excepted), msgpack.unpackb(actual)) + def test_event_serialize(self): """ Test Event serialize. """ event = self.new_event() - self.assertEqual('{"__type": "Event"}', + self.assertEqual(b'\x81\xa6__type\xa5Event', event.serialize()) def test_event_string(self): """ Test Event serialize. """ event = self.new_event() - self.assertEqual('{"__type": "Event"}', + self.assertEqual(b'\x81\xa6__type\xa5Event', event.serialize()) def test_event_unicode(self): """ Test Event serialize. """ event = self.new_event() - self.assertEqual('{"__type": "Event"}', + self.assertEqual(b'\x81\xa6__type\xa5Event', event.serialize()) def test_event_from_report(self): @@ -599,7 +607,7 @@ def test_event_init_check_tuple(self): def test_event_init(self): """ Test if initialization method checks fields. 
""" - event = '{"__type": "Event", "source.asn": "foo"}' + event = msgpack.dumps({"__type": "Event", "source.asn": "foo"}) with self.assertRaises(exceptions.InvalidValue): message.MessageFactory.unserialize(event, harmonization=HARM) diff --git a/intelmq/tests/lib/test_pipeline.py b/intelmq/tests/lib/test_pipeline.py index 39f75eb0f..08adb7c0e 100644 --- a/intelmq/tests/lib/test_pipeline.py +++ b/intelmq/tests/lib/test_pipeline.py @@ -20,6 +20,7 @@ import intelmq.lib.pipeline as pipeline import intelmq.lib.test as test import intelmq.lib.exceptions as exceptions +import intelmq.lib.utils as utils SAMPLES = {'normal': [b'Lorem ipsum dolor sit amet', 'Lorem ipsum dolor sit amet'], @@ -67,7 +68,7 @@ def setUp(self): def test_receive(self): self.pipe.state['test-bot-input'] = [SAMPLES['normal'][0]] - self.assertEqual(SAMPLES['normal'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['normal'][1], utils.decode(self.pipe.receive())) def test_send(self): self.pipe.send(SAMPLES['normal'][1]) @@ -76,7 +77,7 @@ def test_send(self): def test_receive_unicode(self): self.pipe.state['test-bot-input'] = [SAMPLES['unicode'][0]] - self.assertEqual(SAMPLES['unicode'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['unicode'][1], utils.decode(self.pipe.receive())) def test_send_unicode(self): self.pipe.send(SAMPLES['unicode'][1]) @@ -107,7 +108,7 @@ def test_reject(self): self.pipe.state['test-bot-input'] = [SAMPLES['normal'][0]] self.pipe.receive() self.pipe.reject_message() - self.assertEqual(SAMPLES['normal'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['normal'][1], utils.decode(self.pipe.receive())) def test_acknowledge(self): self.pipe.state['test-bot-input'] = [SAMPLES['normal'][0]] @@ -161,12 +162,12 @@ def test_send_receive(self): """ Sending bytest and receiving unicode. 
""" self.clear() self.pipe.send(SAMPLES['normal'][0]) - self.assertEqual(SAMPLES['normal'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['normal'][1], utils.decode(self.pipe.receive())) def test_send_receive_unicode(self): self.clear() self.pipe.send(SAMPLES['unicode'][1]) - self.assertEqual(SAMPLES['unicode'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['unicode'][1], utils.decode(self.pipe.receive())) def test_count(self): self.clear() @@ -185,7 +186,7 @@ def test_reject(self): self.pipe.send(SAMPLES['normal'][0]) self.pipe.receive() self.pipe.reject_message() - self.assertEqual(SAMPLES['normal'][1], self.pipe.receive()) + self.assertEqual(SAMPLES['normal'][1], utils.decode(self.pipe.receive())) def test_acknowledge(self): self.pipe.send(SAMPLES['normal'][0]) diff --git a/setup.py b/setup.py index 1611d8c46..47df402ee 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ 'redis>=2.10', 'requests>=2.2.0', 'ruamel.yaml', + 'msgpack>=0.5', ] exec(open(os.path.join(os.path.dirname(__file__), From 1253c3ed4a40f2f349106e0d6613c8ae0840b83d Mon Sep 17 00:00:00 2001 From: Sebastian Waldbauer Date: Tue, 31 May 2022 12:24:57 +0200 Subject: [PATCH 45/45] FIX: More generic way of using different (de)-serializers Signed-off-by: Sebastian Waldbauer --- intelmq/bin/intelmqdump.py | 2 +- .../bots/collectors/amqp/collector_amqp.py | 2 +- .../collectors/microsoft/collector_azure.py | 4 - intelmq/bots/outputs/redis/output.py | 4 +- intelmq/bots/outputs/udp/output.py | 2 +- intelmq/bots/parsers/json/parser.py | 2 +- intelmq/lib/bot.py | 15 ++-- intelmq/lib/bot_debugger.py | 2 +- intelmq/lib/exceptions.py | 20 ++++- intelmq/lib/message.py | 72 ++++++++------- intelmq/lib/packers/__init__.py | 0 intelmq/lib/packers/json/__init__.py | 5 ++ intelmq/lib/packers/json/packer.py | 19 ++++ intelmq/lib/packers/msgpack/REQUIREMENTS.txt | 7 ++ intelmq/lib/packers/msgpack/__init__.py | 5 ++ intelmq/lib/packers/msgpack/packer.py | 27 ++++++ intelmq/lib/packers/packer.py | 15 ++++ intelmq/lib/test.py | 14 +-- .../bots/collectors/tcp/test_collector.py | 12 +-- .../bots/experts/cymru_whois/test_expert.py | 5 +- .../tests/bots/experts/idea/test_expert.py | 4 +- .../tests/bots/outputs/file/test_output.py | 4 +- .../tests/bots/outputs/files/test_output.py | 4 +- .../tests/bots/outputs/redis/test_output.py | 4 +- .../redis/test_output_as_hierarchical_json.py | 84 ++++++++--------- .../tests/bots/parsers/json/test_parser.py | 2 - intelmq/tests/lib/test_bot.py | 2 +- intelmq/tests/lib/test_bot_output.py | 17 ++-- intelmq/tests/lib/test_exceptions.py | 4 + intelmq/tests/lib/test_message.py | 89 +++++++------------ intelmq/tests/lib/test_parser_bot.py | 4 +- intelmq/tests/lib/test_pipeline.py | 6 +- 32 files changed, 270 insertions(+), 188 deletions(-) create mode 100644 intelmq/lib/packers/__init__.py create mode 100644 intelmq/lib/packers/json/__init__.py create mode 100644 intelmq/lib/packers/json/packer.py create mode 100644 intelmq/lib/packers/msgpack/REQUIREMENTS.txt create mode 100644 intelmq/lib/packers/msgpack/__init__.py create mode 100644 intelmq/lib/packers/msgpack/packer.py create mode 100644 intelmq/lib/packers/packer.py diff --git a/intelmq/bin/intelmqdump.py b/intelmq/bin/intelmqdump.py index 0f860d60c..0d6d047fb 100644 --- a/intelmq/bin/intelmqdump.py +++ b/intelmq/bin/intelmqdump.py @@ -349,7 +349,7 @@ def main(): if queue_name in pipeline_pipes: if runtime_config[pipeline_pipes[queue_name]]['group'] == 'Parser' and json.loads(msg)['__type'] == 'Event': print('Event converted to Report 
automatically.') - msg = message.Report(message.MessageFactory.unserialize(msg)).serialize() + msg = message.Report(message.MessageFactory.deserialize(msg)).serialize() else: print(red(f"The given queue '{queue_name}' is not configured. Please retry with a valid queue.")) break diff --git a/intelmq/bots/collectors/amqp/collector_amqp.py b/intelmq/bots/collectors/amqp/collector_amqp.py index 543f4e0be..d1fb8a4d6 100644 --- a/intelmq/bots/collectors/amqp/collector_amqp.py +++ b/intelmq/bots/collectors/amqp/collector_amqp.py @@ -79,7 +79,7 @@ def process(self): self.logger.exception('Error receiving messages.') else: if self.expect_intelmq_message: - message = MessageFactory.unserialize(body.decode()) + message = MessageFactory.deserialize(body.decode()) self.send_message(message, auto_add=False) else: report = self.new_report() diff --git a/intelmq/bots/collectors/microsoft/collector_azure.py b/intelmq/bots/collectors/microsoft/collector_azure.py index 9de529ab5..44e567dfe 100644 --- a/intelmq/bots/collectors/microsoft/collector_azure.py +++ b/intelmq/bots/collectors/microsoft/collector_azure.py @@ -36,11 +36,7 @@ class MicrosoftAzureCollectorBot(CollectorBot, CacheMixin): def init(self): if ContainerClient is None or create_configuration is None: -<<<<<<< HEAD raise MissingDependencyError("azure-storage-blob", version='>=12.0.0') -======= - raise MissingDependencyError("azure.storage", version='>=12.0.0') ->>>>>>> c78494bb6 (DOC: azure collector: document minimum azure version) self.config = create_configuration(storage_sdk='blob') if hasattr(self, 'https_proxy'): diff --git a/intelmq/bots/outputs/redis/output.py b/intelmq/bots/outputs/redis/output.py index 2e4b12968..c2440f109 100644 --- a/intelmq/bots/outputs/redis/output.py +++ b/intelmq/bots/outputs/redis/output.py @@ -41,9 +41,7 @@ def process(self): event = self.receive_message() try: - self.output.lpush(self.queue, - event.to_json(hierarchical=self.hierarchical_output, - with_type=self.with_type)) + self.output.lpush(self.queue, event.to_pack(use_packer=self.use_packer, hierarchical=self.hierarchical, with_type=self.with_type)) except Exception: self.logger.exception('Failed to send message. 
Reconnecting.') self.connect() diff --git a/intelmq/bots/outputs/udp/output.py b/intelmq/bots/outputs/udp/output.py index 9ab53a1d0..a19de549f 100644 --- a/intelmq/bots/outputs/udp/output.py +++ b/intelmq/bots/outputs/udp/output.py @@ -37,7 +37,7 @@ def process(self): del event['raw'] if self.format == 'json': - self.send(self.header + event.to_json()) + self.send(self.header + event.to_pack(use_packer=self.format)) elif self.format == 'delimited': self.send(self.delimited(event)) diff --git a/intelmq/bots/parsers/json/parser.py b/intelmq/bots/parsers/json/parser.py index 625ebe060..8e6e3b040 100644 --- a/intelmq/bots/parsers/json/parser.py +++ b/intelmq/bots/parsers/json/parser.py @@ -26,7 +26,7 @@ def process(self): lines = [base64_decode(report['raw'])] for line in lines: - new_event = MessageFactory.unserialize(line, + new_event = MessageFactory.deserialize(line, harmonization=self.harmonization, default_type='Event', use_packer="json") diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index 6f92b00cc..d719264c2 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -16,7 +16,6 @@ import inspect import io import json -import msgpack import logging import os import re @@ -102,6 +101,7 @@ class Bot: statistics_host: str = "127.0.0.1" statistics_password: Optional[str] = None statistics_port: int = 6379 + use_packer: str = os.environ.get('INTELMQ_USE_PACKER', 'MsgPack') _message_processed_verb: str = 'Processed' @@ -330,8 +330,8 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, self.logger.error('Pipeline failed.') self.__disconnect_pipelines() - except exceptions.UnserializationError as exc: - self.logger.exception('Could not unserialize message from pipeline. No retries useful.') + except exceptions.DeserializationError as exc: + self.logger.exception('Could not deserialize message from pipeline. No retries useful.') # ensure that we do not re-process the faulty message self.__error_retries_counter = self.error_max_retries + 1 @@ -662,7 +662,7 @@ def receive_message(self) -> libmessage.Message: return self.receive_message() try: - self.__current_message = libmessage.MessageFactory.unserialize(message, + self.__current_message = libmessage.MessageFactory.deserialize(message, harmonization=self.harmonization) except exceptions.InvalidKey as exc: # In case a incoming message is malformed an does not conform with the currently @@ -821,7 +821,7 @@ def __init_logger(self): def __log_configuration_parameter(self, config_name: str, option: str, value: Any): if "password" in option or "token" in option: - value = "HIDDEN" + value = "" message = "{} configuration: parameter {!r} loaded with value {!r}." 
\ .format(config_name.title(), option, value) @@ -1319,9 +1319,8 @@ def export_event(self, event: libmessage.Event, if 'raw' in event: del event['raw'] if return_type is str: - return event.to_json(hierarchical=self.hierarchical, - with_type=self.with_type, - jsondict_as_string=self.jsondict_as_string) + return event.to_pack(use_packer=self.use_packer, hierarchical=self.hierarchical, + with_type=self.with_type) else: retval = event.to_dict(hierarchical=self.hierarchical, with_type=self.with_type, diff --git a/intelmq/lib/bot_debugger.py b/intelmq/lib/bot_debugger.py index 0afeaa5c1..909b2303c 100644 --- a/intelmq/lib/bot_debugger.py +++ b/intelmq/lib/bot_debugger.py @@ -169,7 +169,7 @@ def outputappend(self, msg): def arg2msg(self, msg): default_type = "Report" if (self.runtime_configuration.get("group", None) == "Parser" or isinstance(self.instance, ParserBot)) else "Event" try: - msg = MessageFactory.unserialize(msg, default_type=default_type) + msg = MessageFactory.deserialize(msg, default_type=default_type) except (Exception, KeyError, TypeError, ValueError) as exc: if exists(msg): with open(msg) as f: diff --git a/intelmq/lib/exceptions.py b/intelmq/lib/exceptions.py index 659134c9e..824010e3d 100644 --- a/intelmq/lib/exceptions.py +++ b/intelmq/lib/exceptions.py @@ -169,10 +169,24 @@ def __init__(self, encodings=None, exception: UnicodeDecodeError = None, super().__init__("Could not decode string%s." % suffix) -class UnserializationError(IntelMQException, ValueError): +class DeserializationError(IntelMQException, ValueError): """ - Unrecoverable error during message unserialization + Unrecoverable error during message deserialization """ def __init__(self, exception: Exception = None, object: bytes = None): self.object = object - super().__init__("Could not unserialize message%s." % exception) + super().__init__("Could not deserialize message, %s." % exception) + + +class SerializationError(IntelMQException, ValueError): + """ + Unrecoverable error during message serialization + """ + def __init__(self, exception: Exception = None, object: bytes = None): + self.object = object + super().__init__("Could not serialize message, %s." % exception) + + +class MissingPackerError(IntelMQException): + def __init__(self, packer: str): + super().__init__(f"Could not load '{packer}' as packer, please check intelmq.lib.packers.{packer.lower()} and documentation") diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index 89e2b3467..e5b4460dc 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -9,17 +9,19 @@ Use MessageFactory to get a Message object (types Report and Event). 
""" import hashlib +import importlib +import inspect import json import re import warnings from collections import defaultdict from typing import Any, Dict, Iterable, Optional, Sequence, Union -import msgpack import intelmq.lib.exceptions as exceptions import intelmq.lib.harmonization from intelmq import HARMONIZATION_CONF_FILE from intelmq.lib import utils +from intelmq.lib.packers.packer import Packer __all__ = ['Event', 'Message', 'MessageFactory', 'Report'] VALID_MESSSAGE_TYPES = ('Event', 'Message', 'Report') @@ -29,8 +31,8 @@ class MessageFactory: """ - unserialize: JSON encoded message to object - serialize: object to JSON encoded object + deserialize: packed message to object + serialize: object to packed """ @staticmethod @@ -45,7 +47,7 @@ def from_dict(message: dict, harmonization=None, default_type: If '__type' is not present in message, the given type will be used See also: - MessageFactory.unserialize + MessageFactory.deserialize MessageFactory.serialize """ if default_type and "__type" not in message: @@ -61,8 +63,8 @@ def from_dict(message: dict, harmonization=None, return class_reference(message, auto=True, harmonization=harmonization) @staticmethod - def unserialize(raw_message: bytes, harmonization: dict = None, - default_type: Optional[str] = None, use_packer: str = "msgpack") -> dict: + def deserialize(raw_message: bytes, harmonization: dict = None, + default_type: Optional[str] = None, use_packer: str = "MsgPack", **kwargs) -> dict: """ Takes JSON-encoded Message object, returns instance of correct class. @@ -75,19 +77,18 @@ def unserialize(raw_message: bytes, harmonization: dict = None, MessageFactory.from_dict MessageFactory.serialize """ - message = Message.unserialize(raw_message, use_packer=use_packer) + message = Message.deserialize(raw_message, use_packer=use_packer, **kwargs) return MessageFactory.from_dict(message, harmonization=harmonization, default_type=default_type) @staticmethod - def serialize(message) -> bytes: + def serialize(message, use_packer: str = 'MsgPack', **kwargs) -> bytes: """ - Takes instance of message-derived class and makes JSON-encoded Message. + Takes instance of message-derived class and makes packed Message. The class is saved in __type attribute. 
""" - raw_message = Message.serialize(message) - return raw_message + return Message.serialize(message, use_packer=use_packer, **kwargs) class Message(dict): @@ -307,36 +308,43 @@ def copy(self): return retval def deep_copy(self): - return MessageFactory.unserialize(MessageFactory.serialize(self), + return MessageFactory.deserialize(MessageFactory.serialize(self), harmonization={self.__class__.__name__.lower(): self.harmonization_config}) def __str__(self): - return self.serialize(use_packer="json") + return self.serialize(use_packer="JSON") - def serialize(self, use_packer: str = "msgpack"): + def serialize(self, use_packer: str = "MsgPack", **kwargs): delete_type = False if '__type' not in self: delete_type = True self['__type'] = self.__class__.__name__ - if use_packer == "json": - packed = json.dumps(self) - else: - packed = msgpack.packb(self) + try: + packer: Packer = inspect.getmembers(importlib.import_module(f'intelmq.lib.packers.{use_packer.lower()}.packer'))[0][1]() + except: + raise exceptions.MissingPackerError(packer=use_packer) + + try: + packed = packer.serialize(data=self, **kwargs) + except Exception as exc: + raise exceptions.SerializationError(exception=exc, object=self) if delete_type: del self['__type'] return packed @staticmethod - def unserialize(message: bytes, use_packer: str = "msgpack"): + def deserialize(message: bytes, use_packer: str = "MsgPack", **kwargs): try: - if use_packer == "json": - return json.loads(message) - else: - return msgpack.unpackb(message, raw=False) + packer: Packer = inspect.getmembers(importlib.import_module(f'intelmq.lib.packers.{use_packer.lower()}.packer'))[0][1]() + except: + raise exceptions.MissingPackerError(packer=use_packer) + + try: + return packer.deserialize(data=message, **kwargs) except Exception as exc: - raise exceptions.UnserializationError(exception=exc, object=message) + raise exceptions.DeserializationError(exception=exc, object=message) def __is_valid_key(self, key: str): try: @@ -489,13 +497,17 @@ def to_dict(self, hierarchical: bool = False, with_type: bool = False, return new_dict - def to_json(self, hierarchical=False, with_type=False, jsondict_as_string=False): - json_dict = self.to_dict(hierarchical=hierarchical, with_type=with_type) - return json.dumps(json_dict, ensure_ascii=False, sort_keys=True) + def to_pack(self, use_packer="MsgPack", hierarchical=False, with_type=False, **kwargs): + try: + packer: Packer = inspect.getmembers(importlib.import_module(f'intelmq.lib.packers.{use_packer.lower()}.packer'))[0][1]() + except: + raise exceptions.MissingPackerError(packer=use_packer) - def to_msgpack(self, hierarchical=False, with_type=False): - msgpack_dict = self.to_dict(hierarchical=hierarchical, with_type=with_type) - return msgpack.packb(msgpack_dict) + try: + data = self.to_dict(hierarchical=hierarchical, with_type=with_type) + return packer.serialize(data, **kwargs) + except Exception as exc: + raise exceptions.SerializationError(exception=exc, object=self) def __eq__(self, other: dict) -> bool: """ diff --git a/intelmq/lib/packers/__init__.py b/intelmq/lib/packers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/intelmq/lib/packers/json/__init__.py b/intelmq/lib/packers/json/__init__.py new file mode 100644 index 000000000..99ea3f622 --- /dev/null +++ b/intelmq/lib/packers/json/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- diff --git a/intelmq/lib/packers/json/packer.py 
b/intelmq/lib/packers/json/packer.py new file mode 100644 index 000000000..d0ae7a267 --- /dev/null +++ b/intelmq/lib/packers/json/packer.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +from intelmq.lib.packers.packer import Packer +import json + + +class JSON(Packer): + def __init__(self) -> None: + super().__init__() + + def serialize(self, data, **kwargs) -> bytes: + return json.dumps(data, **kwargs) + + def deserialize(self, data, **kwargs) -> object: + return json.loads(data, **kwargs) diff --git a/intelmq/lib/packers/msgpack/REQUIREMENTS.txt b/intelmq/lib/packers/msgpack/REQUIREMENTS.txt new file mode 100644 index 000000000..071d9d748 --- /dev/null +++ b/intelmq/lib/packers/msgpack/REQUIREMENTS.txt @@ -0,0 +1,7 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +msgpack>=0.5 diff --git a/intelmq/lib/packers/msgpack/__init__.py b/intelmq/lib/packers/msgpack/__init__.py new file mode 100644 index 000000000..99ea3f622 --- /dev/null +++ b/intelmq/lib/packers/msgpack/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- diff --git a/intelmq/lib/packers/msgpack/packer.py b/intelmq/lib/packers/msgpack/packer.py new file mode 100644 index 000000000..259f312bb --- /dev/null +++ b/intelmq/lib/packers/msgpack/packer.py @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +from intelmq.lib.packers.packer import Packer +from intelmq.lib import exceptions + + +try: + import msgpack +except: + msgpack = None + + +class MsgPack(Packer): + def __init__(self) -> None: + if msgpack is None: + raise exceptions.MissingDependencyError("msgpack") + super().__init__() + + def serialize(self, data, **kwargs) -> bytes: + return msgpack.packb(data, **kwargs) + + def deserialize(self, data, **kwargs) -> object: + return msgpack.unpackb(data, raw=False, **kwargs) diff --git a/intelmq/lib/packers/packer.py b/intelmq/lib/packers/packer.py new file mode 100644 index 000000000..2f97c6283 --- /dev/null +++ b/intelmq/lib/packers/packer.py @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: 2022 CERT.at GmbH +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +class Packer(): + def __init__(self) -> None: + pass + + def serialize(self, data: bytes, **kwargs) -> bytes: + raise NotImplementedError() + + def deserialize(self, data: bytes, **kwargs) -> object: + raise NotImplementedError() diff --git a/intelmq/lib/test.py b/intelmq/lib/test.py index 5be469aad..3e0a9941d 100644 --- a/intelmq/lib/test.py +++ b/intelmq/lib/test.py @@ -11,8 +11,8 @@ """ import io import inspect +import importlib import json -import msgpack import os import re import unittest @@ -23,6 +23,7 @@ import redis import intelmq.lib.message as message +from intelmq.lib.packers.packer import Packer import intelmq.lib.pipeline as pipeline import intelmq.lib.utils as utils from intelmq import CONFIG_DIR, RUNTIME_CONF_FILE @@ -159,7 +160,7 @@ def setUpClass(cls): elif cls.bot_type != 'collector' and cls.default_input_message == '': cls.default_input_message = {'__type': 'Event'} if type(cls.default_input_message) is dict: - cls.default_input_message = msgpack.packb(cls.default_input_message) + cls.default_input_message = message.MessageFactory.serialize(cls.default_input_message, 
os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) if cls.use_cache and not os.environ.get('INTELMQ_SKIP_REDIS'): password = os.environ.get('INTELMQ_TEST_REDIS_PASSWORD') or \ @@ -332,7 +333,7 @@ def run_bot(self, iterations: int = 1, error_on_pipeline: bool = False, """ Test if report has required fields. """ if self.bot_type == 'collector': for report_data in self.get_output_queue(): - report = message.MessageFactory.unserialize(report_data, + report = message.MessageFactory.deserialize(report_data, harmonization=self.harmonization) self.assertIsInstance(report, message.Report) self.assertIn('raw', report) @@ -341,7 +342,7 @@ def run_bot(self, iterations: int = 1, error_on_pipeline: bool = False, """ Test if event has required fields. """ if self.bot_type == 'parser': for event_data in self.get_output_queue(): - event = message.MessageFactory.unserialize(event_data, + event = message.MessageFactory.deserialize(event_data, harmonization=self.harmonization) self.assertIsInstance(event, message.Event) self.assertIn('classification.type', event) @@ -532,7 +533,7 @@ def assertOutputQueueLen(self, queue_len=0, path="_default"): """ self.assertEqual(len(self.get_output_queue(path=path)), queue_len) - def assertMessageEqual(self, queue_pos, expected_msg, compare_raw=True, path="_default"): + def assertMessageEqual(self, queue_pos, expected_msg, compare_raw=True, path="_default", use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')): """ Asserts that the given expected_message is contained in the generated event with @@ -540,8 +541,7 @@ def assertMessageEqual(self, queue_pos, expected_msg, compare_raw=True, path="_d """ event = self.get_output_queue(path=path)[queue_pos] self.assertIsInstance(event, bytes) - - event_dict = msgpack.unpackb(event, raw=False) + event_dict = message.MessageFactory.deserialize(raw_message=event, use_packer=use_packer) if isinstance(expected_msg, (message.Event, message.Report)): expected = expected_msg.to_dict(with_type=True) else: diff --git a/intelmq/tests/bots/collectors/tcp/test_collector.py b/intelmq/tests/bots/collectors/tcp/test_collector.py index f8e7724fc..19b8e1c61 100644 --- a/intelmq/tests/bots/collectors/tcp/test_collector.py +++ b/intelmq/tests/bots/collectors/tcp/test_collector.py @@ -96,7 +96,8 @@ def set_bot(cls): cls.sysconfig = {'http_url': 'http://localhost/two_files.tar.gz', 'extract_files': True, 'name': 'Example feed', 'ip': 'localhost', - 'port': PORT + 'port': PORT, + "use_packer": "json" } def test_random_input(self): @@ -105,7 +106,7 @@ def test_random_input(self): thread.start() self.run_bot() self.assertOutputQueueLen(2) - generated_report = MessageFactory.unserialize(self.get_output_queue()[1], harmonization=self.harmonization, + generated_report = MessageFactory.deserialize(self.get_output_queue()[1], harmonization=self.harmonization, default_type='Event') self.assertEqual(base64_decode(generated_report['raw']), ORIGINAL_DATA.split(SEPARATOR)[1]) @@ -124,12 +125,11 @@ def test_intelmq_exchange(self): self.assertOutputQueueLen(msg_count) for i, msg in enumerate(self.get_output_queue()): - report = MessageFactory.unserialize(msg, harmonization=self.harmonization, default_type='Event') + report = MessageFactory.deserialize(msg, harmonization=self.harmonization, default_type='Event') - output = MessageFactory.unserialize(utils.base64_decode(report["raw"]), + output = MessageFactory.deserialize(utils.base64_decode(report["raw"]), harmonization=self.harmonization, - default_type='Event', - use_packer="json") + default_type='Event') 
self.assertDictEqual(output, INPUT1) del report['time.observation'] diff --git a/intelmq/tests/bots/experts/cymru_whois/test_expert.py b/intelmq/tests/bots/experts/cymru_whois/test_expert.py index 70343e701..f5d911df9 100644 --- a/intelmq/tests/bots/experts/cymru_whois/test_expert.py +++ b/intelmq/tests/bots/experts/cymru_whois/test_expert.py @@ -3,11 +3,12 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # -*- coding: utf-8 -*- -import msgpack +import os import unittest import intelmq.lib.test as test from intelmq.bots.experts.cymru_whois.expert import CymruExpertBot +from intelmq.lib.message import MessageFactory EXAMPLE_INPUT = {"__type": "Event", "source.ip": "78.104.144.2", # example.com @@ -93,7 +94,7 @@ def test_6to4_result(self): """ self.input_message = EXAMPLE_6TO4_INPUT self.run_bot() - actual = msgpack.loads(self.get_output_queue()[0]) + actual = MessageFactory.deserialize(self.get_output_queue()[0], use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) self.assertDictContainsSubset(EXAMPLE_6TO4_INPUT, actual) self.assertIn("source.asn", actual) self.assertIn("source.as_name", actual) diff --git a/intelmq/tests/bots/experts/idea/test_expert.py b/intelmq/tests/bots/experts/idea/test_expert.py index 5a02ef7fe..2be06ce59 100644 --- a/intelmq/tests/bots/experts/idea/test_expert.py +++ b/intelmq/tests/bots/experts/idea/test_expert.py @@ -3,9 +3,9 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # -*- coding: utf-8 -*- +import os import unittest import json -import msgpack import intelmq.lib.test as test from intelmq.lib.message import MessageFactory @@ -89,7 +89,7 @@ def test_conversion(self): # the data from the "output" field and compare after removing ID's event = self.get_output_queue()[0] self.assertIsInstance(event, bytes) - event_dict = MessageFactory.unserialize(event) + event_dict = MessageFactory.deserialize(event, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) self.assertIsInstance(event_dict, dict) self.assertTrue(b"output" in event_dict) idea_event = json.loads(event_dict["output"]) diff --git a/intelmq/tests/bots/outputs/file/test_output.py b/intelmq/tests/bots/outputs/file/test_output.py index a585a24a0..50ce38192 100644 --- a/intelmq/tests/bots/outputs/file/test_output.py +++ b/intelmq/tests/bots/outputs/file/test_output.py @@ -18,7 +18,9 @@ def set_bot(cls): cls.bot_reference = FileOutputBot cls.os_fp, cls.filename = tempfile.mkstemp() cls.sysconfig = {"hierarchical_output": True, - "file": cls.filename} + "file": cls.filename, + "use_packer": "json", + } def test_event(self): self.run_bot() diff --git a/intelmq/tests/bots/outputs/files/test_output.py b/intelmq/tests/bots/outputs/files/test_output.py index dba47b658..179359556 100644 --- a/intelmq/tests/bots/outputs/files/test_output.py +++ b/intelmq/tests/bots/outputs/files/test_output.py @@ -48,7 +48,9 @@ def test_event_whole(self): "dir": self.incoming_path, "tmp": self.tmp_path, "single_key": None, - "suffix": ""} + "suffix": "", + "use_packer": "json", + } self.run_bot() name = os.listdir(self.incoming_path)[0] with open(pth.join(self.incoming_path, name), encoding="utf-8") as f: diff --git a/intelmq/tests/bots/outputs/redis/test_output.py b/intelmq/tests/bots/outputs/redis/test_output.py index e47360e6d..91ddf15d5 100644 --- a/intelmq/tests/bots/outputs/redis/test_output.py +++ b/intelmq/tests/bots/outputs/redis/test_output.py @@ -50,7 +50,9 @@ def set_bot(cls): "redis_server_port": 6379, "redis_db": 4, "redis_queue": "test-redis-output-queue", "redis_password": os.getenv('INTELMQ_TEST_REDIS_PASSWORD'), - "redis_timeout":
"50000"} + "redis_timeout": "50000", + "use_packer": 'json', + } @test.skip_redis() def test_event(self): diff --git a/intelmq/tests/bots/outputs/redis/test_output_as_hierarchical_json.py b/intelmq/tests/bots/outputs/redis/test_output_as_hierarchical_json.py index 28ceb525f..241120296 100644 --- a/intelmq/tests/bots/outputs/redis/test_output_as_hierarchical_json.py +++ b/intelmq/tests/bots/outputs/redis/test_output_as_hierarchical_json.py @@ -12,6 +12,7 @@ import intelmq.lib.test as test import intelmq.lib.utils as utils +from intelmq.lib.message import MessageFactory from intelmq.bots.outputs.redis.output import RedisOutputBot EXAMPLE_EVENT = {"classification.type": "infected-system", @@ -37,45 +38,45 @@ "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==" } EXAMPLE_EVENT_JSON = { - "feed": { - "url": "http://alerts.bitsighttech.com:8080/stream?", - "name": "BitSight", - "accuracy": 100.0 - }, - "malware": { - "name": "salityp2p" - }, - "time": { - "observation": "2016-04-19T23:16:08+00:00", - "source": "2016-04-19T23:16:08+00:00" - }, - "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTYWxpdHlwMnAiLCJlbnYiOnsic" - "mVtb3RlX2FkZHIiOiIxNTIuMTY2LjExOS4yIiwicmVtb3RlX3" - "BvcnQiOiI2NTExOCIsInNlcnZlcl9hZGRyIjoiNTIuMTguMTk" - "2LjE2OSIsInNlcnZlcl9wb3J0IjoiOTc5NiJ9LCJfdHMiOjE0" - "NjExMDc3NjgsIl9nZW9fZW52X3JlbW90ZV9hZGRyIjp7ImNvd" - "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==", - "classification": { - "type": "infected-system" - }, - "destination": { - "port": 9796, - "ip": "52.18.196.169" - }, - "extra": { - "non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 abcd \165\166" - }, - "event_description": { - "text": "Sinkhole attempted connection" - }, - "source": { - "port": 65118, - "geolocation": { - "country": "Dominican Republic" - }, - "ip": "152.166.119.2" - } - } + "feed": { + "url": "http://alerts.bitsighttech.com:8080/stream?", + "name": "BitSight", + "accuracy": 100.0 + }, + "malware": { + "name": "salityp2p" + }, + "time": { + "observation": "2016-04-19T23:16:08+00:00", + "source": "2016-04-19T23:16:08+00:00" + }, + "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTYWxpdHlwMnAiLCJlbnYiOnsic" + "mVtb3RlX2FkZHIiOiIxNTIuMTY2LjExOS4yIiwicmVtb3RlX3" + "BvcnQiOiI2NTExOCIsInNlcnZlcl9hZGRyIjoiNTIuMTguMTk" + "2LjE2OSIsInNlcnZlcl9wb3J0IjoiOTc5NiJ9LCJfdHMiOjE0" + "NjExMDc3NjgsIl9nZW9fZW52X3JlbW90ZV9hZGRyIjp7ImNvd" + "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==", + "classification": { + "type": "infected-system" + }, + "destination": { + "port": 9796, + "ip": "52.18.196.169" + }, + "extra": { + "non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 abcd \165\166" + }, + "event_description": { + "text": "Sinkhole attempted connection" + }, + "source": { + "port": 65118, + "geolocation": { + "country": "Dominican Republic" + }, + "ip": "152.166.119.2" + } +} class TestRedisOutputBot(test.BotTestCase, unittest.TestCase): @@ -84,13 +85,14 @@ class TestRedisOutputBot(test.BotTestCase, unittest.TestCase): def set_bot(cls): cls.bot_reference = RedisOutputBot cls.default_input_message = EXAMPLE_EVENT - cls.sysconfig = {"redis_server_ip": os.getenv('INTELMQ_PIPELINE_HOST', 'localhost'), + cls.sysconfig = {"redis_server_ip": os.getenv('INTELMQ_PIPELINE_HOST', '127.0.0.1'), "redis_server_port": 6379, "redis_db": 4, "redis_queue": "test-redis-output-queue", "redis_password": os.getenv('INTELMQ_TEST_REDIS_PASSWORD'), "redis_timeout": "50000", "hierarchical_output": True, + "use_packer": 'json', "with_type": False, } @@ -118,9 +120,9 @@ def test_event(self): # Get the message from Redis event = utils.decode(redis_output.lpop(redis_queue)) - 
self.assertIsInstance(event, str) event_dict = json.loads(event) + MessageFactory.serialize() self.assertDictEqual(EXAMPLE_EVENT_JSON, event_dict) diff --git a/intelmq/tests/bots/parsers/json/test_parser.py b/intelmq/tests/bots/parsers/json/test_parser.py index 2c83658ed..38f97ec51 100644 --- a/intelmq/tests/bots/parsers/json/test_parser.py +++ b/intelmq/tests/bots/parsers/json/test_parser.py @@ -6,8 +6,6 @@ import base64 import os import unittest -import json -import msgpack import intelmq.lib.test as test from intelmq.bots.parsers.json.parser import JSONParserBot diff --git a/intelmq/tests/lib/test_bot.py b/intelmq/tests/lib/test_bot.py index b8c71ec81..8b2d8b2c4 100644 --- a/intelmq/tests/lib/test_bot.py +++ b/intelmq/tests/lib/test_bot.py @@ -61,7 +61,7 @@ def test_encoding_error_on_input_message(self): """ self.input_message = b'foo\xc9bar' self.run_bot(iterations=1, allowed_error_count=1) - self.assertLogMatches(r'.*intelmq\.lib\.exceptions\.UnserializationError:.*') + self.assertLogMatches(r'.*intelmq\.lib\.exceptions\.DeserializationError:.*') self.assertEqual(self.pipe.state['test-bot-input-internal'], []) self.assertEqual(self.pipe.state['test-bot-input'], []) self.assertEqual(self.pipe.state['test-bot-output'], []) diff --git a/intelmq/tests/lib/test_bot_output.py b/intelmq/tests/lib/test_bot_output.py index 673b5b9c1..ee8278a63 100644 --- a/intelmq/tests/lib/test_bot_output.py +++ b/intelmq/tests/lib/test_bot_output.py @@ -35,23 +35,23 @@ def process(self): class TestDummyOutputBot(BotTestCase, TestCase): @classmethod def set_bot(cls): - cls.sysconfig = {"return_type": None} + cls.sysconfig = {"return_type": None, "use_packer": "json"} cls.bot_reference = DummyOutputBot cls.default_input_message = RAW cls.allowed_error_count = 1 def test_export_raw(self): - self.run_bot(parameters={"single_key": "raw"}) + self.run_bot(parameters={"single_key": "raw", "use_packer": "json"}) self.assertEqual(self.bot.result, "\n") def test_export_output_dict(self): self.input_message = OUTPUT_DICT - self.run_bot(parameters={"single_key": "output"}) + self.run_bot(parameters={"single_key": "output", "use_packer": "json"}) self.assertEqual(self.bot.result, DICT) def test_export_output_dict_string(self): self.input_message = OUTPUT_DICT - self.run_bot(parameters={"single_key": "output", "return_type": str}) + self.run_bot(parameters={"single_key": "output", "return_type": str, "use_packer": "json"}) self.assertEqual(self.bot.result, OUTPUT_DICT['output']) def test_export_output_string(self): @@ -61,17 +61,17 @@ def test_export_output_string(self): def test_export_output_string_string(self): self.input_message = OUTPUT_STRING - self.run_bot(parameters={"single_key": "output", "return_type": str}) + self.run_bot(parameters={"single_key": "output", "return_type": str, "use_packer": "json"}) self.assertEqual(self.bot.result, STRING) def test_export_output_int(self): self.input_message = OUTPUT_INT - self.run_bot(parameters={"single_key": "output"}) + self.run_bot(parameters={"single_key": "output", "use_packer": "json"}) self.assertEqual(self.bot.result, INT) def test_export_output_int_string(self): self.input_message = OUTPUT_INT - self.run_bot(parameters={"single_key": "output", "return_type": str}) + self.run_bot(parameters={"single_key": "output", "return_type": str, "use_packer": "json"}) self.assertEqual(self.bot.result, OUTPUT_INT['output']) def test_export_keep_raw_hierarchical(self): @@ -79,6 +79,7 @@ def test_export_keep_raw_hierarchical(self): self.run_bot(parameters={"keep_raw_field": True, 
"message_hierarchical": True, "message_with_type": False, + "use_packer": "json", }) self.assertEqual(self.bot.result, RAW_HIERARCHICAL) @@ -88,6 +89,7 @@ def test_export_keep_raw_hierarchical_string(self): "message_hierarchical": True, "message_with_type": False, "return_type": str, + "use_packer": "json", }) self.assertEqual(self.bot.result, dumps(RAW_HIERARCHICAL, sort_keys=True)) @@ -96,5 +98,6 @@ def test_export_now_raw_type(self): self.input_message = INPUT self.run_bot(parameters={"keep_raw_field": False, "message_with_type": True, + "use_packer": "json" }) self.assertEqual(self.bot.result, NO_RAW_TYPE) diff --git a/intelmq/tests/lib/test_exceptions.py b/intelmq/tests/lib/test_exceptions.py index f0ffbbb90..236243b23 100755 --- a/intelmq/tests/lib/test_exceptions.py +++ b/intelmq/tests/lib/test_exceptions.py @@ -66,6 +66,10 @@ def test_MissingDependencyError(self): exc = str(excs.MissingDependencyError(depname, additional_text=additional)) self.assertIn(repr(depname), exc) self.assertTrue(exc.endswith(" %s" % additional)) + + def test_MissingPackerError(self): + exc = str(excs.MissingPackerError('non_existing_packer')) + self.assertIn(repr('non_existing_packer'), exc) if __name__ == '__main__': # pragma: no cover diff --git a/intelmq/tests/lib/test_message.py b/intelmq/tests/lib/test_message.py index e9809cad5..b995030c4 100644 --- a/intelmq/tests/lib/test_message.py +++ b/intelmq/tests/lib/test_message.py @@ -10,8 +10,9 @@ Most tests are performed on Report, as it is formally the same as Message, but has a valid Harmonization configuration. """ +from cmath import exp import json -import msgpack +import os import unittest import pkg_resources @@ -159,12 +160,20 @@ def test_event_ne_different_config(self): def test_invalid_type(self): """ Test if Message raises InvalidArgument for invalid type. """ with self.assertRaises(exceptions.InvalidArgument): - message.MessageFactory.unserialize(msgpack.dumps({"__type": "Message"}), harmonization=HARM) + data = message.MessageFactory.serialize({"__type": "Message"}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + message.MessageFactory.deserialize(data, harmonization=HARM, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) def test_invalid_type2(self): """ Test if MessageFactory raises InvalidArgument for invalid type. """ with self.assertRaises(exceptions.InvalidArgument): - message.MessageFactory.unserialize(msgpack.dumps({"__type": "Invalid"}), harmonization=HARM) + data = message.MessageFactory.serialize({"__type": "Invalid"}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + message.MessageFactory.deserialize(data, harmonization=HARM, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + + def test_missing_packer(self): + """ Test if MessageFactory raises MissingPackerError if a non existing packer is being used. """ + with self.assertRaises(exceptions.MissingPackerError): + data = message.MessageFactory.serialize({"__type": "Invalid"}, use_packer='non_existing_packer') + message.MessageFactory.deserialize(data, harmonization=HARM, use_packer='non_existing_packer') def test_report_invalid_key(self): """ Test if report raises InvalidKey for invalid key in add(). 
""" @@ -365,9 +374,17 @@ def test_factory_serialize(self): report.add('feed.name', 'Example') report.add('feed.url', URL_SANE) report.add('raw', LOREM_BASE64, sanitize=False) - actual = message.MessageFactory.serialize(report) - expected = (b'\x84\xa9feed.name\xa7Example\xa8feed.url\xb4https://example.com/\xa3raw\xb0bG9yZW0gaXBzdW0=\xa6__type\xa6Report') - self.assertDictEqual(msgpack.unpackb(expected), msgpack.unpackb(actual)) + actual = message.MessageFactory.serialize(report, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + expected = message.MessageFactory.serialize({ + 'feed.name': 'Example', + 'feed.url': 'https://example.com/', + 'raw': 'bG9yZW0gaXBzdW0=', + '__type': 'Report', + }, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + self.assertDictEqual( + message.MessageFactory.deserialize(expected, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')), + message.MessageFactory.deserialize(actual, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + ) def test_deep_copy_content(self): """ Test if deep_copy does return the same items. """ @@ -495,54 +512,11 @@ def test_event_dict_hierarchical(self): '00:00'}}, event.to_dict(hierarchical=True)) - def test_event_json(self): - """ Test Event to_json. """ - event = self.new_event() - event = self.add_event_examples(event) - actual = event.to_json() - self.assertIsInstance(actual, str) - expected = ('{"feed.url": "https://example.com/", "feed.name": ' - '"Example", "raw": "bG9yZW0gaXBzdW0=", "time.observation": ' - '"2015-01-01T13:37:00+00:00"}') - self.assertDictEqual(json.loads(expected), json.loads(actual)) - - def test_event_json_hierarchical(self): - """ Test Event to_json. """ - event = self.new_event() - event = self.add_event_examples(event) - actual = event.to_json(hierarchical=True) - self.assertIsInstance(actual, str) - expected = ('{"feed": {"url": "https://example.com/", "name": ' - '"Example"}, "raw": "bG9yZW0gaXBzdW0=", "time": ' - '{"observation": "2015-01-01T13:37:00+00:00"}}') - self.assertDictEqual(json.loads(expected), json.loads(actual)) - - def test_event_msgpack(self): - """ Test event to_msgpack """ - event = self.new_event() - event = self.add_event_examples(event) - actual = event.to_msgpack() - self.assertIsInstance(actual, bytes) - excepted = (b'\x84\xa9feed.name\xa7Example\xa8feed.url\xb4https://example.com/\xa3raw\xb0bG9yZW0gaXBzdW0=\xb0time.observation\xb92015-01-01T13:37:00+00:00') - self.assertDictEqual(msgpack.unpackb(excepted), msgpack.unpackb(actual)) - def test_event_serialize(self): """ Test Event serialize. """ event = self.new_event() - self.assertEqual(b'\x81\xa6__type\xa5Event', - event.serialize()) - - def test_event_string(self): - """ Test Event serialize. """ - event = self.new_event() - self.assertEqual(b'\x81\xa6__type\xa5Event', - event.serialize()) - - def test_event_unicode(self): - """ Test Event serialize. """ - event = self.new_event() - self.assertEqual(b'\x81\xa6__type\xa5Event', - event.serialize()) + expected = message.MessageFactory.serialize({'__type': 'Event'}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) + self.assertEqual(expected, event.serialize()) def test_event_from_report(self): """ Data from report should be in event, except for extra. """ @@ -607,9 +581,9 @@ def test_event_init_check_tuple(self): def test_event_init(self): """ Test if initialization method checks fields. 
""" - event = msgpack.dumps({"__type": "Event", "source.asn": "foo"}) + event = message.MessageFactory.serialize({"__type": "Event", "source.asn": "foo"}, use_packer=os.environ.get('INTELMQ_USE_PACKER', 'MsgPack')) with self.assertRaises(exceptions.InvalidValue): - message.MessageFactory.unserialize(event, harmonization=HARM) + message.MessageFactory.deserialize(event, harmonization=HARM) def test_malware_hash_md5(self): """ Test if MD5 is checked correctly. """ @@ -704,11 +678,9 @@ def test_message_extra_set_oldstyle_dict_overwrite_empty(self): """ event = self.new_event() event["extra"] = {"a": {"x": 1}, "b": "foo"} - self.assertEqual(json.loads(event['extra']), - {"a": {"x": 1}, "b": "foo"}) + self.assertEqual(json.loads(event['extra']), {"a": {"x": 1}, "b": "foo"}) event.add("extra", {"a": {}}, overwrite=True) - self.assertEqual(json.loads(event['extra']), - {"a": {}}) + self.assertEqual(json.loads(event['extra']), {"a": {}}) def test_message_extra_set_dict_empty(self): """ @@ -716,8 +688,7 @@ def test_message_extra_set_dict_empty(self): """ event = self.new_event() event.add('extra', {"foo": ''}) - self.assertEqual(json.loads(event['extra']), - {"foo": ''}) + self.assertEqual(json.loads(event["extra"]), {"foo": ""}) def test_message_extra_in_backwardcomp(self): """ diff --git a/intelmq/tests/lib/test_parser_bot.py b/intelmq/tests/lib/test_parser_bot.py index b5ec40668..35b8b334a 100644 --- a/intelmq/tests/lib/test_parser_bot.py +++ b/intelmq/tests/lib/test_parser_bot.py @@ -233,8 +233,8 @@ def set_bot(cls): def test_event(self): self.run_bot() - self.assertMessageEqual(0, EXAMPLE_JSON_STREAM_EVENTS[0]) - self.assertMessageEqual(1, EXAMPLE_JSON_STREAM_EVENTS[1]) + self.assertMessageEqual(0, EXAMPLE_JSON_STREAM_EVENTS[0], use_packer='json') + self.assertMessageEqual(1, EXAMPLE_JSON_STREAM_EVENTS[1], use_packer='json') def dump_message(self, error_traceback, message=None): self.assertDictEqual(JSON_STREAM_BOGUS_DUMP[self.call_counter], message) diff --git a/intelmq/tests/lib/test_pipeline.py b/intelmq/tests/lib/test_pipeline.py index 08adb7c0e..f0d46c116 100644 --- a/intelmq/tests/lib/test_pipeline.py +++ b/intelmq/tests/lib/test_pipeline.py @@ -68,7 +68,7 @@ def setUp(self): def test_receive(self): self.pipe.state['test-bot-input'] = [SAMPLES['normal'][0]] - self.assertEqual(SAMPLES['normal'][1], utils.decode(self.pipe.receive())) + self.assertEqual(SAMPLES['normal'][0], self.pipe.receive()) def test_send(self): self.pipe.send(SAMPLES['normal'][1]) @@ -77,7 +77,7 @@ def test_send(self): def test_receive_unicode(self): self.pipe.state['test-bot-input'] = [SAMPLES['unicode'][0]] - self.assertEqual(SAMPLES['unicode'][1], utils.decode(self.pipe.receive())) + self.assertEqual(SAMPLES['unicode'][0], self.pipe.receive()) def test_send_unicode(self): self.pipe.send(SAMPLES['unicode'][1]) @@ -108,7 +108,7 @@ def test_reject(self): self.pipe.state['test-bot-input'] = [SAMPLES['normal'][0]] self.pipe.receive() self.pipe.reject_message() - self.assertEqual(SAMPLES['normal'][1], utils.decode(self.pipe.receive())) + self.assertEqual(SAMPLES['normal'][1], self.pipe.receive()) def test_acknowledge(self): self.pipe.state['test-bot-input'] = [SAMPLES['normal'][0]]