Commit

BigFix module update for translations (#81)
mdazam1942 authored and benjamin-craig committed Feb 1, 2019
1 parent f895492 commit ae113a0
Showing 11 changed files with 510 additions and 429 deletions.
354 changes: 143 additions & 211 deletions stix_shifter/stix_translation/src/modules/bigfix/README.md

Large diffs are not rendered by default.

This file was deleted.

Large diffs are not rendered by default.

stix_shifter/stix_translation/src/modules/bigfix/bigfix_translator.py
@@ -1,10 +1,16 @@
 from ..base.base_translator import BaseTranslator
 from .stix_to_bigfix import StixToRelevanceQuery
-from .bigfix_result_translator import BigfixResultTranslator
+from ...json_to_stix.json_to_stix import JSONToStix
+
+from os import path
 
 class Translator(BaseTranslator):
 
     def __init__(self):
-        self.result_translator = BigfixResultTranslator()
+        basepath = path.dirname(__file__)
+        filepath = path.abspath(
+            path.join(basepath, "json", "to_stix_map.json"))
+
+        self.mapping_filepath = filepath
+        self.result_translator = JSONToStix(filepath)
         self.query_translator = StixToRelevanceQuery()
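
With this change the module no longer ships its own result translator: results are mapped to STIX through the shared JSONToStix class, driven by the json/to_stix_map.json file added below. A minimal usage sketch, mirroring the new unit tests later in this commit:

import json
from stix_shifter.stix_translation.src.modules.bigfix import bigfix_translator

# Instantiating the translator resolves and loads the BigFix-to-STIX mapping from disk
interface = bigfix_translator.Translator()
map_data = json.loads(open(interface.mapping_filepath).read())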

This file was deleted.

stix_shifter/stix_translation/src/modules/bigfix/json/to_stix_map.json
@@ -0,0 +1,81 @@
{
  "file_name": {
    "key": "file.name",
    "object": "file"
  },
  "file_path": [
    {
      "key": "directory.path",
      "object": "directory"
    },
    {
      "key": "file.parent_directory_ref",
      "object": "file",
      "references": "directory"
    },
    {
      "key": "process.binary_ref",
      "object": "process",
      "references": "file"
    }
  ],
  "sha256hash": {
    "key": "file.hashes.SHA-256",
    "object": "file"
  },
  "sha1hash": {
    "key": "file.hashes.SHA-1",
    "object": "file"
  },
  "md5hash": {
    "key": "file.hashes.MD5",
    "object": "file"
  },
  "process_name": {
    "key": "process.name",
    "object": "process"
  },
  "process_id": {
    "key": "process.pid",
    "object": "process"
  },
  "start_time": [
    {
      "key": "created",
      "transformer": "EpochSecondsToTimestamp",
      "cybox": false
    },
    {
      "key": "first_observed",
      "transformer": "EpochSecondsToTimestamp",
      "cybox": false
    },
    {
      "key": "last_observed",
      "transformer": "EpochSecondsToTimestamp",
      "cybox": false
    }
  ],
  "modified_time": [
    {
      "key": "modified",
      "transformer": "EpochSecondsToTimestamp",
      "cybox": false
    },
    {
      "key": "first_observed",
      "transformer": "EpochSecondsToTimestamp",
      "cybox": false
    },
    {
      "key": "last_observed",
      "transformer": "EpochSecondsToTimestamp",
      "cybox": false
    }
  ],
  "computer_identity": {
    "key": "name",
    "cybox": false
  }
}
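
Each top-level key in this map is a field of the flat result dict produced by format_computer_obj (added below); the entries tell JSONToStix which STIX observable properties to build, with the EpochSecondsToTimestamp transformer converting the epoch-seconds strings BigFix returns. A sketch of how the map is consumed, lifted from the new tests:

import json
from stix_shifter.stix_translation.src import transformers
from stix_shifter.stix_translation.src.json_to_stix import json_to_stix_translator
from stix_shifter.stix_translation.src.modules.bigfix import bigfix_translator

interface = bigfix_translator.Translator()
map_data = json.loads(open(interface.mapping_filepath).read())
data_source = {"type": "identity", "id": "identity--3532c56d-ea72-48be-a2ad-1a53f4c9c6d3",
               "name": "BigFix", "identity_class": "events"}

# One formatted BigFix file result, as emitted by format_computer_obj
data = {"computer_identity": "12369754-bigdata4545.canlab.ibm.com", "subQueryID": 1,
        "type": "file", "file_name": ".X0-lock",
        "sha256hash": "7236f966f07259a1de3ee0d48a3ef0ee47c4a551af7f0d76dcabbbb9d6e00940",
        "sha1hash": "8b5e953be1db90172af66631132f6f27dda402d2",
        "md5hash": "e5307d27f0eb9a27af8597a1ddc51e89",
        "file_path": "/tmp/.X0-lock", "modified_time": "1541424894"}

# Returns a STIX bundle: an identity object plus one observed-data object
# whose 'objects' dict holds the mapped file and directory observables
bundle = json_to_stix_translator.convert_to_stix(
    data_source, map_data, [data], transformers.get_all_transformers(), {})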
stix_shifter/stix_translation/src/modules/bigfix/stix_to_bigfix.py
@@ -2,7 +2,6 @@

 from ...patterns.parser import generate_query
 from ..base.base_query_translator import BaseQueryTranslator
-from . import bigfix_data_mapping
 from . import bigfix_query_constructor
 
 logger = logging.getLogger(__name__)
@@ -27,12 +26,9 @@ def transform_query(self, data, options, mapping=None):
         logger.info("Converting STIX2 Pattern to Relevance language")
 
         query_object = generate_query(data)
-        data_model_mapper = bigfix_data_mapping.BigFixDataMapper(options)
         result_limit = options['result_limit'] if 'result_limit' in options else DEFAULT_LIMIT
         timerange = options['timerange'] if 'timerange' in options else DEFAULT_TIMERANGE
         query_string = bigfix_query_constructor.translate_pattern(
-            query_object, data_model_mapper, result_limit, timerange)
-
-        print(query_string)
+            query_object, result_limit, timerange)
 
         return query_string
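
transform_query now hands the parsed STIX pattern straight to bigfix_query_constructor, dropping the deleted data-model mapper and the stray print. End to end, mirroring test_stix_to_relevance.py later in this commit:

from stix_shifter.stix_translation import stix_translation

translation = stix_translation.StixTranslation()
stix_pattern = "[process:name = 'node' AND file:hashes.'SHA-256' = '0c0017201b82e1d8613513dc80d1bf46320a957c393b6ca4fb7fa5c3b682c7e5']"

# Returns {'queries': <BESAPI client query XML>, 'parsed_stix': [...]}
query = translation.translate('bigfix', 'query', '{}', stix_pattern)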
(BigFix results connector; file name not rendered)
@@ -16,17 +16,22 @@ def get_success_status(self, data_dict):

     def create_results_connection(self, search_id, offset, length):
         response_txt = None
-        return_obj = dict()
+        return_obj = {}
         try:
             response = self.api_client.get_search_results(search_id, offset, length)
             response_txt = response.read().decode('utf-8')
             response_code = response.code
 
             if 199 < response_code < 300:
                 try:
                     response_dict = json.loads(response_txt)
                     return_obj['success'] = self.get_success_status(response_dict)
-                    return_obj['data'] = response_dict['results']
+                    return_obj['data'] = []
+                    for computer_obj in response_dict['results']:
+                        is_failure = computer_obj['isFailure']
+                        if is_failure == False:
+                            formatted_result_obj = self.format_computer_obj(computer_obj)
+                            return_obj['data'].append(formatted_result_obj)
                 except json.decoder.JSONDecodeError:
                     response_dict = xmltodict.parse(response_txt)
                     ErrorResponder.fill_error(return_obj, response_dict, ['BESAPI','ClientQueryResults','QueryResult', '+IsFailure=1','~Result'])
@@ -41,4 +46,38 @@ def create_results_connection(self, search_id, offset, length):
                 print('can not parse response: ' + str(response_txt))
             else:
                 raise e
-        return return_obj
+        return json.dumps(return_obj)

+    @staticmethod
+    def format_computer_obj(computer_obj):
+        # {"computerID": 12369754, "computerName": "bigdata4545.canlab.ibm.com", "subQueryID": 1, "isFailure": false, "result": "file, .X0-lock, sha256, 7236f966f07259a1de3ee0d48a3ef0ee47c4a551af7f0d76dcabbbb9d6e00940, sha1, 8b5e953be1db90172af66631132f6f27dda402d2, md5, e5307d27f0eb9a27af8597a1ddc51e89, /tmp/.X0-lock, 1541424894", "ResponseTime": 0}
+
+        result = computer_obj['result']
+        obj_list = result.split(',')
+        formatted_obj = {}
+
+        computer_identity = str(computer_obj['computerID']) + '-' + computer_obj['computerName']
+
+        formatted_obj['computer_identity'] = computer_identity
+        formatted_obj['subQueryID'] = computer_obj['subQueryID']
+        if result.startswith('process'):
+            formatted_obj['start_time'] = obj_list[10].strip()
+            formatted_obj['type'] = obj_list[0].strip()
+            formatted_obj['process_name'] = obj_list[1].strip()
+            formatted_obj['process_id'] = obj_list[2].strip()
+            formatted_obj['sha256hash'] = obj_list[4].strip()
+            formatted_obj['sha1hash'] = obj_list[6].strip()
+            formatted_obj['md5hash'] = obj_list[8].strip()
+            formatted_obj['file_path'] = obj_list[9].strip()
+        elif result.startswith('file'):
+            formatted_obj['type'] = obj_list[0].strip()
+            formatted_obj['file_name'] = obj_list[1].strip()
+            formatted_obj['sha256hash'] = obj_list[3].strip()
+            formatted_obj['sha1hash'] = obj_list[5].strip()
+            formatted_obj['md5hash'] = obj_list[7].strip()
+            formatted_obj['file_path'] = obj_list[8].strip()
+            formatted_obj['modified_time'] = obj_list[9].strip()
+        else:
+            print('Unknown result')
+
+        return formatted_obj
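
The connector now flattens each successful (isFailure == false) computer result into the flat dict whose keys to_stix_map.json expects. A worked example using the sample row from the comment above; the output is derived by hand from the split/strip logic, and 'BigFixConnector' stands in for the enclosing class, whose name is not rendered in this diff:

sample = {"computerID": 12369754, "computerName": "bigdata4545.canlab.ibm.com",
          "subQueryID": 1, "isFailure": False,
          "result": "file, .X0-lock, sha256, 7236f966f07259a1de3ee0d48a3ef0ee47c4a551af7f0d76dcabbbb9d6e00940, sha1, 8b5e953be1db90172af66631132f6f27dda402d2, md5, e5307d27f0eb9a27af8597a1ddc51e89, /tmp/.X0-lock, 1541424894",
          "ResponseTime": 0}

# format_computer_obj is a @staticmethod, so it can be called on the class directly
formatted = BigFixConnector.format_computer_obj(sample)
# {'computer_identity': '12369754-bigdata4545.canlab.ibm.com', 'subQueryID': 1,
#  'type': 'file', 'file_name': '.X0-lock',
#  'sha256hash': '7236f966f07259a1de3ee0d48a3ef0ee47c4a551af7f0d76dcabbbb9d6e00940',
#  'sha1hash': '8b5e953be1db90172af66631132f6f27dda402d2',
#  'md5hash': 'e5307d27f0eb9a27af8597a1ddc51e89',
#  'file_path': '/tmp/.X0-lock', 'modified_time': '1541424894'}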
105 changes: 91 additions & 14 deletions tests/bigfix_tests/test_bigfix_results_to_stix.py
@@ -1,18 +1,95 @@
-from stix_shifter.stix_translation import stix_translation
+import unittest
+import json
+from stix_shifter.stix_translation.src import transformers
+from stix_shifter.stix_translation.src.json_to_stix import json_to_stix_translator
+from stix_shifter.stix_translation.src.modules.bigfix import bigfix_translator
 
-translation = stix_translation.StixTranslation()
+interface = bigfix_translator.Translator()
+map_file = open(interface.mapping_filepath).read()
+map_data = json.loads(map_file)
+data_source = {
+    "type": "identity",
+    "id": "identity--3532c56d-ea72-48be-a2ad-1a53f4c9c6d3",
+    "name": "BigFix",
+    "identity_class": "events"
+}
+options = {}
 
-class TestBigFixResultsToStix(object):
+class TestBigFixResultsToStix(unittest.TestCase, object):
+    @staticmethod
+    def get_first(itr, constraint):
+        return next(
+            (obj for obj in itr if constraint(obj)),
+            None
+        )
+
+    @staticmethod
+    def get_first_of_type(itr, typ):
+        return TestBigFixResultsToStix.get_first(itr, lambda o: type(o) == dict and o.get('type') == typ)
 
-    def test_ipv4_query(self):
-        # bf_results = "[{'computerID': 12369754, 'computerName': 'bigdata4545.canlab.ibm.com', 'subQueryID': 1," \
-        #              " 'isFailure': False, 'result': '.err, d41d8cd98f00b204e9800998ecf8427e, /.err'," \
-        #              " 'ResponseTime': 1000}, " \
-        #              "{'computerID': 14821900, 'computerName': 'DESKTOP-C30V1JF', 'subQueryID': 1, 'isFailure': True," \
-        #              " 'result': '12520437.cpx, 0a0feb9eb28bde8cd835716343b03b14, C:\\Windows\\system32\\12520437.cpx'," \
-        #              " 'ResponseTime': 63000}]"
-        # query = translation.translate('bigfix', 'results', '{}', bf_results)
-        # assert query == bf_results
-        assert "query" == "query"
+    def test_common_prop(self):
+        data = {"computer_identity": "12369754-bigdata4545.canlab.ibm.com", "subQueryID": 1, "start_time": "1541424881", "type": "process", "process_name": "systemd", "process_id": "1", "sha256hash": "9c74c625b2aba7a2e8d8a42e2e94715c355367f7cbfa9bd5404ba52b726792a6", "sha1hash": "916933045c5c91ebcaa325e7f8302f3a732a0a3d", "md5hash": "28a9beb86c4d4c31ba572805bea8494f", "file_path": "/usr/lib/systemd/systemd"}
+        result_bundle = json_to_stix_translator.convert_to_stix(
+            data_source, map_data, [data], transformers.get_all_transformers(), options)
+        print(json.dumps(result_bundle, indent=2))
+        assert(result_bundle['type'] == 'bundle')
+        result_bundle_objects = result_bundle['objects']
+
+        result_bundle_identity = result_bundle_objects[0]
+        assert(result_bundle_identity['type'] == data_source['type'])
+        assert(result_bundle_identity['id'] == data_source['id'])
+        assert(result_bundle_identity['name'] == data_source['name'])
+        assert(result_bundle_identity['identity_class']
+               == data_source['identity_class'])
+
+        observed_data = result_bundle_objects[1]
+        print(observed_data)
+        assert(observed_data['id'] is not None)
+        assert(observed_data['type'] == "observed-data")
+        assert(observed_data['created_by_ref'] == result_bundle_identity['id'])
+
+        assert(observed_data['created'] is not None)
+        assert(observed_data['first_observed'] is not None)
+        assert(observed_data['last_observed'] is not None)
+
+    def test_file_results_to_stix(self):
+        file_name = '.X0-lock'
+        data = {"computer_identity": "12369754-bigdata4545.canlab.ibm.com", "subQueryID": 1, "type": "file", "file_name": ".X0-lock", "sha256hash": "7236f966f07259a1de3ee0d48a3ef0ee47c4a551af7f0d76dcabbbb9d6e00940", "sha1hash": "8b5e953be1db90172af66631132f6f27dda402d2", "md5hash": "e5307d27f0eb9a27af8597a1ddc51e89", "file_path": "/tmp/.X0-lock", "modified_time": "1541424894"}
+        result_bundle = json_to_stix_translator.convert_to_stix(
+            data_source, map_data, [data], transformers.get_all_transformers(), options)
+
+        result_bundle_objects = result_bundle['objects']
+
+        result_bundle_identity = result_bundle_objects[0]
+        assert(result_bundle_identity['type'] == data_source['type'])
+
+        observed_data = result_bundle_objects[1]
+
+        assert('objects' in observed_data)
+        objects = observed_data['objects']
+
+        file_obj = TestBigFixResultsToStix.get_first_of_type(objects.values(), 'file')
+        assert(file_obj is not None), 'file object type not found'
+        assert(file_obj.keys() == {'type', 'name', 'hashes', 'parent_directory_ref'})
+        assert(file_obj['name'] == file_name)
+
+    def test_process_results_to_stix(self):
+        process_name = 'systemd'
+        data = {"computer_identity": "12369754-bigdata4545.canlab.ibm.com", "subQueryID": 1, "start_time": "1541424881", "type": "process", "process_name": "systemd", "process_id": "1", "sha256hash": "9c74c625b2aba7a2e8d8a42e2e94715c355367f7cbfa9bd5404ba52b726792a6", "sha1hash": "916933045c5c91ebcaa325e7f8302f3a732a0a3d", "md5hash": "28a9beb86c4d4c31ba572805bea8494f", "file_path": "/usr/lib/systemd/systemd"}
+        result_bundle = json_to_stix_translator.convert_to_stix(
+            data_source, map_data, [data], transformers.get_all_transformers(), options)
+        print(json.dumps(result_bundle, indent=2))
+        result_bundle_objects = result_bundle['objects']
+
+        result_bundle_identity = result_bundle_objects[0]
+        assert(result_bundle_identity['type'] == data_source['type'])
+
+        observed_data = result_bundle_objects[1]
+
+        assert('objects' in observed_data)
+        objects = observed_data['objects']
+
+        process_obj = TestBigFixResultsToStix.get_first_of_type(objects.values(), 'process')
+        assert(process_obj is not None), 'process object type not found'
+        assert(process_obj.keys() == {'type', 'name', 'pid', 'binary_ref'})
+        assert(process_obj['name'] == process_name)
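
get_first_of_type is needed because the observed-data 'objects' field is a dict of numbered observables rather than a list. For the file test above, the observed-data payload roughly looks like the following hand-written sketch; exact keys and ordering may differ:

# observed_data['objects'] approximately:
# {
#     "0": {"type": "file", "name": ".X0-lock", "hashes": {...}, "parent_directory_ref": "1"},
#     "1": {"type": "directory", "path": "/tmp"}
# }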
27 changes: 20 additions & 7 deletions tests/bigfix_tests/test_stix_to_relevance.py
@@ -5,14 +5,27 @@


 class TestStixToRelevance(unittest.TestCase, object):
 
     def test_process_query(self):
 
-        # stix_pattern = "[process:name = 'node' or file:hashes.sha256 = '0c0017201b82e1d8613513dc80d1bf46320a957c393b6ca4fb7fa5c3b682c7e5']"
         stix_pattern = "[process:name = 'node' AND file:hashes.'SHA-256' = '0c0017201b82e1d8613513dc80d1bf46320a957c393b6ca4fb7fa5c3b682c7e5']"
 
         query = translation.translate('bigfix', 'query', '{}', stix_pattern)
         # queries = '( "process", name of it | "n/a", process id of it as string | "n/a", "sha256", sha256 of image file of it | "n/a", "sha1", sha1 of image file of it | "n/a", "md5", md5 of image file of it | "n/a", pathname of image file of it | "n/a", (start time of it - "01 Jan 1970 00:00:00 +0000" as time)/second ) of processes whose (name of it as lowercase = "node" as lowercase AND sha256 of image file of it as lowercase = "0c0017201b82e1d8613513dc80d1bf46320a957c393b6ca4fb7fa5c3b682c7e5" as lowercase )'
 
+        queries = '<BESAPI xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="BESAPI.xsd"><ClientQuery><ApplicabilityRelevance>true</ApplicabilityRelevance><QueryText>( "process", name of it | "n/a", process id of it as string | "n/a", "sha256", sha256 of image file of it | "n/a", "sha1", sha1 of image file of it | "n/a", "md5", md5 of image file of it | "n/a", pathname of image file of it | "n/a", (start time of it - "01 Jan 1970 00:00:00 +0000" as time)/second ) of processes whose (name of it as lowercase = "node" as lowercase AND sha256 of image file of it as lowercase = "0c0017201b82e1d8613513dc80d1bf46320a957c393b6ca4fb7fa5c3b682c7e5" as lowercase )</QueryText><Target><CustomRelevance>true</CustomRelevance></Target></ClientQuery></BESAPI>'
+        parsed_stix = [{'attribute': 'file:hashes.SHA-256', 'comparison_operator': '=', 'value': '0c0017201b82e1d8613513dc80d1bf46320a957c393b6ca4fb7fa5c3b682c7e5'}, {'attribute': 'process:name', 'comparison_operator': '=', 'value': 'node'}]
+        print(query)
+        assert query == {'queries': queries, 'parsed_stix': parsed_stix}
 
     def test_file_query(self):
 
         stix_pattern = "[file:name = 'a' AND file:parent_directory_ref.path = '/root' OR file:hashes.'SHA-256' = '2584c4ba8b0d2a52d94023f420b7e356a1b1a3f2291ad5eba06683d58c48570d']"
 
-        # query = translation.translate('bigfix', 'query', '{}', stix_pattern)
+        query = translation.translate('bigfix', 'query', '{}', stix_pattern)
         # queries = '("file", name of it | "n/a", "sha256", sha256 of it | "n/a", "sha1", sha1 of it | "n/a", "md5", md5 of it | "n/a", pathname of it | "n/a", (modification time of it - "01 Jan 1970 00:00:00 +0000" as time)/second ) of files whose (name of it as lowercase = "a" as lowercase OR sha256 of it as lowercase = "2584c4ba8b0d2a52d94023f420b7e356a1b1a3f2291ad5eba06683d58c48570d" as lowercase) of folder ("/root")'
 
-        # parsed_stix = [{'attribute': 'file:hashes.sha256', 'comparison_operator': '=', 'value': '0c0017201b82e1d8613513dc80d1bf46320a957c393b6ca4fb7fa5c3b682c7e5'}, {'attribute': 'process:name', 'comparison_operator': '=', 'value': 'node'}]
-        # print(query)
-        # assert query == {'queries': stix_pattern, 'parsed_stix': parsed_stix}
-        assert "query" == "query"
+        queries = '<BESAPI xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="BESAPI.xsd"><ClientQuery><ApplicabilityRelevance>true</ApplicabilityRelevance><QueryText>("file", name of it | "n/a", "sha256", sha256 of it | "n/a", "sha1", sha1 of it | "n/a", "md5", md5 of it | "n/a", pathname of it | "n/a", (modification time of it - "01 Jan 1970 00:00:00 +0000" as time)/second ) of files whose (name of it as lowercase = "a" as lowercase OR sha256 of it as lowercase = "2584c4ba8b0d2a52d94023f420b7e356a1b1a3f2291ad5eba06683d58c48570d" as lowercase) of folder ("/root")</QueryText><Target><CustomRelevance>true</CustomRelevance></Target></ClientQuery></BESAPI>'
+        parsed_stix = [{'attribute': 'file:hashes.SHA-256', 'comparison_operator': '=', 'value': '2584c4ba8b0d2a52d94023f420b7e356a1b1a3f2291ad5eba06683d58c48570d'}, {'attribute': 'file:parent_directory_ref.path', 'comparison_operator': '=', 'value': '/root'}, {'attribute': 'file:name', 'comparison_operator': '=', 'value': 'a'}]
+        print(query)
+        assert query == {'queries': queries, 'parsed_stix': parsed_stix}