From 5673b04729a4753c035ac7fa34cd6ce924fd1cc0 Mon Sep 17 00:00:00 2001 From: Paul Coccoli Date: Thu, 7 Sep 2023 14:59:15 -0400 Subject: [PATCH 1/9] Change test_stixshifter_diagnosis to use bin dir instead of system PATH --- bin/stix-shifter-diag | 0 tests/test_stixshifter_diagnosis.py | 11 ++++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) mode change 100644 => 100755 bin/stix-shifter-diag diff --git a/bin/stix-shifter-diag b/bin/stix-shifter-diag old mode 100644 new mode 100755 diff --git a/tests/test_stixshifter_diagnosis.py b/tests/test_stixshifter_diagnosis.py index e65d377c..b5f8cdfc 100644 --- a/tests/test_stixshifter_diagnosis.py +++ b/tests/test_stixshifter_diagnosis.py @@ -1,10 +1,15 @@ -import pytest +import os import subprocess +import pytest from kestrel_datasource_stixshifter.diagnosis import Diagnosis from .utils import stixshifter_profile_lab101, stixshifter_profile_ecs +cwd = os.path.dirname(os.path.abspath(__file__)) +STIX_SHIFTER_DIAG = os.path.join(cwd, "../bin/stix-shifter-diag") + + def test_diagnosis(stixshifter_profile_lab101): pattern = " ".join( [ @@ -74,7 +79,7 @@ def test_cli(stixshifter_profile_lab101): """ result = subprocess.run( - args=["stix-shifter-diag", "lab101"], + args=[STIX_SHIFTER_DIAG, "lab101"], universal_newlines=True, stdout=subprocess.PIPE, ) @@ -128,7 +133,7 @@ def test_cli_ecs(stixshifter_profile_ecs): result = subprocess.run( args=[ - "stix-shifter-diag", + STIX_SHIFTER_DIAG, "-p", "[x-oca-asset:device_id = '123456'] START t'2000-01-01T00:00:00.000Z' STOP t'3000-01-01T00:00:00.000Z'", "-t", From b73178823955bda335b727253923a7ca812bbee6 Mon Sep 17 00:00:00 2001 From: Paul Coccoli Date: Sun, 17 Sep 2023 13:28:41 -0400 Subject: [PATCH 2/9] Add rudimentary interactive CLI ikestrel --- bin/ikestrel | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++ setup.cfg | 1 + 2 files changed, 93 insertions(+) create mode 100644 bin/ikestrel diff --git a/bin/ikestrel b/bin/ikestrel new file mode 100644 index 
00000000..812f89d5 --- /dev/null +++ b/bin/ikestrel @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 + +# Executing a hunt in an interactive CLI +# Usage: `ikestrel [-v] [--debug]` + +import argparse +import cmd +import logging + +from tabulate import tabulate + +from kestrel.session import Session +from kestrel.codegen.display import DisplayBlockSummary, DisplayDataframe +from kestrel.exceptions import KestrelException +from kestrel.utils import add_logging_handler, clear_logging_handlers + + +CMDS = [ # command_no_result from kestrel.lark + "APPLY", + "DISP", + "INFO", + "SAVE", +] + + +def display_outputs(outputs): + for i in outputs: + if isinstance(i, DisplayBlockSummary): + print(i.to_string()) + elif isinstance(i, DisplayDataframe): + data = i.to_dict()["data"] + print(tabulate(data, headers="keys")) + else: + print(i.to_string()) + + +class Cli(cmd.Cmd): + prompt = "> " + + def __init__(self, session: Session): + self.session = session + self.buf = "" + super().__init__() + + def default(self, line: str): + try: + outputs = self.session.execute(line) + display_outputs(outputs) + except KestrelException as e: + print(e) + + def completenames(self, text, *ignored): + code, _start, _end = ignored + if code.isupper(): + # Probably a command? 
+ results = [i for i in CMDS if i.startswith(code)] + else: + # Try all commands and vars + results = [i for i in CMDS if i.lower().startswith(code)] + results += [ + i for i in self.session.get_variable_names() if i.startswith(code) + ] + return results + + def completedefault(self, *ignored): + _, code, start, end = ignored + results = self.session.do_complete(code, end) + stub = code[start:] + return [stub + suffix for suffix in results] + + def do_EOF(self, _line: str): + print() + return True + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Kestrel Interpreter") + parser.add_argument( + "-v", "--verbose", help="print verbose log", action="store_true" + ) + parser.add_argument( + "--debug", help="debug level log (default is info level)", action="store_true" + ) + args = parser.parse_args() + + clear_logging_handlers() + if args.verbose: + add_logging_handler(logging.StreamHandler(), args.debug) + + with Session(debug_mode=args.debug) as s: + cli = Cli(s) + cli.cmdloop() diff --git a/setup.cfg b/setup.cfg index 5c02bd94..13965696 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,6 +26,7 @@ package_dir = =src scripts = bin/kestrel + bin/ikestrel bin/stix-shifter-diag python_requires = >= 3.8 install_requires = From eb747126ac863b8e871c3d67db35a3b4a9843634 Mon Sep 17 00:00:00 2001 From: Paul Coccoli Date: Sun, 17 Sep 2023 13:49:31 -0400 Subject: [PATCH 3/9] DESCRIBE command --- docs/language/commands.rst | 39 +++++++++++++++++++- src/kestrel/codegen/commands.py | 64 ++++++++++++++++++++++++++++++++- src/kestrel/syntax/kestrel.lark | 5 +++ src/kestrel/syntax/parser.py | 8 +++++ tests/test_command_describe.py | 27 ++++++++++++++ tests/test_parser.py | 9 +++++ 6 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 tests/test_command_describe.py diff --git a/docs/language/commands.rst b/docs/language/commands.rst index 88787768..458db224 100644 --- a/docs/language/commands.rst +++ b/docs/language/commands.rst @@ -7,7 +7,7 @@ A 
Kestrel command describes a :ref:`language/tac:hunt step` in one of the five c #. Retrieval: ``GET``, ``FIND``, ``NEW``. #. Transformation: ``SORT``, ``GROUP``. #. Enrichment: ``APPLY``. -#. Inspection: ``INFO``, ``DISP``. +#. Inspection: ``INFO``, ``DISP``, ``DESCRIBE``. #. Flow-control: ``SAVE``, ``LOAD``, ``ASSIGN``, ``MERGE``, ``JOIN``. To achieve :ref:`language/tac:composable hunt flow` and allow threat hunters to compose hunt @@ -47,6 +47,8 @@ object, or both a variable and a display object. +---------+----------------+---------------+----------------+---------------+ | DISP | yes | maybe | no | yes | +---------+----------------+---------------+----------------+---------------+ +| DESCRIBE | yes | no | no | yes | ++---------+----------------+---------------+----------------+---------------+ | SORT | yes | yes | yes | no | +---------+----------------+---------------+----------------+---------------+ | GROUP | yes | yes | yes | no | @@ -674,6 +676,41 @@ Examples # display the timestamps from observations of those processes: DISP TIMESTAMPED(procs) ATTR pid, name, command_line +DESCRIBE +-------- + +The command ``DESCRIBE`` is an *inspection* hunt step to show +descriptive statistics of a Kestrel variable attribute. + +Syntax +^^^^^^ +:: + + DESCRIBE varx.attr + +The command shows the following information of a numeric attribute: + +- count: the number of non-NULL values +- mean: the average value +- min: the minimum value +- max: the maximum value + +The command shows the following information of other attributes: + +- count: the number of non-NULL values +- unique: the number of unique values +- top: the most frequently occurring value +- freq: the number of occurrences of the top value + +Examples +^^^^^^^^ + +.. 
code-block:: coffeescript + + # showing information like unique count of src_port + nt = GET network-traffic FROM stixshifter://idsX WHERE dst_port = 80 + DESCRIBE nt.src_port + SORT ---- diff --git a/src/kestrel/codegen/commands.py b/src/kestrel/codegen/commands.py index 454349dd..b1239e84 100644 --- a/src/kestrel/codegen/commands.py +++ b/src/kestrel/codegen/commands.py @@ -25,7 +25,18 @@ from firepit.deref import auto_deref from firepit.exceptions import InvalidAttr -from firepit.query import Limit, Offset, Order, Predicate, Projection, Query +from firepit.query import ( + Aggregation, + Column, + Limit, + Group, + Offset, + Order, + Predicate, + Projection, + Query, + Table, +) from firepit.stix20 import summarize_pattern from kestrel.utils import remove_empty_dicts, dedup_ordered_dicts @@ -223,6 +234,57 @@ def disp(stmt, session): return None, DisplayDataframe(dedup_ordered_dicts(remove_empty_dicts(content))) +@_debug_logger +@_skip_command_if_empty_input +def describe(stmt, session): + entity_table = session.symtable[stmt["input"]].entity_table + attribute = stmt["attribute"] + schema = {i["name"]: i["type"] for i in session.store.schema(entity_table)} + attr_type = schema[attribute].lower() + + result = OrderedDict() + + qry = Query(entity_table) + if attr_type in ("integer", "bigint", "numeric"): + qry.append( + Aggregation( + [ + ("COUNT", attribute, "count"), + ("AVG", attribute, "mean"), + ("MIN", attribute, "min"), + ("MAX", attribute, "max"), + ] + ) + ) + else: + qry.append( + Aggregation( + [("COUNT", attribute, "count"), ("NUNIQUE", attribute, "unique")] + ) + ) + cursor = session.store.run_query(qry) + content = cursor.fetchall()[0] + result.update(content) + + # Need second query for top and freq + qry = Query( + [ + Table(entity_table), + Group([Column(attribute, alias="top")]), + Aggregation([("COUNT", "*", "freq")]), + Order([("freq", Order.DESC)]), + Limit(1), + ] + ) + + cursor = session.store.run_query(qry) + content = cursor.fetchall()[0] + 
+ result.update(content) + + return None, DisplayDict(result) + + @_debug_logger @_default_output @_skip_command_if_empty_input diff --git a/src/kestrel/syntax/kestrel.lark b/src/kestrel/syntax/kestrel.lark index 5742046d..eda6958c 100644 --- a/src/kestrel/syntax/kestrel.lark +++ b/src/kestrel/syntax/kestrel.lark @@ -31,6 +31,7 @@ assignment: VARIABLE "=" expression | disp | info | save + | describe // // All commands @@ -58,6 +59,8 @@ info: "INFO"i VARIABLE save: "SAVE"i VARIABLE "TO"i stdpath +describe: "DESCRIBE"i var_attr + // // Variable definition // @@ -255,6 +258,8 @@ literal_list: "(" literal ("," literal)* ")" reference_or_simple_string: ECNAME ("." ATTRIBUTE)? +var_attr: ECNAME "." ATTRIBUTE + ?string: advanced_string number: NUMBER diff --git a/src/kestrel/syntax/parser.py b/src/kestrel/syntax/parser.py index a6595e93..2d9ff341 100644 --- a/src/kestrel/syntax/parser.py +++ b/src/kestrel/syntax/parser.py @@ -100,6 +100,11 @@ def disp(self, args): packet["attrs"] = "*" return packet + def describe(self, args): + packet = {"command": "describe"} + packet.update(args[0]) + return packet + def get(self, args): packet = { "command": "get", @@ -272,6 +277,9 @@ def literal_list(self, args): def literal(self, args): return args[0] + def var_attr(self, args): + return {"input": _first(args), "attribute": _second(args)} + def reference_or_simple_string(self, args): if len(args) > 1: variable = _first(args) diff --git a/tests/test_command_describe.py b/tests/test_command_describe.py new file mode 100644 index 00000000..d98d8734 --- /dev/null +++ b/tests/test_command_describe.py @@ -0,0 +1,27 @@ +import pytest + +from kestrel.session import Session + + +def test_describe(): + with Session() as s: + stmt = """ +newvar = NEW [ {"type": "process", "name": "cmd.exe", "pid": 123} + , {"type": "process", "name": "explorer.exe", "pid": 99} + , {"type": "process", "name": "explorer.exe", "pid": 200} + ] +""" + s.execute(stmt) + out = s.execute("DESCRIBE newvar.name") + 
stats = out[0].to_dict()['data'] + assert stats['count'] == 3 + assert stats['unique'] == 2 + assert stats['top'] == "explorer.exe" + assert stats['freq'] == 2 + + out = s.execute("DESCRIBE newvar.pid") + stats = out[0].to_dict()['data'] + assert stats['count'] == 3 + assert stats['mean'] == (123 + 99 + 200)/3 + assert stats['min'] == 99 + assert stats['max'] == 200 diff --git a/tests/test_parser.py b/tests/test_parser.py index b9b91cee..cae28f5d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -345,3 +345,12 @@ def test_grouping_3(): {"attr": "baz", "func": "count", "alias": "count_baz"}, {"attr": "blah", "func": "max", "alias": "whatever"}, ] + + +def test_describe_simple(): + results = parse_kestrel("describe foo.bar") + result = results[0] + print(result) + assert result["command"] == "describe" + assert result["input"] == "foo" + assert result["attribute"] == "bar" From 055d9819ee1906756b9fcead4d08718f683532a7 Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 20 Sep 2023 17:22:39 -0400 Subject: [PATCH 4/9] rename and refactory check_module_availability() --- bin/stix-shifter-diag | 4 +- .../connector.py | 58 ++++++++----------- src/kestrel_datasource_stixshifter/query.py | 4 +- tests/test_stixshifter.py | 6 +- tests/test_stixshifter_translator.py | 10 ++-- 5 files changed, 37 insertions(+), 45 deletions(-) diff --git a/bin/stix-shifter-diag b/bin/stix-shifter-diag index 244ae5f7..cc53ba55 100755 --- a/bin/stix-shifter-diag +++ b/bin/stix-shifter-diag @@ -3,7 +3,7 @@ import argparse import datetime from kestrel_datasource_stixshifter.diagnosis import Diagnosis -from kestrel_datasource_stixshifter.connector import check_module_availability +from kestrel_datasource_stixshifter.connector import setup_connector_module from firepit.timestamp import timefmt @@ -83,7 +83,7 @@ if __name__ == "__main__": diag.diagnose_config() # 2. 
setup connector and ping - check_module_availability(diag.connector_name, args.ignore_cert) + setup_connector_module(diag.connector_name, args.ignore_cert) # 3. query translation test diag.diagnose_translate_query(patterns[0]) diff --git a/src/kestrel_datasource_stixshifter/connector.py b/src/kestrel_datasource_stixshifter/connector.py index a41dffda..9645c273 100644 --- a/src/kestrel_datasource_stixshifter/connector.py +++ b/src/kestrel_datasource_stixshifter/connector.py @@ -89,40 +89,32 @@ def install_package(connector_name, requests_verify=True): ) -def ensure_version_consistency(connector_name, requests_verify=True): - """Check if the installed connector package has the same version as - stix-shifter If the version is different, uninstall connector - package and the install the same version as stix-shifter - - """ - stixshifter_version = version("stix_shifter") - package_name = get_package_name(connector_name) - package_version = version(package_name) - if package_version == stixshifter_version: - return - package_w_ver = package_name + "==" + package_version - _logger.info( - f"{package_name} version {package_version} is different " - f"from stix-shifter version {stixshifter_version}." - ) - _logger.info(f'uninstalling Python package "{package_w_ver}".') +def setup_connector_module(connector_name, requests_verify=True): try: - subprocess.check_call( - [sys.executable, "-m", "pip", "uninstall", "--yes", package_w_ver] - ) - except: - _logger.info(f"failed to uninstall package {package_w_ver}") - install_package(connector_name, requests_verify) - - -def check_module_availability(connector_name, requests_verify=True): - try: - importlib.import_module( - "stix_shifter_modules." + connector_name + ".entry_point" - ) - - ensure_version_consistency(connector_name, requests_verify) - + importlib.import_module("stix_shifter_modules." 
+ connector_name + ".entry_point") except: + connector_available = False + else: + stixshifter_version = version("stix_shifter") + package_name = get_package_name(connector_name) + package_version = version(package_name) + if package_version == stixshifter_version: + connector_available = True + else: + connector_available = False + package_w_ver = package_name + "==" + package_version + _logger.info( + f"{package_name} version {package_version} is different " + f"from stix-shifter version {stixshifter_version}." + ) + _logger.info(f'uninstalling Python package "{package_w_ver}".') + try: + subprocess.check_call( + [sys.executable, "-m", "pip", "uninstall", "--yes", package_w_ver] + ) + except: + _logger.info(f"failed to uninstall package {package_w_ver}") + + if not connector_available: _logger.info(f'miss STIX-shifter connector "{connector_name}"') install_package(connector_name, requests_verify) diff --git a/src/kestrel_datasource_stixshifter/query.py b/src/kestrel_datasource_stixshifter/query.py index f3fa5ea6..1539b5d9 100644 --- a/src/kestrel_datasource_stixshifter/query.py +++ b/src/kestrel_datasource_stixshifter/query.py @@ -9,7 +9,7 @@ from kestrel.datasource import ReturnFromStore from kestrel.utils import mkdtemp from kestrel.exceptions import DataSourceError, DataSourceManagerInternalError -from kestrel_datasource_stixshifter.connector import check_module_availability +from kestrel_datasource_stixshifter.connector import setup_connector_module from kestrel_datasource_stixshifter import multiproc from kestrel_datasource_stixshifter.config import ( get_datasource_from_profiles, @@ -86,7 +86,7 @@ def query_datasource(uri, pattern, session_id, config, store, limit=None): copy.deepcopy, get_datasource_from_profiles(profile, config["profiles"]) ) - check_module_availability(connector_name) + setup_connector_module(connector_name) if _logger.isEnabledFor(logging.DEBUG): data_path_striped = "".join(filter(str.isalnum, profile)) diff --git 
a/tests/test_stixshifter.py b/tests/test_stixshifter.py index d8cb93a5..4e21589a 100644 --- a/tests/test_stixshifter.py +++ b/tests/test_stixshifter.py @@ -5,7 +5,7 @@ from kestrel_datasource_stixshifter.connector import ( verify_package_origin, - check_module_availability, + setup_connector_module, ) from kestrel_datasource_stixshifter.config import get_datasource_from_profiles @@ -17,10 +17,10 @@ def test_verify_package_origin(): verify_package_origin(connector_name, "test_version") -def test_check_module_availability(): +def test_setup_connector_module(): connectors = ["stix_bundle"] for connector_name in connectors: - check_module_availability(connector_name) + setup_connector_module(connector_name) def test_yaml_profiles_refresh(tmp_path): diff --git a/tests/test_stixshifter_translator.py b/tests/test_stixshifter_translator.py index 94c58d9d..3d36cff0 100644 --- a/tests/test_stixshifter_translator.py +++ b/tests/test_stixshifter_translator.py @@ -4,7 +4,7 @@ import pytest from multiprocessing import Queue -from kestrel_datasource_stixshifter.connector import check_module_availability +from kestrel_datasource_stixshifter.connector import setup_connector_module from kestrel_datasource_stixshifter import multiproc from kestrel_datasource_stixshifter.worker.utils import TransmissionResult from kestrel_datasource_stixshifter.worker import STOP_SIGN @@ -78,7 +78,7 @@ def test_stixshifter_translate(): query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1" - check_module_availability(CONNECTOR_NAME) + setup_connector_module(CONNECTOR_NAME) input_queue = Queue() output_queue = Queue() @@ -108,7 +108,7 @@ def test_stixshifter_translate(): def test_stixshifter_translate_with_bundle_writing_to_disk(tmpdir): query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1" - check_module_availability(CONNECTOR_NAME) + setup_connector_module(CONNECTOR_NAME) cache_bundle_path_prefix = str(tmpdir.join("test")) offset_str = str(SAMPLE_RESULT.offset).zfill(32) cache_bundle_path = 
cache_bundle_path_prefix + f"_{offset_str}.json" @@ -145,7 +145,7 @@ def test_stixshifter_translate_with_bundle_writing_to_disk(tmpdir): def test_fast_translate(): query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1" - check_module_availability(CONNECTOR_NAME) + setup_connector_module(CONNECTOR_NAME) input_queue = Queue() output_queue = Queue() @@ -174,7 +174,7 @@ def test_fast_translate(): def test_stixshifter_fast_translate_with_parquet_writing_to_disk(tmpdir): query_id = "8df266aa-2901-4a94-ace9-a4403e310fa1" - check_module_availability(CONNECTOR_NAME) + setup_connector_module(CONNECTOR_NAME) cache_parquet_path_prefix = str(tmpdir.join("test")) offset_str = str(SAMPLE_RESULT.offset).zfill(32) cache_parquet_path = cache_parquet_path_prefix + f"_{offset_str}.parquet" From a008d25f52f910bdcc157177403d63af49b2f02f Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 20 Sep 2023 17:57:19 -0400 Subject: [PATCH 5/9] enable connector dev mode to resolve #402 --- bin/stix-shifter-diag | 2 +- src/kestrel_datasource_stixshifter/config.py | 25 +++++++++---- .../connector.py | 4 +-- .../diagnosis.py | 1 + .../interface.py | 1 + src/kestrel_datasource_stixshifter/query.py | 3 +- tests/test_stixshifter.py | 36 ++++++++++++++++--- 7 files changed, 58 insertions(+), 14 deletions(-) diff --git a/bin/stix-shifter-diag b/bin/stix-shifter-diag index cc53ba55..3f9e9554 100755 --- a/bin/stix-shifter-diag +++ b/bin/stix-shifter-diag @@ -83,7 +83,7 @@ if __name__ == "__main__": diag.diagnose_config() # 2. setup connector and ping - setup_connector_module(diag.connector_name, args.ignore_cert) + setup_connector_module(diag.connector_name, diag.allow_dev_connector, args.ignore_cert) # 3. 
query translation test diag.diagnose_translate_query(patterns[0]) diff --git a/src/kestrel_datasource_stixshifter/config.py b/src/kestrel_datasource_stixshifter/config.py index 7f5b967d..1b4a907f 100644 --- a/src/kestrel_datasource_stixshifter/config.py +++ b/src/kestrel_datasource_stixshifter/config.py @@ -17,6 +17,7 @@ RETRIEVAL_BATCH_SIZE = 2000 SINGLE_BATCH_TIMEOUT = 60 COOL_DOWN_AFTER_TRANSMISSION = 0 +ALLOW_DEV_CONNECTOR = False FAST_TRANSLATE_CONNECTORS = [] # Suggested: ["qradar", "elastic_ecs"] @@ -140,8 +141,9 @@ def get_datasource_from_profiles(profile_name, profiles): if "options" not in connection: connection["options"] = {} - retrieval_batch_size = _extract_integer_param_from_connection_config( + retrieval_batch_size = _extract_param_from_connection_config( "retrieval_batch_size", + int, RETRIEVAL_BATCH_SIZE, connection, profile_name, @@ -149,8 +151,9 @@ def get_datasource_from_profiles(profile_name, profiles): # rename this field for stix-shifter use; x2 the size to ensure retrieval connection["options"]["result_limit"] = retrieval_batch_size * 2 - single_batch_timeout = _extract_integer_param_from_connection_config( + single_batch_timeout = _extract_param_from_connection_config( "single_batch_timeout", + int, SINGLE_BATCH_TIMEOUT, connection, profile_name, @@ -158,19 +161,29 @@ def get_datasource_from_profiles(profile_name, profiles): # rename this field for stix-shifter use connection["options"]["timeout"] = single_batch_timeout - cool_down_after_transmission = _extract_integer_param_from_connection_config( + cool_down_after_transmission = _extract_param_from_connection_config( "cool_down_after_transmission", + int, COOL_DOWN_AFTER_TRANSMISSION, connection, profile_name, ) + allow_dev_connector = _extract_param_from_connection_config( + "allow_dev_connector", + bool, + ALLOW_DEV_CONNECTOR, + connection, + profile_name, + ) + return ( connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, + 
allow_dev_connector, ) @@ -208,14 +221,14 @@ def load_options(): return config["options"] -def _extract_integer_param_from_connection_config( - param_name, default, connection, profile_name +def _extract_param_from_connection_config( + param_name, processing_func, default, connection, profile_name ): value = default if param_name in connection["options"]: # remove the non-stix-shifter field {param_name} to avoid stix-shifter error try: - value = int(connection["options"].pop(param_name)) + value = processing_func(connection["options"].pop(param_name)) except: raise InvalidDataSource( profile_name, diff --git a/src/kestrel_datasource_stixshifter/connector.py b/src/kestrel_datasource_stixshifter/connector.py index 9645c273..84b50588 100644 --- a/src/kestrel_datasource_stixshifter/connector.py +++ b/src/kestrel_datasource_stixshifter/connector.py @@ -89,7 +89,7 @@ def install_package(connector_name, requests_verify=True): ) -def setup_connector_module(connector_name, requests_verify=True): +def setup_connector_module(connector_name, allow_dev_connector=False, requests_verify=True): try: importlib.import_module("stix_shifter_modules." 
+ connector_name + ".entry_point") except: @@ -98,7 +98,7 @@ def setup_connector_module(connector_name, requests_verify=True): stixshifter_version = version("stix_shifter") package_name = get_package_name(connector_name) package_version = version(package_name) - if package_version == stixshifter_version: + if package_version == stixshifter_version or allow_dev_connector: connector_available = True else: connector_available = False diff --git a/src/kestrel_datasource_stixshifter/diagnosis.py b/src/kestrel_datasource_stixshifter/diagnosis.py index 7cb5f929..0e81cc00 100644 --- a/src/kestrel_datasource_stixshifter/diagnosis.py +++ b/src/kestrel_datasource_stixshifter/diagnosis.py @@ -25,6 +25,7 @@ def __init__(self, datasource_name): self.configuration_dict, self.retrieval_batch_size, self.cool_down_after_transmission, + self.allow_dev_connector, ) = get_datasource_from_profiles(datasource_name, self.profiles) self.if_fast_translation = ( self.connector_name in self.kestrel_options["fast_translate"] diff --git a/src/kestrel_datasource_stixshifter/interface.py b/src/kestrel_datasource_stixshifter/interface.py index 3ee7ef39..6bb24cfb 100644 --- a/src/kestrel_datasource_stixshifter/interface.py +++ b/src/kestrel_datasource_stixshifter/interface.py @@ -32,6 +32,7 @@ retrieval_batch_size: 10000 # set to 10000 to match default Elasticsearch page size; Kestrel default across connectors: 2000 single_batch_timeout: 120 # increase it if hit 60 seconds (Kestrel default) timeout error for each batch of retrieval cool_down_after_transmission: 2 # seconds to cool down between data source API calls, required by some API such as sentinelone; Kestrel default: 0 + allow_dev_connector: True # do not check version of a connector to allow custom/testing connector installed with any version; Kestrel default: False dialects: # more info: https://github.com/opencybersecurityalliance/stix-shifter/tree/develop/stix_shifter_modules/elastic_ecs#dialects - beats # need it if the index is created 
by Filebeat/Winlogbeat/*beat config: diff --git a/src/kestrel_datasource_stixshifter/query.py b/src/kestrel_datasource_stixshifter/query.py index 1539b5d9..fa0d61e5 100644 --- a/src/kestrel_datasource_stixshifter/query.py +++ b/src/kestrel_datasource_stixshifter/query.py @@ -82,11 +82,12 @@ def query_datasource(uri, pattern, session_id, config, store, limit=None): configuration_dict, retrieval_batch_size, cool_down_after_transmission, + allow_dev_connector, ) = map( copy.deepcopy, get_datasource_from_profiles(profile, config["profiles"]) ) - setup_connector_module(connector_name) + setup_connector_module(connector_name, allow_dev_connector) if _logger.isEnabledFor(logging.DEBUG): data_path_striped = "".join(filter(str.isalnum, profile)) diff --git a/tests/test_stixshifter.py b/tests/test_stixshifter.py index 4e21589a..89b62efa 100644 --- a/tests/test_stixshifter.py +++ b/tests/test_stixshifter.py @@ -1,11 +1,16 @@ import pytest import os +import sys +import subprocess +import importlib +from importlib.metadata import version from kestrel.session import Session from kestrel_datasource_stixshifter.connector import ( verify_package_origin, setup_connector_module, + get_package_name, ) from kestrel_datasource_stixshifter.config import get_datasource_from_profiles @@ -21,6 +26,27 @@ def test_setup_connector_module(): connectors = ["stix_bundle"] for connector_name in connectors: setup_connector_module(connector_name) + importlib.import_module("stix_shifter_modules." + connector_name + ".entry_point") + + +def test_setup_connector_module_w_wrong_version(): + subprocess.check_call([sys.executable, "-m", "pip", "install", "stix-shifter-modules-paloalto==5.0.0"]) + connector_name = "paloalto" + setup_connector_module(connector_name) + importlib.import_module("stix_shifter_modules." 
+ connector_name + ".entry_point") + stixshifter_version = version("stix_shifter") + package_name = get_package_name(connector_name) + package_version = version(package_name) + assert stixshifter_version == package_version + + +def test_setup_connector_module_dev_connector(): + subprocess.check_call([sys.executable, "-m", "pip", "install", "stix-shifter-modules-datadog==5.0.0"]) + connector_name = "datadog" + setup_connector_module(connector_name, True) + importlib.import_module("stix_shifter_modules." + connector_name + ".entry_point") + package_version = version(get_package_name(connector_name)) + assert package_version == "5.0.0" def test_yaml_profiles_refresh(tmp_path): @@ -51,12 +77,13 @@ def test_yaml_profiles_refresh(tmp_path): retrieval_batch_size: 10000 single_batch_timeout: 120 cool_down_after_transmission: 5 + allow_dev_connector: True dialects: - beats config: auth: id: profileB - api_key: asdf + api_key: xxxxxx """ profile_file = tmp_path / "stixshifter.yaml" @@ -79,7 +106,7 @@ def test_yaml_profiles_refresh(tmp_path): ss_config = s.config["datasources"]["kestrel_datasource_stixshifter"] ss_profiles = ss_config["profiles"] - connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission = get_datasource_from_profiles("host101", ss_profiles) + connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission, allow_dev_connector = get_datasource_from_profiles("host101", ss_profiles) assert connector_name == "elastic_ecs" assert configuration["auth"]["id"] == "profileA" assert configuration["auth"]["api_key"] == "qwer" @@ -95,13 +122,14 @@ def test_yaml_profiles_refresh(tmp_path): # need to refresh the pointers since the dict is updated ss_profiles = ss_config["profiles"] - connector_name, connection, configuration, retrieval_batch_size, cool_down_after_transmission = get_datasource_from_profiles("host101", ss_profiles) + connector_name, connection, configuration, retrieval_batch_size, 
cool_down_after_transmission, allow_dev_connector = get_datasource_from_profiles("host101", ss_profiles) assert connector_name == "elastic_ecs" assert configuration["auth"]["id"] == "profileB" - assert configuration["auth"]["api_key"] == "asdf" + assert configuration["auth"]["api_key"] == "xxxxxx" assert connection["options"]["timeout"] == 120 assert connection["options"]["result_limit"] == 10000 * 2 assert retrieval_batch_size == 10000 assert cool_down_after_transmission == 5 + assert allow_dev_connector == True del os.environ["KESTREL_STIXSHIFTER_CONFIG"] From 43041e82fe7d4d5bc1f0b9a2d492e12a683e89c4 Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Wed, 20 Sep 2023 18:02:23 -0400 Subject: [PATCH 6/9] fix styling --- src/kestrel_datasource_stixshifter/connector.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/kestrel_datasource_stixshifter/connector.py b/src/kestrel_datasource_stixshifter/connector.py index 84b50588..370eecea 100644 --- a/src/kestrel_datasource_stixshifter/connector.py +++ b/src/kestrel_datasource_stixshifter/connector.py @@ -89,9 +89,13 @@ def install_package(connector_name, requests_verify=True): ) -def setup_connector_module(connector_name, allow_dev_connector=False, requests_verify=True): +def setup_connector_module( + connector_name, allow_dev_connector=False, requests_verify=True +): try: - importlib.import_module("stix_shifter_modules." + connector_name + ".entry_point") + importlib.import_module( + "stix_shifter_modules." 
+ connector_name + ".entry_point" + ) except: connector_available = False else: From 86ad460828c94380e97bacba3ea99e698c1cb623 Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Mon, 25 Sep 2023 10:29:38 -0400 Subject: [PATCH 7/9] v1.7.6 --- CHANGELOG.rst | 10 ++++++++++ pyproject.toml | 2 +- setup.cfg | 10 +++++----- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7c1ed163..36be9023 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,16 @@ The format is based on `Keep a Changelog`_. Unreleased ========== +1.7.6 (2023-09-25) +================== + +Added +----- + +- ``DESCRIBE`` command to get insight of attributes +- ``ikestrel`` interactive shell (command-line utility) +- Custom stix-shifter connector support #402 + 1.7.5 (2023-09-07) ================== diff --git a/pyproject.toml b/pyproject.toml index dc80c2fc..f59ddeff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,3 @@ [build-system] -requires = ["setuptools >= 56.0.0", "wheel"] +requires = ["setuptools >= 68.2.2", "wheel"] build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg index 13965696..80e36982 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = kestrel-lang -version = 1.7.5 +version = 1.7.6 description = Kestrel Threat Hunting Language long_description = file:README.rst long_description_content_type = text/x-rst @@ -30,18 +30,18 @@ scripts = bin/stix-shifter-diag python_requires = >= 3.8 install_requires = - typeguard>=4.1.3 + typeguard>=4.1.5 pyyaml>=6.0.1 lxml>=4.9.3 lark>=1.1.7 - pandas>=2.0.0 + pandas>=2.1.1 pyarrow>=13.0.0 docker>=6.1.3 requests>=2.31.0 - nest-asyncio>=1.5.7 + nest-asyncio>=1.5.8 stix-shifter==6.2.1 stix-shifter-utils==6.2.1 - firepit>=2.3.27 + firepit>=2.3.29 tests_require = pytest From 8d9f6acb8f962638beba6d8d5828a6bba9168b3d Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Mon, 25 Sep 2023 10:34:15 -0400 Subject: [PATCH 8/9] downgrade dep:pandas for Python 3.8 support 
--- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 80e36982..34c2470e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,7 +34,7 @@ install_requires = pyyaml>=6.0.1 lxml>=4.9.3 lark>=1.1.7 - pandas>=2.1.1 + pandas>=2.0.3 pyarrow>=13.0.0 docker>=6.1.3 requests>=2.31.0 From bca61b76e123bca9a42a8b7aabf84bbd185491bd Mon Sep 17 00:00:00 2001 From: Xiaokui Shu Date: Mon, 25 Sep 2023 11:01:15 -0400 Subject: [PATCH 9/9] add missing item in CHANGELOG --- CHANGELOG.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 36be9023..273076af 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -19,6 +19,11 @@ Added - ``ikestrel`` interactive shell (command-line utility) - Custom stix-shifter connector support #402 +Fixed +----- + +- Command-line utility tests failed without install + 1.7.5 (2023-09-07) ==================