Skip to content

Commit

Permalink
Merge pull request #257 from splunk/feature/coverage-report
Browse files Browse the repository at this point in the history
Expanding coverage and other metrics in summary.yml
  • Loading branch information
cmcginley-splunk authored Aug 27, 2024
2 parents 01d3853 + 09170dd commit 90f1b91
Show file tree
Hide file tree
Showing 24 changed files with 427 additions and 216 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from tempfile import TemporaryDirectory, mktemp
from ssl import SSLEOFError, SSLZeroReturnError
from sys import stdout
#from dataclasses import dataclass
from shutil import copyfile
from typing import Union, Optional

Expand All @@ -29,7 +28,7 @@
from contentctl.objects.base_test import BaseTest
from contentctl.objects.unit_test import UnitTest
from contentctl.objects.integration_test import IntegrationTest
from contentctl.objects.unit_test_attack_data import UnitTestAttackData
from contentctl.objects.test_attack_data import TestAttackData
from contentctl.objects.unit_test_result import UnitTestResult
from contentctl.objects.integration_test_result import IntegrationTestResult
from contentctl.objects.test_group import TestGroup
Expand Down Expand Up @@ -61,13 +60,19 @@ class CleanupTestGroupResults(BaseModel):

class ContainerStoppedException(Exception):
    """Raised to signal that the testing container has stopped.

    Callers treat this differently from ordinary test failures: it is
    re-raised rather than recorded as a test result (see the handler in
    ``execute_unit_test``).
    """
class CannotRunBaselineException(Exception):
    """Raised when a detection requires execution of a baseline.

    Support for testing detections with baselines does not currently
    exist in contentctl, so whenever a detection with baselines is
    encountered, a descriptive exception of this type is generated
    instead of running the test.
    """


@dataclasses.dataclass(frozen=False)
class DetectionTestingManagerOutputDto():
inputQueue: list[Detection] = Field(default_factory=list)
outputQueue: list[Detection] = Field(default_factory=list)
skippedQueue: list[Detection] = Field(default_factory=list)
currentTestingQueue: dict[str, Union[Detection, None]] = Field(default_factory=dict)
start_time: Union[datetime.datetime, None] = None
replay_index: str = "CONTENTCTL_TESTING_INDEX"
Expand Down Expand Up @@ -647,11 +652,7 @@ def execute_unit_test(
# Set the mode and timeframe, if required
kwargs = {"exec_mode": "blocking"}

# Iterate over baselines (if any)
for baseline in test.baselines:
# TODO: this is executing the test, not the baseline...
# TODO: should this be in a try/except if the later call is?
self.retry_search_until_timeout(detection, test, kwargs, test_start_time)


# Set earliest_time and latest_time appropriately if FORCE_ALL_TIME is False
if not FORCE_ALL_TIME:
Expand All @@ -662,7 +663,23 @@ def execute_unit_test(

# Run the detection's search query
try:
# Iterate over baselines (if any)
for baseline in detection.baselines:
raise CannotRunBaselineException("Detection requires Execution of a Baseline, "
"however Baseline execution is not "
"currently supported in contentctl. Mark "
"this as manual_test.")
self.retry_search_until_timeout(detection, test, kwargs, test_start_time)
except CannotRunBaselineException as e:
# Init the test result and record a failure if there was an issue during the search
test.result = UnitTestResult()
test.result.set_job_content(
None,
self.infrastructure,
TestResultStatus.ERROR,
exception=e,
duration=time.time() - test_start_time
)
except ContainerStoppedException as e:
raise e
except Exception as e:
Expand Down Expand Up @@ -1015,18 +1032,15 @@ def retry_search_until_timeout(
"""
# Get the start time and compute the timeout
search_start_time = time.time()
search_stop_time = time.time() + self.sync_obj.timeout_seconds

# We will default to ensuring at least one result exists
if test.pass_condition is None:
search = detection.search
else:
# Else, use the explicit pass condition
search = f"{detection.search} {test.pass_condition}"
search_stop_time = time.time() + self.sync_obj.timeout_seconds

# Make a copy of the search string since we may
# need to make some small changes to it below
search = detection.search

# Ensure searches that do not begin with '|' must begin with 'search '
if not search.strip().startswith("|"): # type: ignore
if not search.strip().startswith("search "): # type: ignore
if not search.strip().startswith("|"):
if not search.strip().startswith("search "):
search = f"search {search}"

# exponential backoff for wait time
Expand Down Expand Up @@ -1179,7 +1193,7 @@ def retry_search_until_timeout(

return

def delete_attack_data(self, attack_data_files: list[UnitTestAttackData]):
def delete_attack_data(self, attack_data_files: list[TestAttackData]):
for attack_data_file in attack_data_files:
index = attack_data_file.custom_index or self.sync_obj.replay_index
host = attack_data_file.host or self.sync_obj.replay_host
Expand Down Expand Up @@ -1212,7 +1226,7 @@ def replay_attack_data_files(

def replay_attack_data_file(
self,
attack_data_file: UnitTestAttackData,
attack_data_file: TestAttackData,
tmp_dir: str,
test_group: TestGroup,
test_group_start_time: float,
Expand Down Expand Up @@ -1280,7 +1294,7 @@ def replay_attack_data_file(
def hec_raw_replay(
self,
tempfile: str,
attack_data_file: UnitTestAttackData,
attack_data_file: TestAttackData,
verify_ssl: bool = False,
):
if verify_ssl is False:
Expand Down
102 changes: 64 additions & 38 deletions contentctl/actions/detection_testing/views/DetectionTestingView.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import abc
import datetime
from typing import Any

from pydantic import BaseModel

Expand All @@ -10,6 +11,7 @@
)
from contentctl.helper.utils import Utils
from contentctl.objects.enums import DetectionStatus
from contentctl.objects.base_test_result import TestResultStatus


class DetectionTestingView(BaseModel, abc.ABC):
Expand Down Expand Up @@ -74,18 +76,23 @@ def getSummaryObject(
self,
test_result_fields: list[str] = ["success", "message", "exception", "status", "duration", "wait_duration"],
test_job_fields: list[str] = ["resultCount", "runDuration"],
) -> dict:
) -> dict[str, dict[str, Any] | list[dict[str, Any]] | str]:
"""
Iterates over detections, consolidating results into a single dict and aggregating metrics
:param test_result_fields: fields to pull from the test result
:param test_job_fields: fields to pull from the job content of the test result
:returns: summary dict
"""
# Init the list of tested detections, and some metrics aggregate counters
tested_detections = []
# Init the list of tested and skipped detections, and some metrics aggregate counters
tested_detections: list[dict[str, Any]] = []
skipped_detections: list[dict[str, Any]] = []
total_pass = 0
total_fail = 0
total_skipped = 0
total_production = 0
total_experimental = 0
total_deprecated = 0
total_manual = 0

# Iterate the detections tested (anything in the output queue was tested)
for detection in self.sync_obj.outputQueue:
Expand All @@ -95,46 +102,59 @@ def getSummaryObject(
)

# Aggregate detection pass/fail metrics
if summary["success"] is False:
if detection.test_status == TestResultStatus.FAIL:
total_fail += 1
elif detection.test_status == TestResultStatus.PASS:
total_pass += 1
elif detection.test_status == TestResultStatus.SKIP:
total_skipped += 1

# Aggregate production status metrics
if detection.status == DetectionStatus.production.value: # type: ignore
total_production += 1
elif detection.status == DetectionStatus.experimental.value: # type: ignore
total_experimental += 1
elif detection.status == DetectionStatus.deprecated.value: # type: ignore
total_deprecated += 1

# Check if the detection is manual_test
if detection.tags.manual_test is not None:
total_manual += 1

# Append to our list (skipped or tested)
if detection.test_status == TestResultStatus.SKIP:
skipped_detections.append(summary)
else:
#Test is marked as a success, but we need to determine if there were skipped unit tests
#SKIPPED tests still show a success in this field, but we want to count them differently
pass_increment = 1
for test in summary.get("tests"):
if test.get("test_type") == "unit" and test.get("status") == "skip":
total_skipped += 1
#Test should not count as a pass, so do not increment the count
pass_increment = 0
break
total_pass += pass_increment


# Append to our list
tested_detections.append(summary)

# Sort s.t. all failures appear first (then by name)
#Second short condition is a hack to get detections with unit skipped tests to appear above pass tests
tested_detections.sort(key=lambda x: (x["success"], 0 if x.get("tests",[{}])[0].get("status","status_missing")=="skip" else 1, x["name"]))
tested_detections.append(summary)

# Sort tested detections s.t. all failures appear first, then by name
tested_detections.sort(
key=lambda x: (
x["success"],
x["name"]
)
)

# Sort skipped detections s.t. detections w/ tests appear before those w/o, then by name
skipped_detections.sort(
key=lambda x: (
0 if len(x["tests"]) > 0 else 1,
x["name"]
)
)

# TODO (#267): Align test reporting more closely w/ status enums (as it relates to
# "untested")
# Aggregate summaries for the untested detections (anything still in the input queue was untested)
total_untested = len(self.sync_obj.inputQueue)
untested_detections = []
untested_detections: list[dict[str, Any]] = []
for detection in self.sync_obj.inputQueue:
untested_detections.append(detection.get_summary())

# Sort by detection name
untested_detections.sort(key=lambda x: x["name"])

# Get lists of detections (name only) that were skipped due to their status (experimental or deprecated)
experimental_detections = sorted([
detection.name for detection in self.sync_obj.skippedQueue if detection.status == DetectionStatus.experimental.value
])
deprecated_detections = sorted([
detection.name for detection in self.sync_obj.skippedQueue if detection.status == DetectionStatus.deprecated.value
])

# If any detection failed, the overall success is False
# If any detection failed, or if there are untested detections, the overall success is False
if (total_fail + len(untested_detections)) == 0:
overall_success = True
else:
Expand All @@ -143,33 +163,39 @@ def getSummaryObject(
# Compute total detections
total_detections = total_fail + total_pass + total_untested + total_skipped

# Compute total detections actually tested (at least one test not skipped)
total_tested_detections = total_fail + total_pass

# Compute the percentage of completion for testing, as well as the success rate
percent_complete = Utils.getPercent(
len(tested_detections), len(untested_detections), 1
)
success_rate = Utils.getPercent(
total_pass, total_detections-total_skipped, 1
total_pass, total_tested_detections, 1
)

# TODO (#230): expand testing metrics reported
# TODO (#230): expand testing metrics reported (and make nested)
# Construct and return the larger results dict
result_dict = {
"summary": {
"mode": self.config.getModeName(),
"enable_integration_testing": self.config.enable_integration_testing,
"success": overall_success,
"total_detections": total_detections,
"total_tested_detections": total_tested_detections,
"total_pass": total_pass,
"total_fail": total_fail,
"total_skipped": total_skipped,
"total_untested": total_untested,
"total_experimental_or_deprecated": len(deprecated_detections+experimental_detections),
"total_production": total_production,
"total_experimental": total_experimental,
"total_deprecated": total_deprecated,
"total_manual": total_manual,
"success_rate": success_rate,
},
"tested_detections": tested_detections,
"skipped_detections": skipped_detections,
"untested_detections": untested_detections,
"percent_complete": percent_complete,
"deprecated_detections": deprecated_detections,
"experimental_detections": experimental_detections

}
return result_dict
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def setup(self):

self.showStatus()

# TODO (#267): Align test reporting more closely w/ status enums (as it relates to "untested")
def showStatus(self, interval: int = 1):

while True:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ class DetectionTestingViewFile(DetectionTestingView):
output_filename: str = OUTPUT_FILENAME

def getOutputFilePath(self) -> pathlib.Path:

folder_path = pathlib.Path('.') / self.output_folder
output_file = folder_path / self.output_filename

Expand All @@ -27,13 +26,12 @@ def stop(self):
output_file = self.getOutputFilePath()

folder_path.mkdir(parents=True, exist_ok=True)



result_dict = self.getSummaryObject()

# use the yaml writer class
with open(output_file, "w") as res:
res.write(yaml.safe_dump(result_dict,sort_keys=False))
res.write(yaml.safe_dump(result_dict, sort_keys=False))

def showStatus(self, interval: int = 60) -> None:
    """Intentionally a no-op for the file-based view.

    This view emits its results as a summary file when testing stops,
    so there is no live status to render on an interval.
    """
Expand Down
Loading

0 comments on commit 90f1b91

Please sign in to comment.