Commit

Merge branch 'main' into python313

pyth0n1c authored Oct 16, 2024
2 parents 3ba8a09 + f7a939b commit 9830993

Showing 18 changed files with 329 additions and 191 deletions.
20 changes: 10 additions & 10 deletions contentctl/actions/detection_testing/GitService.py
@@ -67,9 +67,9 @@ def getChanges(self, target_branch:str)->List[Detection]:

         #Make a filename to content map
         filepath_to_content_map = { obj.file_path:obj for (_,obj) in self.director.name_to_content_map.items()}
-        updated_detections:List[Detection] = []
-        updated_macros:List[Macro] = []
-        updated_lookups:List[Lookup] =[]
+        updated_detections:set[Detection] = set()
+        updated_macros:set[Macro] = set()
+        updated_lookups:set[Lookup] = set()

         for diff in all_diffs:
             if type(diff) == pygit2.Patch:
@@ -80,14 +80,14 @@ def getChanges(self, target_branch:str)->List[Detection]:
                 if decoded_path.is_relative_to(self.config.path/"detections") and decoded_path.suffix == ".yml":
                     detectionObject = filepath_to_content_map.get(decoded_path, None)
                     if isinstance(detectionObject, Detection):
-                        updated_detections.append(detectionObject)
+                        updated_detections.add(detectionObject)
                     else:
                         raise Exception(f"Error getting detection object for file {str(decoded_path)}")

                 elif decoded_path.is_relative_to(self.config.path/"macros") and decoded_path.suffix == ".yml":
                     macroObject = filepath_to_content_map.get(decoded_path, None)
                     if isinstance(macroObject, Macro):
-                        updated_macros.append(macroObject)
+                        updated_macros.add(macroObject)
                     else:
                         raise Exception(f"Error getting macro object for file {str(decoded_path)}")

@@ -98,7 +98,7 @@ def getChanges(self, target_branch:str)->List[Detection]:
                     updatedLookup = filepath_to_content_map.get(decoded_path, None)
                     if not isinstance(updatedLookup,Lookup):
                         raise Exception(f"Expected {decoded_path} to be type {type(Lookup)}, but instead it was {(type(updatedLookup))}")
-                    updated_lookups.append(updatedLookup)
+                    updated_lookups.add(updatedLookup)

                 elif decoded_path.suffix == ".csv":
                     # If the CSV was updated, we want to make sure that we
@@ -125,7 +125,7 @@ def getChanges(self, target_branch:str)->List[Detection]:
                     if updatedLookup is not None and updatedLookup not in updated_lookups:
                         # It is possible that both the CSV and YML have been modified for the same lookup,
                         # and we do not want to add it twice.
-                        updated_lookups.append(updatedLookup)
+                        updated_lookups.add(updatedLookup)

                 else:
                     pass
@@ -136,7 +136,7 @@ def getChanges(self, target_branch:str)->List[Detection]:

         # If a detection has at least one dependency on changed content,
         # then we must test it again
-        changed_macros_and_lookups = updated_macros + updated_lookups
+        changed_macros_and_lookups:set[SecurityContentObject] = updated_macros.union(updated_lookups)

         for detection in self.director.detections:
             if detection in updated_detections:
@@ -146,14 +146,14 @@ def getChanges(self, target_branch:str)->List[Detection]:

             for obj in changed_macros_and_lookups:
                 if obj in detection.get_content_dependencies():
-                    updated_detections.append(detection)
+                    updated_detections.add(detection)
                     break

         #Print out the names of all modified/new content
         modifiedAndNewContentString = "\n - ".join(sorted([d.name for d in updated_detections]))

         print(f"[{len(updated_detections)}] Pieces of modified and new content (this may include experimental/deprecated/manual_test content):\n - {modifiedAndNewContentString}")
-        return updated_detections
+        return sorted(list(updated_detections))

     def getSelected(self, detectionFilenames: List[FilePath]) -> List[Detection]:
         filepath_to_content_map: dict[FilePath, SecurityContentObject] = {
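The list-to-set switch above is what deduplicates content touched twice in one diff (for example, a lookup whose YML and CSV both changed), and the sorted() on return restores deterministic ordering, since sets are unordered. A minimal sketch of the pattern, using a hypothetical Item class as a stand-in for Detection (which must be hashable and orderable for this to work):

from dataclasses import dataclass

# Hypothetical stand-in for Detection; frozen=True makes it hashable,
# order=True makes it sortable.
@dataclass(frozen=True, order=True)
class Item:
    name: str

updated: set[Item] = set()
updated.add(Item("lookup_a"))  # the lookup's YML changed
updated.add(Item("lookup_a"))  # its CSV changed too: the set ignores the duplicate
updated.add(Item("lookup_b"))

print(sorted(updated))  # deterministic order: lookup_a, lookup_b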
@@ -13,7 +13,7 @@
 from shutil import copyfile
 from typing import Union, Optional

-from pydantic import BaseModel, PrivateAttr, Field, dataclasses
+from pydantic import ConfigDict, BaseModel, PrivateAttr, Field, dataclasses
 import requests  # type: ignore
 import splunklib.client as client  # type: ignore
 from splunklib.binding import HTTPError  # type: ignore
@@ -48,9 +48,9 @@ class SetupTestGroupResults(BaseModel):
     success: bool = True
     duration: float = 0
     start_time: float

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(
+        arbitrary_types_allowed=True
+    )


 class CleanupTestGroupResults(BaseModel):
@@ -91,9 +91,9 @@ class DetectionTestingInfrastructure(BaseModel, abc.ABC):
     _conn: client.Service = PrivateAttr()
     pbar: tqdm.tqdm = None
     start_time: Optional[float] = None

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(
+        arbitrary_types_allowed=True
+    )

     def __init__(self, **data):
         super().__init__(**data)
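The recurring change in this commit is the Pydantic v2 idiom: the nested class Config is replaced by model_config = ConfigDict(...). A minimal side-by-side sketch, assuming pydantic >= 2 is installed (the Connection class is illustrative, standing in for a non-pydantic type such as splunklib's client.Service):

from pydantic import BaseModel, ConfigDict

class Connection:  # plain (non-pydantic) class; needs arbitrary_types_allowed
    pass

class OldStyle(BaseModel):
    # Pydantic v1 idiom: still tolerated by v2, but deprecated
    class Config:
        arbitrary_types_allowed = True
    conn: Connection | None = None

class NewStyle(BaseModel):
    # Pydantic v2 idiom adopted throughout this commit
    model_config = ConfigDict(arbitrary_types_allowed=True)
    conn: Connection | None = None

print(NewStyle(conn=Connection()).conn)  # accepted thanks to arbitrary_types_allowed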
@@ -1,12 +1,14 @@
-from bottle import template, Bottle, ServerAdapter
-from contentctl.actions.detection_testing.views.DetectionTestingView import (
-    DetectionTestingView,
-)
+from threading import Thread
+
+from bottle import template, Bottle, ServerAdapter
 from wsgiref.simple_server import make_server, WSGIRequestHandler
 import jinja2
 import webbrowser
-from threading import Thread
+from pydantic import ConfigDict
+
+from contentctl.actions.detection_testing.views.DetectionTestingView import (
+    DetectionTestingView,
+)

 DEFAULT_WEB_UI_PORT = 7999

@@ -100,9 +102,9 @@ def log_exception(*args, **kwargs):
 class DetectionTestingViewWeb(DetectionTestingView):
     bottleApp: Bottle = Bottle()
     server: SimpleWebServer = SimpleWebServer(host="0.0.0.0", port=DEFAULT_WEB_UI_PORT)

-    class Config:
-        arbitrary_types_allowed = True
+    model_config = ConfigDict(
+        arbitrary_types_allowed=True
+    )

     def setup(self):
         self.bottleApp.route("/", callback=self.showStatus)
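For context on the server object these imports feed: bottle lets you swap in a custom ServerAdapter, and the SimpleWebServer referenced above appears to wrap wsgiref with a custom request handler. A rough, self-contained sketch of that pattern — the QuietHandler name and the logging behavior are assumptions for illustration, not code from this repo:

from wsgiref.simple_server import make_server, WSGIRequestHandler
from bottle import Bottle, ServerAdapter

class QuietHandler(WSGIRequestHandler):
    # hypothetical: suppress the default per-request stderr logging
    def log_request(self, *args, **kwargs):
        pass

class SimpleWebServer(ServerAdapter):
    # bottle invokes run(handler) with the WSGI application
    def run(self, handler):
        server = make_server(self.host, self.port, handler,
                             handler_class=QuietHandler)
        server.serve_forever()

app = Bottle()

@app.route("/")
def show_status():
    return "status page"

# Blocking call; typically launched on a Thread, as the imports suggest:
# app.run(server=SimpleWebServer(host="0.0.0.0", port=7999))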
10 changes: 6 additions & 4 deletions contentctl/actions/inspect.py
@@ -297,9 +297,11 @@ def check_detection_metadata(self, config: inspect) -> None:
                 validation_errors[rule_name] = []
             # No detections should be removed from build to build
             if rule_name not in current_build_conf.detection_stanzas:
-                validation_errors[rule_name].append(DetectionMissingError(rule_name=rule_name))
+                if config.suppress_missing_content_exceptions:
+                    print(f"[SUPPRESSED] {DetectionMissingError(rule_name=rule_name).long_message}")
+                else:
+                    validation_errors[rule_name].append(DetectionMissingError(rule_name=rule_name))
                 continue

             # Pull out the individual stanza for readability
             previous_stanza = previous_build_conf.detection_stanzas[rule_name]
             current_stanza = current_build_conf.detection_stanzas[rule_name]
@@ -335,7 +337,7 @@ def check_detection_metadata(self, config: inspect) -> None:
             )

         # Convert our dict mapping to a flat list of errors for use in reporting
-        validation_error_list = [x for inner_list in validation_errors.values() for x in inner_list]
+        validation_error_list = [x for inner_list in validation_errors.values() for x in inner_list]

         # Report failure/success
         print("\nDetection Metadata Validation:")
@@ -355,4 +357,4 @@ def check_detection_metadata(self, config: inspect) -> None:
             raise ExceptionGroup(
                 "Validation errors when comparing detection stanzas in current and previous build:",
                 validation_error_list
-            )
+            )
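The new suppress_missing_content_exceptions branch swaps a hard failure for a printed warning while leaving every other metadata check intact. A compact sketch of the same collect-or-suppress flow, with a hypothetical MissingError standing in for DetectionMissingError:

class MissingError(Exception):
    # hypothetical stand-in for DetectionMissingError
    def __init__(self, rule_name: str):
        self.long_message = f"detection '{rule_name}' was in the previous build but is missing from this one"
        super().__init__(self.long_message)

def check(previous: set[str], current: set[str], suppress: bool) -> list[Exception]:
    errors: list[Exception] = []
    for rule_name in previous:
        if rule_name not in current:
            if suppress:
                print(f"[SUPPRESSED] {MissingError(rule_name).long_message}")
            else:
                errors.append(MissingError(rule_name))
    return errors

print(len(check({"a", "b"}, {"a"}, suppress=True)))   # 0: warning printed instead
print(len(check({"a", "b"}, {"a"}, suppress=False)))  # 1: error recorded for reporting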
13 changes: 6 additions & 7 deletions contentctl/enrichments/cve_enrichment.py
@@ -5,7 +5,7 @@
 import shelve
 import time
 from typing import Annotated, Any, Union, TYPE_CHECKING
-from pydantic import BaseModel,Field, computed_field
+from pydantic import ConfigDict, BaseModel,Field, computed_field
 from decimal import Decimal
 from requests.exceptions import ReadTimeout
 from contentctl.objects.annotated_types import CVE_TYPE
@@ -32,13 +32,12 @@ def url(self)->str:
 class CveEnrichment(BaseModel):
     use_enrichment: bool = True
     cve_api_obj: Union[CVESearch,None] = None

-
-    class Config:
-        # Arbitrary_types are allowed to let us use the CVESearch Object
-        arbitrary_types_allowed = True
-        frozen = True
+    # Arbitrary_types are allowed to let us use the CVESearch Object
+    model_config = ConfigDict(
+        arbitrary_types_allowed=True,
+        frozen=True
+    )

     @staticmethod
     def getCveEnrichment(config:validate, timeout_seconds:int=10, force_disable_enrichment:bool=True)->CveEnrichment:
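Beyond arbitrary_types_allowed, this model also carries frozen=True, which makes instances immutable (and therefore hashable) after construction. A minimal sketch of the behavior, assuming pydantic >= 2:

from pydantic import BaseModel, ConfigDict, ValidationError

class Enrichment(BaseModel):
    model_config = ConfigDict(frozen=True)
    use_enrichment: bool = True

obj = Enrichment()
try:
    obj.use_enrichment = False  # frozen models reject attribute assignment
except ValidationError as err:
    print(err.errors()[0]["type"])  # "frozen_instance"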
@@ -36,7 +36,7 @@
 from contentctl.objects.integration_test import IntegrationTest
 from contentctl.objects.data_source import DataSource
 from contentctl.objects.base_test_result import TestResultStatus
-
+from contentctl.objects.drilldown import Drilldown, DRILLDOWN_SEARCH_PLACEHOLDER
 from contentctl.objects.enums import ProvidingTechnology
 from contentctl.enrichments.cve_enrichment import CveEnrichmentObj
 import datetime
@@ -90,6 +90,7 @@ class Detection_Abstract(SecurityContentObject):
     test_groups: list[TestGroup] = []

     data_source_objects: list[DataSource] = []
+    drilldown_searches: list[Drilldown] = Field(default=[], description="A list of Drilldowns that should be included with this search")

     def get_conf_stanza_name(self, app:CustomApp)->str:
         stanza_name = CONTENTCTL_DETECTION_STANZA_NAME_FORMAT_TEMPLATE.format(app_label=app.label, detection_name=self.name)
@@ -167,6 +168,7 @@ def adjust_tests_and_groups(self) -> None:
         the model from the list of unit tests. Also, preemptively skips all manual tests, as well as
         tests for experimental/deprecated detections and Correlation type detections.
         """
+
         # Since ManualTest and UnitTest are not differentiable without looking at the manual_test
         # tag, Pydantic builds all tests as UnitTest objects. If we see the manual_test flag, we
         # convert these to ManualTest
@@ -563,6 +565,46 @@ def model_post_init(self, __context: Any) -> None:
         # Derive TestGroups and IntegrationTests, adjust for ManualTests, skip as needed
         self.adjust_tests_and_groups()

+        # Ensure that if there is at least 1 drilldown, at least
+        # 1 of the drilldowns contains the string Drilldown.SEARCH_PLACEHOLDER.
+        # This is presently a requirement when 1 or more drilldowns are added to a detection.
+        # Note that this is only required for production searches that are not hunting
+
+        if self.type == AnalyticsType.Hunting.value or self.status != DetectionStatus.production.value:
+            # No additional checks need to happen on the potential drilldowns.
+            pass
+        else:
+            found_placeholder = False
+            if len(self.drilldown_searches) < 2:
+                raise ValueError(f"This detection is required to have at least 2 drilldown_searches, but only has [{len(self.drilldown_searches)}]")
+            for drilldown in self.drilldown_searches:
+                if DRILLDOWN_SEARCH_PLACEHOLDER in drilldown.search:
+                    found_placeholder = True
+            if not found_placeholder:
+                raise ValueError("Detection has one or more drilldown_searches, but none of them "
+                                 f"contained '{DRILLDOWN_SEARCH_PLACEHOLDER}'. This is a requirement "
+                                 "if drilldown_searches are defined.")
+
+        # Update the search fields with the original search, if required
+        for drilldown in self.drilldown_searches:
+            drilldown.perform_search_substitutions(self)
+
+        # For experimental purposes, add the default drilldowns
+        # self.drilldown_searches.extend(Drilldown.constructDrilldownsFromDetection(self))
+
+    @property
+    def drilldowns_in_JSON(self) -> list[dict[str,str]]:
+        """This function is required for proper JSON
+        serialization of drilldowns to occur in savedsearches.conf.
+        It returns the list[Drilldown] as a list[dict].
+        Without this function, the jinja template is unable
+        to convert list[Drilldown] to JSON
+
+        Returns:
+            list[dict[str,str]]: List of Drilldowns dumped to dict format
+        """
+        return [drilldown.model_dump() for drilldown in self.drilldown_searches]
+
     @field_validator('lookups', mode="before")
     @classmethod
     def getDetectionLookups(cls, v:list[str], info:ValidationInfo) -> list[Lookup]:
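The model_post_init hook above enforces the drilldown contract at validation time: production, non-hunting detections need at least two drilldowns, and at least one must embed the search placeholder. A stripped-down sketch of that rule, with a hypothetical Drilldown dataclass and an assumed placeholder value standing in for the contentctl objects:

from dataclasses import dataclass

DRILLDOWN_SEARCH_PLACEHOLDER = "%original_detection_search%"  # assumed value, for illustration only

@dataclass
class Drilldown:  # hypothetical stand-in for contentctl.objects.drilldown.Drilldown
    search: str

def validate_drilldowns(drilldowns: list[Drilldown], is_production: bool, is_hunting: bool) -> None:
    if is_hunting or not is_production:
        return  # the contract only applies to production, non-hunting detections
    if len(drilldowns) < 2:
        raise ValueError(f"at least 2 drilldown_searches required, found [{len(drilldowns)}]")
    if not any(DRILLDOWN_SEARCH_PLACEHOLDER in d.search for d in drilldowns):
        raise ValueError(f"no drilldown contains '{DRILLDOWN_SEARCH_PLACEHOLDER}'")

validate_drilldowns(
    [Drilldown(f"index=risk {DRILLDOWN_SEARCH_PLACEHOLDER}"), Drilldown("index=notable")],
    is_production=True, is_hunting=False,
)  # passes silently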
@@ -789,6 +831,45 @@ def search_observables_exist_validate(self):
         # Found everything
         return self

+    @field_validator("tests", mode="before")
+    def ensure_yml_test_is_unittest(cls, v:list[dict]):
+        """The typing for the tests field allows it to be one of
+        a number of different types of tests. However, ONLY
+        UnitTest should be allowed to be defined in the YML
+        file. If part of the UnitTest defined in the YML
+        is incorrect, such as the attack_data file, then
+        it will FAIL to be instantiated as a UnitTest and
+        may instead be instantiated as a different type of
+        test, such as IntegrationTest (since that requires
+        fewer fields), which is incorrect. Ensure that any
+        raw data read from the YML can actually construct
+        a valid UnitTest and, if not, return errors right
+        away instead of letting Pydantic try to construct
+        it into a different type of test
+
+        Args:
+            v (list[dict]): list of dicts read from the yml.
+            Each one SHOULD be a valid UnitTest. If we cannot
+            construct a valid UnitTest from it, a ValueError should be raised
+
+        Returns:
+            _type_: The input of the function, assuming no
+            ValueError is raised.
+        """
+        valueErrors:list[ValueError] = []
+        for unitTest in v:
+            # This raises a ValueError on a failed UnitTest.
+            try:
+                UnitTest.model_validate(unitTest)
+            except ValueError as e:
+                valueErrors.append(e)
+        if len(valueErrors):
+            raise ValueError(valueErrors)
+        # All of these can be constructed as UnitTests with no
+        # Exceptions, so let the normal flow continue
+        return v
+
+
     @field_validator("tests")
     def tests_validate(
         cls,
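The new mode="before" validator closes a classic pitfall with union-typed fields: Pydantic will happily coerce a malformed UnitTest dict into whichever union member it does satisfy. Validating the raw dicts against the intended model first surfaces the real error. A self-contained sketch of the failure mode and the fix — Strict and Loose are illustrative models, not contentctl types:

from pydantic import BaseModel, ValidationError, field_validator

class Strict(BaseModel):   # illustrative: the type YML authors must write (like UnitTest)
    name: str
    attack_data: str

class Loose(BaseModel):    # illustrative: a laxer union member (like IntegrationTest)
    name: str

class WithoutGuard(BaseModel):
    tests: list[Strict | Loose]

class WithGuard(BaseModel):
    tests: list[Strict | Loose]

    @field_validator("tests", mode="before")
    @classmethod
    def ensure_strict(cls, v: list[dict]):
        # validate raw dicts against the intended model before union resolution
        for item in v:
            Strict.model_validate(item)
        return v

bad = [{"name": "t1"}]  # attack_data missing: should be an error
print(type(WithoutGuard(tests=bad).tests[0]).__name__)  # Loose -- silently miscoerced
try:
    WithGuard(tests=bad)
except ValidationError as err:
    print("caught:", err.error_count(), "error(s)")  # fails loudly, as intended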
14 changes: 7 additions & 7 deletions contentctl/objects/base_test_result.py
@@ -1,8 +1,8 @@
 from typing import Union, Any
 from enum import Enum

-from pydantic import BaseModel
-from splunklib.data import Record
+from pydantic import ConfigDict, BaseModel
+from splunklib.data import Record  # type: ignore

 from contentctl.helper.utils import Utils
@@ -53,11 +53,11 @@ class BaseTestResult(BaseModel):
     # The Splunk endpoint URL
     sid_link: Union[None, str] = None

-    class Config:
-        validate_assignment = True
-
-        # Needed to allow for embedding of Exceptions in the model
-        arbitrary_types_allowed = True
+    # Needed to allow for embedding of Exceptions in the model
+    model_config = ConfigDict(
+        validate_assignment=True,
+        arbitrary_types_allowed=True
+    )

     @property
     def passed(self) -> bool:
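This model also keeps validate_assignment=True, so fields are re-validated when mutated after construction, not just at init time. A quick sketch, assuming pydantic >= 2:

from pydantic import BaseModel, ConfigDict, ValidationError

class Result(BaseModel):
    model_config = ConfigDict(validate_assignment=True)
    duration: float = 0.0

r = Result()
r.duration = "1.5"        # str is coerced and validated on assignment
print(r.duration)         # 1.5
try:
    r.duration = "not a number"
except ValidationError:
    print("assignment rejected")  # without validate_assignment this would slip through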
18 changes: 11 additions & 7 deletions contentctl/objects/config.py
@@ -159,8 +159,6 @@ def getApp(self, config:test, stage_file=True)->str:
                              verbose_print=True)
         return str(destination)

-
-
 # TODO (#266): disable the use_enum_values configuration
 class Config_Base(BaseModel):
     model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True)
@@ -288,7 +286,6 @@ def getAPIPath(self)->pathlib.Path:

     def getAppTemplatePath(self)->pathlib.Path:
         return self.path/"app_template"
-


 class StackType(StrEnum):
@@ -311,6 +308,16 @@ class inspect(build):
             "should be enabled."
         )
     )
+    suppress_missing_content_exceptions: bool = Field(
+        default=False,
+        description=(
+            "Suppress exceptions during metadata validation if a detection that existed in "
+            "the previous build does not exist in this build. This check ensures that content "
+            "is not accidentally removed. In order to support testing both public and private "
+            "content, the exception can be suppressed. If it is suppressed, it will still be "
+            "printed out as a warning."
+        )
+    )
     enrichments: bool = Field(
         default=True,
         description=(
@@ -952,7 +959,6 @@ def check_environment_variable_for_config(cls, v:List[Infrastructure]):
             index+=1


-
 class release_notes(Config_Base):
     old_tag:Optional[str] = Field(None, description="Name of the tag to diff against to find new content. "
                                   "If it is not supplied, then it will be inferred as the "
@@ -1034,6 +1040,4 @@ def ensureNewTagOrLatestBranch(self):
         #     raise ValueError("The latest_branch '{self.latest_branch}' was not found in the repository")


-        # return self
-
-
+        # return self