Fix error where duplicate data_sources were added to an analytic story if
multiple detections referenced the same data_source. This was done by
making data_sources a computed_field for Story rather than building it
while detection objects are built. Additionally, added __eq__, __lt__,
and __hash__ methods to SecurityContentObject_Abstract so that set
operations and sorts can happen easily for all objects.
pyth0n1c committed Jul 25, 2024
1 parent 0eebfd9 commit b3e7d09
Showing 4 changed files with 72 additions and 11 deletions.
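The duplication described in the commit message can be reproduced with a toy sketch (simplified, hypothetical names — not the real contentctl classes): under the old behavior, each detection's post-init hook extended the story's data_sources list, so a data source shared by several detections was appended once per detection.

# Toy reproduction of the bug: two detections share "Sysmon EventID 1",
# and the old extend-per-detection behavior appends it twice.
story_data_sources: list[str] = []
detection_sources = [
    ["Sysmon EventID 1"],                      # detection 1
    ["Sysmon EventID 1", "Sysmon EventID 3"],  # detection 2
]
for sources in detection_sources:
    story_data_sources.extend(sources)

print(story_data_sources)
# ['Sysmon EventID 1', 'Sysmon EventID 1', 'Sysmon EventID 3']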
@@ -37,7 +37,7 @@ class Detection_Abstract(SecurityContentObject):
     #contentType: SecurityContentType = SecurityContentType.detections
     type: AnalyticsType = Field(...)
     status: DetectionStatus = Field(...)
-    data_source: Optional[List[str]] = None
+    data_source: list[str] = []
     tags: DetectionTags = Field(...)
     search: Union[str, dict[str,Any]] = Field(...)
     how_to_implement: str = Field(..., min_length=4)
@@ -54,7 +54,7 @@ class Detection_Abstract(SecurityContentObject):
     # A list of groups of tests, relying on the same data
     test_groups: Union[list[TestGroup], None] = Field(None,validate_default=True)
 
-    data_source_objects: Optional[List[DataSource]] = None
+    data_source_objects: list[DataSource] = []
 
 
     @field_validator("search", mode="before")
@@ -420,9 +420,7 @@ def model_post_init(self, ctx:dict[str,Any]):
         self.data_source_objects = matched_data_sources
 
         for story in self.tags.analytic_story:
-            story.detections.append(self)
-            story.data_sources.extend(self.data_source_objects)
-
+            story.detections.append(self)
         return self


@@ -446,14 +446,16 @@ def mapDetectionNamesToBaselineObjects(cls, v:list[str], info:ValidationInfo)->L
             raise ValueError("Error, baselines are constructed automatically at runtime. Please do not include this field.")
 
 
-        name:Union[str,dict] = info.data.get("name",None)
+        name:Union[str,None] = info.data.get("name",None)
         if name is None:
             raise ValueError("Error, cannot get Baselines because the Detection does not have a 'name' defined.")
 
         director:DirectorOutputDto = info.context.get("output_dto",None)
         baselines:List[Baseline] = []
         for baseline in director.baselines:
-            if name in baseline.tags.detections:
+            # This matching is a bit strange, because baseline.tags.detections starts as a list of strings, but
+            # is eventually updated to a list of Detections as we construct all of the detection objects.
+            if name in [detection_name for detection_name in baseline.tags.detections if isinstance(detection_name,str)]:
                 baselines.append(baseline)
 
         return baselines
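A minimal illustration of the isinstance filter above, using a stand-in type (hypothetical; not the real Detection model): the list starts out holding detection names and is rewritten in place with constructed objects, so only entries that are still strings are meaningful to compare against a name.

from dataclasses import dataclass

@dataclass
class FakeDetection:  # stand-in for the real Detection object
    name: str

# The list begins as names and is progressively replaced with objects.
detections: list[object] = ["Detection A", FakeDetection(name="Detection B")]

name = "Detection A"
still_strings = [d for d in detections if isinstance(d, str)]
print(name in still_strings)  # True — already-constructed objects are skipped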
@@ -194,6 +194,33 @@ def __repr__(self)->str:
 
     def __str__(self)->str:
         return(self.__repr__())
+
+    def __lt__(self, other:object)->bool:
+        if not isinstance(other,SecurityContentObject_Abstract):
+            raise Exception(f"SecurityContentObject can only be compared to each other, not to {type(other)}")
+        return self.name < other.name
+
+    def __eq__(self, other:object)->bool:
+        if not isinstance(other,SecurityContentObject_Abstract):
+            raise Exception(f"SecurityContentObject can only be compared to each other, not to {type(other)}")
+
+        if id(self) == id(other) and self.name == other.name and self.id == other.id:
+            # Yes, this is the same object
+            return True
+
+        elif id(self) == id(other) or self.name == other.name or self.id == other.id:
+            raise Exception("Attempted to compare two SecurityContentObjects, but their fields indicate they were not globally unique:"
+                            f"\n\tid(obj1) : {id(self)}"
+                            f"\n\tid(obj2) : {id(other)}"
+                            f"\n\tobj1.name : {self.name}"
+                            f"\n\tobj2.name : {other.name}"
+                            f"\n\tobj1.id : {self.id}"
+                            f"\n\tobj2.id : {other.id}")
+        else:
+            return False
+
+    def __hash__(self) -> NonNegativeInt:
+        return id(self)



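These dunder methods are what let a Story deduplicate and order content objects with plain set and sorted calls. A runnable sketch with a simplified stand-in class (the uniqueness checks in the real __eq__ are stricter than this):

import uuid

class ContentObject:  # simplified stand-in, not the real abstract class
    def __init__(self, name: str):
        self.name = name
        self.id = uuid.uuid4()

    def __lt__(self, other: "ContentObject") -> bool:
        return self.name < other.name

    def __eq__(self, other: object) -> bool:
        return isinstance(other, ContentObject) and id(self) == id(other)

    def __hash__(self) -> int:
        return id(self)

a = ContentObject("Windows AdFind Exfiltration")
b = ContentObject("Cobalt Strike Named Pipes")

# Adding the same object twice collapses to one set entry, and
# sorted() orders alphabetically by name via __lt__.
deduped = sorted({a, b, a})
print([o.name for o in deduped])
# ['Cobalt Strike Named Pipes', 'Windows AdFind Exfiltration']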
28 changes: 26 additions & 2 deletions contentctl/objects/data_source.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
-from typing import Union, Optional, List
-from pydantic import model_validator, Field, FilePath
+from typing import Optional, Any
+from pydantic import Field, FilePath, model_serializer
 from contentctl.objects.security_content_object import SecurityContentObject
 from contentctl.objects.event_source import EventSource

@@ -16,3 +16,27 @@ class DataSource(SecurityContentObject):
     example_log: Optional[str] = None
 
 
+    @model_serializer
+    def serialize_model(self):
+        # Call serializer for parent
+        super_fields = super().serialize_model()
+
+        # All fields custom to this model
+        model:dict[str,Any] = {
+            "source": self.source,
+            "sourcetype": self.sourcetype,
+            "separator": self.separator,
+            "configuration": self.configuration,
+            "supported_TA": self.supported_TA,
+            "fields": self.fields,
+            "field_mappings": self.field_mappings,
+            "convert_to_log_source": self.convert_to_log_source,
+            "example_log": self.example_log
+        }
+
+
+        # Combine fields from this model with fields from parent
+        super_fields.update(model)
+
+        # Return the combined model
+        return super_fields
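A toy sketch of the parent/child @model_serializer pattern used above (made-up models, not the real SecurityContentObject/DataSource): the child serializer starts from the parent's serialized fields and layers its own on top.

from typing import Any
from pydantic import BaseModel, model_serializer

class Parent(BaseModel):
    name: str

    @model_serializer
    def serialize_model(self) -> dict[str, Any]:
        return {"name": self.name}

class Child(Parent):
    source: str = "WinEventLog:Security"

    @model_serializer
    def serialize_model(self) -> dict[str, Any]:
        # Start from the parent's fields, then add this model's own.
        super_fields = super().serialize_model()
        super_fields.update({"source": self.source})
        return super_fields

print(Child(name="example").model_dump())
# {'name': 'example', 'source': 'WinEventLog:Security'}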
12 changes: 11 additions & 1 deletion contentctl/objects/story.py
@@ -33,7 +33,17 @@ class Story(SecurityContentObject):
     detections:List[Detection] = []
     investigations: List[Investigation] = []
     baselines: List[Baseline] = []
-    data_sources: List[DataSource] = []
 
 
+    @computed_field
+    @property
+    def data_sources(self)-> list[DataSource]:
+        # Only add a data_source if it does not already exist in the story
+        data_source_objects:set[DataSource] = set()
+        for detection in self.detections:
+            data_source_objects.update(set(detection.data_source_objects))
+
+        return sorted(list(data_source_objects))
 
 
     def storyAndInvestigationNamesWithApp(self, app_name:str)->List[str]:
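A runnable sketch of the computed_field fix (toy models with string data sources standing in for DataSource objects): because the list is derived on access from the detections, duplicates can never accumulate during construction.

from pydantic import BaseModel, computed_field

class Detection(BaseModel):  # toy stand-ins for the real models
    name: str
    data_source_objects: list[str] = []

class Story(BaseModel):
    detections: list[Detection] = []

    @computed_field
    @property
    def data_sources(self) -> list[str]:
        # Deduplicate via a set, then sort for a stable order.
        unique: set[str] = set()
        for detection in self.detections:
            unique.update(detection.data_source_objects)
        return sorted(unique)

story = Story(detections=[
    Detection(name="d1", data_source_objects=["Sysmon EventID 1"]),
    Detection(name="d2", data_source_objects=["Sysmon EventID 1", "Sysmon EventID 3"]),
])
print(story.data_sources)  # ['Sysmon EventID 1', 'Sysmon EventID 3']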
