Merge pull request #193 from splunk/improved_data_sources
better data source handling
pyth0n1c authored Jul 25, 2024
2 parents 7b10d64 + 85bd7c8 commit 5bd600c
Showing 15 changed files with 400 additions and 121 deletions.
15 changes: 14 additions & 1 deletion contentctl/actions/build.py
@@ -10,6 +10,8 @@
from contentctl.output.conf_writer import ConfWriter
from contentctl.output.ba_yml_output import BAYmlOutput
from contentctl.output.api_json_output import ApiJsonOutput
from contentctl.output.data_source_writer import DataSourceWriter
from contentctl.objects.lookup import Lookup
import pathlib
import json
import datetime
@@ -28,9 +30,20 @@ class Build:


def execute(self, input_dto: BuildInputDto) -> DirectorOutputDto:
if input_dto.config.build_app:

updated_conf_files:set[pathlib.Path] = set()
conf_output = ConfOutput(input_dto.config)

# Construct a special lookup whose CSV is created at runtime and
# written directly into the output folder. It is created with model_construct,
# not model_validate, because the CSV does not exist yet.
data_sources_lookup_csv_path = input_dto.config.getPackageDirectoryPath() / "lookups" / "data_sources.csv"
DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, data_sources_lookup_csv_path)
input_dto.director_output_dto.addContentToDictMappings(Lookup.model_construct(description= "A lookup file that will contain the data source objects for detections.",
filename=data_sources_lookup_csv_path,
name="data_sources"))

updated_conf_files.update(conf_output.writeHeaders())
updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.detections, SecurityContentType.detections))
updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.stories, SecurityContentType.stories))
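For reference, Pydantic v2's model_construct builds an instance without running any validators, while model_validate enforces them (a FilePath field must point to an existing file). A minimal standalone sketch of the distinction, using a hypothetical GeneratedLookup model rather than the real Lookup class:

```python
import pathlib
from pydantic import BaseModel, FilePath, ValidationError

class GeneratedLookup(BaseModel):  # hypothetical stand-in for Lookup
    name: str
    description: str
    filename: FilePath  # FilePath validation requires the file to exist on disk

missing_csv = pathlib.Path("output/lookups/data_sources.csv")  # not created yet

try:
    GeneratedLookup.model_validate(
        {"name": "data_sources", "description": "demo", "filename": missing_csv}
    )
except ValidationError as err:
    print(f"model_validate rejects the missing file ({err.error_count()} error)")

# model_construct bypasses all validation, so the instance can be built for a
# file that is generated at build time rather than checked into the repo.
lookup = GeneratedLookup.model_construct(
    name="data_sources", description="demo", filename=missing_csv
)
print(lookup.filename)  # output/lookups/data_sources.csv
```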
1 change: 1 addition & 0 deletions contentctl/actions/initialize.py
@@ -28,6 +28,7 @@ def execute(self, config: test) -> None:
('../templates/app_template/', 'app_template'),
('../templates/deployments/', 'deployments'),
('../templates/detections/', 'detections'),
('../templates/data_sources/', 'data_sources'),
('../templates/macros/','macros'),
('../templates/stories/', 'stories'),
]:
1 change: 0 additions & 1 deletion contentctl/actions/validate.py
@@ -28,7 +28,6 @@ def execute(self, input_dto: validate) -> DirectorOutputDto:
[],
[],
[],
[],
)

director = Director(director_output_dto)
95 changes: 39 additions & 56 deletions contentctl/input/director.py
@@ -58,7 +58,6 @@ class DirectorOutputDto:
deployments: list[Deployment]
ssa_detections: list[SSADetection]
data_sources: list[DataSource]
event_sources: list[EventSource]
name_to_content_map: dict[str, SecurityContentObject] = field(default_factory=dict)
uuid_to_content_map: dict[UUID, SecurityContentObject] = field(default_factory=dict)

@@ -68,17 +67,19 @@ def addContentToDictMappings(self, content: SecurityContentObject):
# Since SSA detections may have the same name as ESCU detections,
# for this function we prepend 'SSA ' to the name.
content_name = f"SSA {content_name}"

if content_name in self.name_to_content_map:
raise ValueError(
f"Duplicate name '{content_name}' with paths:\n"
f" - {content.file_path}\n"
f" - {self.name_to_content_map[content_name].file_path}"
)
elif content.id in self.uuid_to_content_map:

if content.id in self.uuid_to_content_map:
raise ValueError(
f"Duplicate id '{content.id}' with paths:\n"
f" - {content.file_path}\n"
f" - {self.name_to_content_map[content_name].file_path}"
f" - {self.uuid_to_content_map[content.id].file_path}"
)

if isinstance(content, Lookup):
@@ -99,9 +100,10 @@ def addContentToDictMappings(self, content: SecurityContentObject):
self.detections.append(content)
elif isinstance(content, SSADetection):
self.ssa_detections.append(content)
elif isinstance(content, DataSource):
self.data_sources.append(content)
else:
raise Exception(f"Unknown security content type: {type(content)}")

self.name_to_content_map[content_name] = content
self.uuid_to_content_map[content.id] = content
@@ -124,41 +126,27 @@ def execute(self, input_dto: validate) -> None:
self.createSecurityContent(SecurityContentType.stories)
self.createSecurityContent(SecurityContentType.baselines)
self.createSecurityContent(SecurityContentType.investigations)
self.createSecurityContent(SecurityContentType.event_sources)
self.createSecurityContent(SecurityContentType.data_sources)
self.createSecurityContent(SecurityContentType.playbooks)
self.createSecurityContent(SecurityContentType.detections)
self.createSecurityContent(SecurityContentType.ssa_detections)


from contentctl.objects.abstract_security_content_objects.detection_abstract import MISSING_SOURCES
if len(MISSING_SOURCES) > 0:
missing_sources_string = "\n 🟡 ".join(sorted(list(MISSING_SOURCES)))
print("WARNING: The following data_sources have been used in detections, but are not yet defined.\n"
"This is not yet an error since not all data_sources have been defined, but will be convered to an error soon:\n 🟡 "
f"{missing_sources_string}")
else:
print("No missing data_sources!")

def createSecurityContent(self, contentType: SecurityContentType) -> None:
if contentType == SecurityContentType.ssa_detections:
files = Utils.get_all_yml_files_from_directory(
os.path.join(self.input_dto.path, "ssa_detections")
)
security_content_files = [f for f in files if f.name.startswith("ssa___")]

elif contentType == SecurityContentType.data_sources:
security_content_files = (
Utils.get_all_yml_files_from_directory_one_layer_deep(
os.path.join(self.input_dto.path, "data_sources")
)
)

elif contentType == SecurityContentType.event_sources:
security_content_files = Utils.get_all_yml_files_from_directory(
os.path.join(self.input_dto.path, "data_sources", "cloud", "event_sources")
)
security_content_files.extend(
Utils.get_all_yml_files_from_directory(
os.path.join(self.input_dto.path, "data_sources", "endpoint", "event_sources")
)
)
security_content_files.extend(
Utils.get_all_yml_files_from_directory(
os.path.join(self.input_dto.path, "data_sources", "network", "event_sources")
)
)

elif contentType in [
SecurityContentType.deployments,
SecurityContentType.lookups,
@@ -168,6 +156,7 @@ def createSecurityContent(self, contentType: SecurityContentType) -> None:
SecurityContentType.investigations,
SecurityContentType.playbooks,
SecurityContentType.detections,
SecurityContentType.data_sources,
]:
files = Utils.get_all_yml_files_from_directory(
os.path.join(self.input_dto.path, str(contentType.name))
@@ -190,54 +179,48 @@ def createSecurityContent(self, contentType: SecurityContentType) -> None:
modelDict = YmlReader.load_file(file)

if contentType == SecurityContentType.lookups:
lookup = Lookup.model_validate(modelDict,context={"output_dto":self.output_dto, "config":self.input_dto})
self.output_dto.addContentToDictMappings(lookup)

elif contentType == SecurityContentType.macros:
macro = Macro.model_validate(modelDict,context={"output_dto":self.output_dto})
self.output_dto.addContentToDictMappings(macro)

elif contentType == SecurityContentType.deployments:
deployment = Deployment.model_validate(modelDict,context={"output_dto":self.output_dto})
self.output_dto.addContentToDictMappings(deployment)

elif contentType == SecurityContentType.playbooks:
playbook = Playbook.model_validate(modelDict,context={"output_dto":self.output_dto})
self.output_dto.addContentToDictMappings(playbook)

elif contentType == SecurityContentType.baselines:
baseline = Baseline.model_validate(modelDict,context={"output_dto":self.output_dto})
self.output_dto.addContentToDictMappings(baseline)

elif contentType == SecurityContentType.investigations:
investigation = Investigation.model_validate(modelDict,context={"output_dto":self.output_dto})
self.output_dto.addContentToDictMappings(investigation)

elif contentType == SecurityContentType.stories:
story = Story.model_validate(modelDict,context={"output_dto":self.output_dto})
self.output_dto.addContentToDictMappings(story)

elif contentType == SecurityContentType.detections:
detection = Detection.model_validate(modelDict,context={"output_dto":self.output_dto, "app":self.input_dto.app})
self.output_dto.addContentToDictMappings(detection)

elif contentType == SecurityContentType.ssa_detections:
self.constructSSADetection(self.ssa_detection_builder, self.output_dto,str(file))
ssa_detection = self.ssa_detection_builder.getObject()
if ssa_detection.status in [DetectionStatus.production.value, DetectionStatus.validation.value]:
self.output_dto.addContentToDictMappings(ssa_detection)

elif contentType == SecurityContentType.data_sources:
data_source = DataSource.model_validate(
modelDict, context={"output_dto": self.output_dto}
)
self.output_dto.data_sources.append(data_source)

elif contentType == SecurityContentType.event_sources:
event_source = EventSource.model_validate(
modelDict, context={"output_dto": self.output_dto}
)
self.output_dto.event_sources.append(event_source)
self.output_dto.addContentToDictMappings(data_source)

else:
raise Exception(f"Unsupported type: [{contentType}]")
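The warning block in execute() above relies on the module-level MISSING_SOURCES set imported from detection_abstract. A simplified, self-contained sketch of that collect-then-report-once pattern (the source names are illustrative):

```python
# Unresolved names are accumulated in a module-level set while each file is
# constructed, then reported ONCE after all content has been created, rather
# than printing one warning per detection.
MISSING_SOURCES: set[str] = set()

KNOWN_SOURCES = {"Sysmon EventID 1", "Sysmon EventID 3"}  # stand-in for the real data sources

def resolve(name: str) -> bool:
    if name in KNOWN_SOURCES:
        return True
    MISSING_SOURCES.add(name)  # remember the failure, but keep building
    return False

for detection_sources in (["Sysmon EventID 1"], ["Sysmon EventID 99"]):
    for name in detection_sources:
        resolve(name)

if len(MISSING_SOURCES) > 0:
    missing_sources_string = "\n 🟡 ".join(sorted(MISSING_SOURCES))
    print("WARNING: the following data_sources are used in detections but not yet defined:\n"
          f" 🟡 {missing_sources_string}")
else:
    print("No missing data_sources!")
```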
2 changes: 2 additions & 0 deletions contentctl/input/yml_reader.py
@@ -40,6 +40,8 @@ def load_file(file_path: pathlib.Path, add_fields=True, STRICT_YML_CHECKING=False
if add_fields == False:
return yml_obj


yml_obj['file_path'] = str(file_path)


return yml_obj
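A condensed sketch of what load_file does after this change (error handling and the STRICT_YML_CHECKING path are omitted):

```python
import pathlib
import yaml  # PyYAML

def load_file(file_path: pathlib.Path, add_fields: bool = True) -> dict:
    with open(file_path) as f:
        yml_obj = yaml.safe_load(f)
    if not add_fields:
        return yml_obj
    # Tag the dict with its source path so downstream Pydantic models can
    # populate their file_path field and report useful error locations.
    yml_obj["file_path"] = str(file_path)
    return yml_obj
```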
contentctl/objects/abstract_security_content_objects/detection_abstract.py
@@ -22,25 +22,26 @@
from contentctl.objects.unit_test import UnitTest
from contentctl.objects.test_group import TestGroup
from contentctl.objects.integration_test import IntegrationTest

from contentctl.objects.event_source import EventSource
from contentctl.objects.data_source import DataSource

#from contentctl.objects.playbook import Playbook
from contentctl.objects.enums import DataSource,ProvidingTechnology
from contentctl.objects.enums import ProvidingTechnology
from contentctl.enrichments.cve_enrichment import CveEnrichmentObj

MISSING_SOURCES:set[str] = set()

class Detection_Abstract(SecurityContentObject):
model_config = ConfigDict(use_enum_values=True)

#contentType: SecurityContentType = SecurityContentType.detections
type: AnalyticsType = Field(...)
status: DetectionStatus = Field(...)
data_source: Optional[List[str]] = None
data_source: list[str] = []
tags: DetectionTags = Field(...)
search: Union[str, dict[str,Any]] = Field(...)
how_to_implement: str = Field(..., min_length=4)
known_false_positives: str = Field(..., min_length=4)
data_source_objects: Optional[List[DataSource]] = None

enabled_by_default: bool = False
file_path: FilePath = Field(...)
@@ -53,6 +54,8 @@ class Detection_Abstract(SecurityContentObject):
# A list of groups of tests, relying on the same data
test_groups: Union[list[TestGroup], None] = Field(None,validate_default=True)

data_source_objects: list[DataSource] = []


@field_validator("search", mode="before")
@classmethod
@@ -138,6 +141,7 @@ def datamodel(self)->List[DataModel]:
else:
return []


@computed_field
@property
def source(self)->str:
@@ -161,10 +165,12 @@ def annotations(self)->dict[str,Union[List[str],int,str]]:
annotations_dict["type"] = self.type
#annotations_dict["version"] = self.version

annotations_dict["data_source"] = self.data_source

#The annotations object is a superset of the mappings object.
# So start with the mapping object.
annotations_dict.update(self.mappings)

#Make sure that the results are sorted for readability/easier diffs
return dict(sorted(annotations_dict.items(), key=lambda item: item[0]))

@@ -384,23 +390,37 @@ def model_post_init(self, ctx:dict[str,Any]):
raise ValueError(f"Error, failed to replace detection reference in Baseline '{baseline.name}' to detection '{self.name}'")
baseline.tags.detections = new_detections

self.data_source_objects = []
for data_source_obj in director.data_sources:
for detection_data_source in self.data_source:
if data_source_obj.name in detection_data_source:
self.data_source_objects.append(data_source_obj)

# Remove duplicate data source objects based on their 'name' property
unique_data_sources = {}
for data_source_obj in self.data_source_objects:
if data_source_obj.name not in unique_data_sources:
unique_data_sources[data_source_obj.name] = data_source_obj
self.data_source_objects = list(unique_data_sources.values())
# Data sources may be defined one per line, OR they may be defined as
# SOURCE_1 AND ANOTHERSOURCE AND A_THIRD_SOURCE
# if more than one data source is required for a detection (for example, because it includes a join).
# Parse and update the list to resolve individual names and remove potential duplicates.
updated_data_source_names:set[str] = set()

for ds in self.data_source:
split_data_sources = {d.strip() for d in ds.split('AND')}
updated_data_source_names.update(split_data_sources)

sources = sorted(list(updated_data_source_names))

matched_data_sources:list[DataSource] = []
missing_sources:list[str] = []
for source in sources:
try:
matched_data_sources += DataSource.mapNamesToSecurityContentObjects([source], director)
except Exception as data_source_mapping_exception:
# We gobble this up and add it to a global set so that we
# can print it ONCE at the end of the build of data sources.
# This will be removed later as per the note below.
MISSING_SOURCES.add(source)

if len(missing_sources) > 0:
# This will be changed to ValueError when we have a complete list of data sources
print(f"WARNING: The following exception occurred when mapping the data_source field to DataSource objects:{missing_sources}")

self.data_source_objects = matched_data_sources

for story in self.tags.analytic_story:
story.detections.append(self)
story.data_sources.extend(self.data_source_objects)

story.detections.append(self)
return self


@@ -424,14 +444,16 @@ def mapDetectionNamesToBaselineObjects(cls, v:list[str], info:ValidationInfo)->List[Baseline]:
raise ValueError("Error, baselines are constructed automatically at runtime. Please do not include this field.")


name:Union[str,dict] = info.data.get("name",None)
name:Union[str,None] = info.data.get("name",None)
if name is None:
raise ValueError("Error, cannot get Baselines because the Detection does not have a 'name' defined.")

director:DirectorOutputDto = info.context.get("output_dto",None)
baselines:List[Baseline] = []
for baseline in director.baselines:
if name in baseline.tags.detections:
# This matching is a bit strange, because baseline.tags.detections starts as a list of strings, but
# is eventually updated to a list of Detections as we construct all of the detection objects.
if name in [detection_name for detection_name in baseline.tags.detections if isinstance(detection_name,str)]:
baselines.append(baseline)

return baselines
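A self-contained sketch of the new parsing step in model_post_init, with example input and output (the source names are illustrative):

```python
def parse_data_sources(data_source: list[str]) -> list[str]:
    # Each entry names one source, or several joined with "AND" when a
    # detection needs multiple sources (e.g., a join). Split, trim,
    # deduplicate via a set, and sort for stable ordering.
    updated_data_source_names: set[str] = set()
    for ds in data_source:
        updated_data_source_names.update(d.strip() for d in ds.split("AND"))
    return sorted(updated_data_source_names)

print(parse_data_sources(["Sysmon EventID 1 AND Sysmon EventID 3",
                          "Sysmon EventID 1"]))
# ['Sysmon EventID 1', 'Sysmon EventID 3']
```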