From 0fc9ff4e17ca6a82afd4262358725f3085ff3f16 Mon Sep 17 00:00:00 2001 From: Patrick Date: Wed, 17 Jul 2024 14:58:10 +0200 Subject: [PATCH 01/11] better data source handling --- contentctl/actions/build.py | 7 ++- contentctl/actions/validate.py | 1 + contentctl/input/director.py | 23 +------- .../detection_abstract.py | 40 ++++++++++--- .../security_content_object_abstract.py | 16 +++--- contentctl/objects/data_source.py | 31 +++++----- contentctl/objects/event_source.py | 15 ++--- contentctl/objects/story.py | 2 + contentctl/output/data_source_writer.py | 56 +++++++++++++++++++ 9 files changed, 131 insertions(+), 60 deletions(-) create mode 100644 contentctl/output/data_source_writer.py diff --git a/contentctl/actions/build.py b/contentctl/actions/build.py index a0d46195..a1fe906d 100644 --- a/contentctl/actions/build.py +++ b/contentctl/actions/build.py @@ -10,6 +10,7 @@ from contentctl.output.conf_writer import ConfWriter from contentctl.output.ba_yml_output import BAYmlOutput from contentctl.output.api_json_output import ApiJsonOutput +from contentctl.output.data_source_writer import DataSourceWriter import pathlib import json import datetime @@ -28,7 +29,11 @@ class Build: def execute(self, input_dto: BuildInputDto) -> DirectorOutputDto: - if input_dto.config.build_app: + if input_dto.config.build_app: + + DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, str(input_dto.config.path) + "/lookups/data_sources.csv") + DataSourceWriter.writeEventSourceCsv(input_dto.director_output_dto.event_sources, str(input_dto.config.path) + "/lookups/event_sources.csv") + updated_conf_files:set[pathlib.Path] = set() conf_output = ConfOutput(input_dto.config) updated_conf_files.update(conf_output.writeHeaders()) diff --git a/contentctl/actions/validate.py b/contentctl/actions/validate.py index bd586850..962f1343 100644 --- a/contentctl/actions/validate.py +++ b/contentctl/actions/validate.py @@ -42,6 +42,7 @@ def execute(self, input_dto: validate) 
-> DirectorOutputDto: director = Director(director_output_dto) director.execute(input_dto) + return director_output_dto def validate_duplicate_uuids( diff --git a/contentctl/input/director.py b/contentctl/input/director.py index b53b4770..b3c2f5d6 100644 --- a/contentctl/input/director.py +++ b/contentctl/input/director.py @@ -137,27 +137,6 @@ def createSecurityContent(self, contentType: SecurityContentType) -> None: ) security_content_files = [f for f in files if f.name.startswith("ssa___")] - elif contentType == SecurityContentType.data_sources: - security_content_files = ( - Utils.get_all_yml_files_from_directory_one_layer_deep( - os.path.join(self.input_dto.path, "data_sources") - ) - ) - - elif contentType == SecurityContentType.event_sources: - security_content_files = Utils.get_all_yml_files_from_directory( - os.path.join(self.input_dto.path, "data_sources", "cloud", "event_sources") - ) - security_content_files.extend( - Utils.get_all_yml_files_from_directory( - os.path.join(self.input_dto.path, "data_sources", "endpoint", "event_sources") - ) - ) - security_content_files.extend( - Utils.get_all_yml_files_from_directory( - os.path.join(self.input_dto.path, "data_sources", "network", "event_sources") - ) - ) elif contentType in [ SecurityContentType.deployments, @@ -168,6 +147,8 @@ def createSecurityContent(self, contentType: SecurityContentType) -> None: SecurityContentType.investigations, SecurityContentType.playbooks, SecurityContentType.detections, + SecurityContentType.data_sources, + SecurityContentType.event_sources, ]: files = Utils.get_all_yml_files_from_directory( os.path.join(self.input_dto.path, str(contentType.name)) diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index 8389ad78..4b7c6b77 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ 
b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -22,7 +22,7 @@ from contentctl.objects.unit_test import UnitTest from contentctl.objects.test_group import TestGroup from contentctl.objects.integration_test import IntegrationTest - +from contentctl.objects.event_source import EventSource #from contentctl.objects.playbook import Playbook from contentctl.objects.enums import DataSource,ProvidingTechnology @@ -41,6 +41,7 @@ class Detection_Abstract(SecurityContentObject): how_to_implement: str = Field(..., min_length=4) known_false_positives: str = Field(..., min_length=4) data_source_objects: Optional[List[DataSource]] = None + event_source_objects: Optional[List[EventSource]] = None enabled_by_default: bool = False file_path: FilePath = Field(...) @@ -161,10 +162,12 @@ def annotations(self)->dict[str,Union[List[str],int,str]]: annotations_dict["type"] = self.type #annotations_dict["version"] = self.version + annotations_dict["data_source"] = self.data_source + #The annotations object is a superset of the mappings object. # So start with the mapping object. 
annotations_dict.update(self.mappings) - + #Make sure that the results are sorted for readability/easier diffs return dict(sorted(annotations_dict.items(), key=lambda item: item[0])) @@ -385,21 +388,42 @@ def model_post_init(self, ctx:dict[str,Any]): baseline.tags.detections = new_detections self.data_source_objects = [] - for data_source_obj in director.data_sources: - for detection_data_source in self.data_source: - if data_source_obj.name in detection_data_source: - self.data_source_objects.append(data_source_obj) + self.event_source_objects = [] + for detection_data_source in self.data_source: + split_data_sources = [ds.strip() for ds in detection_data_source.split('AND')] + for split_data_source in split_data_sources: + data_source_found = False + for data_source_obj in director.data_sources: + if data_source_obj.name in split_data_source: + self.data_source_objects.append(data_source_obj) + data_source_found = True + break + for event_source_obj in director.event_sources: + if event_source_obj.name == split_data_source: + self.event_source_objects.append(event_source_obj) + data_source_found = True + break + + if not data_source_found: + raise ValueError(f"Error, data source object '{split_data_source}' not found.") - # Remove duplicate data source objects based on their 'name' property + unique_data_sources = {} for data_source_obj in self.data_source_objects: if data_source_obj.name not in unique_data_sources: unique_data_sources[data_source_obj.name] = data_source_obj - self.data_source_objects = list(unique_data_sources.values()) + self.data_source_objects = list(unique_data_sources.values()) + + unique_event_sources = {} + for event_source_obj in self.event_source_objects: + if event_source_obj.name not in unique_event_sources: + unique_event_sources[event_source_obj.name] = event_source_obj + self.event_source_objects = list(unique_event_sources.values()) for story in self.tags.analytic_story: story.detections.append(self) 
story.data_sources.extend(self.data_source_objects) + story.event_sources.extend(self.event_source_objects) return self diff --git a/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py b/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py index 90c5376d..3b081896 100644 --- a/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py @@ -77,16 +77,16 @@ def contentNameToFileName(cls, content_name:str)->str: .lower() + ".yml" - @model_validator(mode="after") - def ensureFileNameMatchesSearchName(self): - file_name = self.contentNameToFileName(self.name) + # @model_validator(mode="after") + # def ensureFileNameMatchesSearchName(self): + # file_name = self.contentNameToFileName(self.name) - if (self.file_path is not None and file_name != self.file_path.name): - raise ValueError(f"The file name MUST be based off the content 'name' field:\n"\ - f"\t- Expected File Name: {file_name}\n"\ - f"\t- Actual File Name : {self.file_path.name}") + # if (self.file_path is not None and file_name != self.file_path.name): + # raise ValueError(f"The file name MUST be based off the content 'name' field:\n"\ + # f"\t- Expected File Name: {file_name}\n"\ + # f"\t- Actual File Name : {self.file_path.name}") - return self + # return self @field_validator('file_path') @classmethod diff --git a/contentctl/objects/data_source.py b/contentctl/objects/data_source.py index 2c87777e..7cd9922c 100644 --- a/contentctl/objects/data_source.py +++ b/contentctl/objects/data_source.py @@ -1,20 +1,21 @@ from __future__ import annotations -from pydantic import BaseModel +from typing import Union, Optional, List +from pydantic import model_validator, Field, FilePath +from contentctl.objects.security_content_object import SecurityContentObject +from contentctl.objects.event_source import EventSource +class 
DataSource(SecurityContentObject): + source: str = Field(...) + sourcetype: str = Field(...) + separator: Optional[str] = None + configuration: Optional[str] = None + supported_TA: Optional[list] = None + event_names: Optional[list] = None + fields: Optional[list] = None + example_log: Optional[str] = None + + event_sources: Optional[list[EventSource]] = None -class DataSource(BaseModel): - name: str - id: str - author: str - source: str - sourcetype: str - separator: str = None - configuration: str = None - supported_TA: dict - event_names: list = None - event_sources: list = None - fields: list = None - example_log: str = None def model_post_init(self, ctx:dict[str,Any]): context = ctx.get("output_dto") @@ -22,7 +23,7 @@ def model_post_init(self, ctx:dict[str,Any]): if self.event_names: self.event_sources = [] for event_source in context.event_sources: - if any(event['event_name'] == event_source.event_name for event in self.event_names): + if any(event['event_name'] == event_source.name for event in self.event_names): self.event_sources.append(event_source) return self \ No newline at end of file diff --git a/contentctl/objects/event_source.py b/contentctl/objects/event_source.py index c14dcf53..0ed61979 100644 --- a/contentctl/objects/event_source.py +++ b/contentctl/objects/event_source.py @@ -1,10 +1,11 @@ from __future__ import annotations -from pydantic import BaseModel +from typing import Union, Optional, List +from pydantic import BaseModel, Field +from contentctl.objects.security_content_object import SecurityContentObject -class EventSource(BaseModel): - event_name: str - fields: list[str] - field_mappings: list[dict] = None - convert_to_log_source: list[dict] = None - example_log: str = None +class EventSource(SecurityContentObject): + fields: Optional[list[str]] = None + field_mappings: Optional[list[dict]] = None + convert_to_log_source: Optional[list[dict]] = None + example_log: Optional[str] = None diff --git a/contentctl/objects/story.py 
b/contentctl/objects/story.py index 6a2eac1c..064bfc02 100644 --- a/contentctl/objects/story.py +++ b/contentctl/objects/story.py @@ -8,6 +8,7 @@ from contentctl.objects.investigation import Investigation from contentctl.objects.baseline import Baseline from contentctl.objects.data_source import DataSource + from contentctl.objects.event_source import EventSource from contentctl.objects.security_content_object import SecurityContentObject @@ -34,6 +35,7 @@ class Story(SecurityContentObject): investigations: List[Investigation] = [] baselines: List[Baseline] = [] data_sources: List[DataSource] = [] + event_sources: List[EventSource] = [] def storyAndInvestigationNamesWithApp(self, app_name:str)->List[str]: return [f"{app_name} - {name} - Rule" for name in self.detection_names] + \ diff --git a/contentctl/output/data_source_writer.py b/contentctl/output/data_source_writer.py new file mode 100644 index 00000000..02c90b46 --- /dev/null +++ b/contentctl/output/data_source_writer.py @@ -0,0 +1,56 @@ +import csv +from contentctl.objects.data_source import DataSource +from contentctl.objects.event_source import EventSource +from typing import List + +class DataSourceWriter: + + @staticmethod + def writeDataSourceCsv(data_source_objects: List[DataSource], file_path: str): + with open(file_path, mode='w', newline='') as file: + writer = csv.writer(file) + # Write the header + writer.writerow([ + "name", "id", "author", "source", "sourcetype", "separator", + "supported_TA_name", "supported_TA_version", "supported_TA_url", + "description" + ]) + # Write the data + for data_source in data_source_objects: + if data_source.supported_TA and isinstance(data_source.supported_TA, list) and len(data_source.supported_TA) > 0: + supported_TA_name = data_source.supported_TA[0].get('name', '') + supported_TA_version = data_source.supported_TA[0].get('version', '') + supported_TA_url = data_source.supported_TA[0].get('url', '') + else: + supported_TA_name = '' + supported_TA_version = '' + 
supported_TA_url = '' + writer.writerow([ + data_source.name, + data_source.id, + data_source.author, + data_source.source, + data_source.sourcetype, + data_source.separator, + supported_TA_name, + supported_TA_version, + supported_TA_url, + data_source.description, + ]) + @staticmethod + def writeEventSourceCsv(event_source_objects: List[EventSource], file_path: str): + with open(file_path, mode='w', newline='') as file: + writer = csv.writer(file) + # Write the header + writer.writerow([ + "name", "id", "author", "description", "fields" + ]) + # Write the data + for event_source in event_source_objects: + writer.writerow([ + event_source.name, + event_source.id, + event_source.author, + event_source.description, + "; ".join(event_source.fields) + ]) \ No newline at end of file From 5ca4ed5aa1b1a79cee9ff1bf49242c9d84aa2ba1 Mon Sep 17 00:00:00 2001 From: Patrick Date: Wed, 17 Jul 2024 15:03:32 +0200 Subject: [PATCH 02/11] better data source handling --- .../templates/detections/endpoint/anomalous_usage_of_7zip.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml b/contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml index 172248d4..77465781 100644 --- a/contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml +++ b/contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml @@ -13,7 +13,7 @@ description: The following detection identifies a 7z.exe spawned from `Rundll32. any files written to disk and analyze as needed. Review parallel processes for additional behaviors. Typically, archiving files will result in exfiltration. 
data_source: -- Sysmon Event ID 1 +- Sysmon EventID 1 search: '| tstats `security_content_summariesonly` count min(_time) as firstTime max(_time) as lastTime from datamodel=Endpoint.Processes where Processes.parent_process_name IN ("rundll32.exe", "dllhost.exe") Processes.process_name=*7z* by Processes.dest From 4bfac699cb5538cd39910d5749bd16c069c29c33 Mon Sep 17 00:00:00 2001 From: Patrick Date: Wed, 17 Jul 2024 15:07:41 +0200 Subject: [PATCH 03/11] better data source handling --- contentctl/actions/initialize.py | 2 + .../templates/data_sources/Sysmon_EventID.yml | 51 ++++++++ .../event_sources/Sysmon_EventID_1.yml | 122 ++++++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 contentctl/templates/data_sources/Sysmon_EventID.yml create mode 100644 contentctl/templates/event_sources/Sysmon_EventID_1.yml diff --git a/contentctl/actions/initialize.py b/contentctl/actions/initialize.py index 679574b8..9d3fb337 100644 --- a/contentctl/actions/initialize.py +++ b/contentctl/actions/initialize.py @@ -28,6 +28,8 @@ def execute(self, config: test) -> None: ('../templates/app_template/', 'app_template'), ('../templates/deployments/', 'deployments'), ('../templates/detections/', 'detections'), + ('../templates/data_sources/', 'data_sources'), + ('../templates/event_sources/', 'event_sources'), ('../templates/macros/','macros'), ('../templates/stories/', 'stories'), ]: diff --git a/contentctl/templates/data_sources/Sysmon_EventID.yml b/contentctl/templates/data_sources/Sysmon_EventID.yml new file mode 100644 index 00000000..3a04aac4 --- /dev/null +++ b/contentctl/templates/data_sources/Sysmon_EventID.yml @@ -0,0 +1,51 @@ +name: Sysmon EventID +id: 848aec1b-90aa-48a9-ae52-31d3a2e79697 +author: Patrick Bareiss, Splunk +source: XmlWinEventLog:Microsoft-Windows-Sysmon/Operational +sourcetype: xmlwineventlog +separator: EventID +configuration: https://github.com/SwiftOnSecurity/sysmon-config +supported_TA: +- name: Splunk Add-on for Sysmon + url: 
https://splunkbase.splunk.com/app/5709/ + version: 4.0.0 +description: Data source object for Sysmon EventID +event_names: +- data_source: event_sources/Sysmon_EventID_1.yml + event_name: Sysmon EventID 1 +- data_source: event_sources/Sysmon_EventID_10.yml + event_name: Sysmon EventID 10 +- data_source: event_sources/Sysmon_EventID_11.yml + event_name: Sysmon EventID 11 +- data_source: event_sources/Sysmon_EventID_12.yml + event_name: Sysmon EventID 12 +- data_source: event_sources/Sysmon_EventID_13.yml + event_name: Sysmon EventID 13 +- data_source: event_sources/Sysmon_EventID_14.yml + event_name: Sysmon EventID 14 +- data_source: event_sources/Sysmon_EventID_15.yml + event_name: Sysmon EventID 15 +- data_source: event_sources/Sysmon_EventID_17.yml + event_name: Sysmon EventID 17 +- data_source: event_sources/Sysmon_EventID_18.yml + event_name: Sysmon EventID 18 +- data_source: event_sources/Sysmon_EventID_20.yml + event_name: Sysmon EventID 20 +- data_source: event_sources/Sysmon_EventID_21.yml + event_name: Sysmon EventID 21 +- data_source: event_sources/Sysmon_EventID_22.yml + event_name: Sysmon EventID 22 +- data_source: event_sources/Sysmon_EventID_23.yml + event_name: Sysmon EventID 23 +- data_source: event_sources/Sysmon_EventID_3.yml + event_name: Sysmon EventID 3 +- data_source: event_sources/Sysmon_EventID_5.yml + event_name: Sysmon EventID 5 +- data_source: event_sources/Sysmon_EventID_6.yml + event_name: Sysmon EventID 6 +- data_source: event_sources/Sysmon_EventID_7.yml + event_name: Sysmon EventID 7 +- data_source: event_sources/Sysmon_EventID_8.yml + event_name: Sysmon EventID 8 +- data_source: event_sources/Sysmon_EventID_9.yml + event_name: Sysmon EventID 9 diff --git a/contentctl/templates/event_sources/Sysmon_EventID_1.yml b/contentctl/templates/event_sources/Sysmon_EventID_1.yml new file mode 100644 index 00000000..5a19cd0a --- /dev/null +++ b/contentctl/templates/event_sources/Sysmon_EventID_1.yml @@ -0,0 +1,122 @@ +name: Sysmon EventID 1 
+id: b375f4d1-d7ca-4bc0-9103-294825c0af17 +author: Patrick Bareiss, Splunk +description: Event source object for Sysmon EventID 1 +fields: +- _time +- Channel +- CommandLine +- Company +- Computer +- CurrentDirectory +- Description +- EventChannel +- EventCode +- EventData_Xml +- EventDescription +- EventID +- EventRecordID +- FileVersion +- Guid +- Hashes +- IMPHASH +- Image +- IntegrityLevel +- Keywords +- Level +- LogonGuid +- LogonId +- MD5 +- Name +- Opcode +- OriginalFileName +- ParentCommandLine +- ParentImage +- ParentProcessGuid +- ParentProcessId +- ProcessGuid +- ProcessID +- ProcessId +- Product +- RecordID +- RecordNumber +- RuleName +- SHA256 +- SecurityID +- SystemTime +- System_Props_Xml +- Task +- TerminalSessionId +- ThreadID +- TimeCreated +- User +- UserID +- UtcTime +- Version +- action +- date_hour +- date_mday +- date_minute +- date_month +- date_second +- date_wday +- date_year +- date_zone +- dest +- dvc_nt_host +- event_id +- eventtype +- host +- id +- index +- linecount +- original_file_name +- os +- parent_process +- parent_process_exec +- parent_process_guid +- parent_process_id +- parent_process_name +- parent_process_path +- process +- process_current_directory +- process_exec +- process_guid +- process_hash +- process_id +- process_integrity_level +- process_name +- process_path +- punct +- signature +- signature_id +- source +- sourcetype +- splunk_server +- tag +- tag::eventtype +- timeendpos +- timestartpos +- user +- user_id +- vendor_product +example_log: "154100x80000000000000004522Microsoft-Windows-Sysmon/Operationalwin-dc-6764986.attackrange.local-2020-10-08\ + \ 11:03:46.615{96128EA2-F212-5F7E-E400-000000007F01}2296C:\\Windows\\System32\\cmd.exe10.0.14393.0 (rs1_release.160715-1616)Windows\ + \ Command ProcessorMicrosoft\xAE Windows\xAE Operating\ + \ SystemMicrosoft CorporationCmd.Exe\"C:\\Windows\\system32\\cmd.exe\" /c \"reg save HKLM\\sam\ + \ %%temp%%\\sam & reg save HKLM\\system %%temp%%\\system & reg save HKLM\\\ + 
security %%temp%%\\security\" C:\\Users\\ADMINI~1\\\ + AppData\\Local\\Temp\\ATTACKRANGE\\Administrator{96128EA2-F210-5F7E-ACD4-080000000000}0x8d4ac0HighMD5=F4F684066175B77E0C3A000549D2922C,SHA256=935C1861DF1F4018D698E8B65ABFA02D7E9037D8F68CA3C2065B6CA165D44AD2,IMPHASH=3062ED732D4B25D1C64F084DAC97D37A{96128EA2-F211-5F7E-DF00-000000007F01}4624C:\\Windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe\"powershell.exe\" -noninteractive -encodedcommand WwBDAG8AbgBzAG8AbABlAF0AOgA6AEkAbgBwAHUAdABFAG4AYwBvAGQAaQBuAGcAIAA9ACAATgBlAHcALQBPAGIAagBlAGMAdAAgAFQAZQB4AHQALgBVAFQARgA4AEUAbgBjAG8AZABpAG4AZwAgACQAZgBhAGwAcwBlADsAIABJAG0AcABvAHIAdAAtAE0AbwBkAHUAbABlACAAIgBDADoAXABBAHQAbwBtAGkAYwBSAGUAZABUAGUAYQBtAFwAaQBuAHYAbwBrAGUALQBhAHQAbwBtAGkAYwByAGUAZAB0AGUAYQBtAFwASQBuAHYAbwBrAGUALQBBAHQAbwBtAGkAYwBSAGUAZABUAGUAYQBtAC4AcABzAGQAMQAiACAALQBGAG8AcgBjAGUACgBJAG4AdgBvAGsAZQAtAEEAdABvAG0AaQBjAFQAZQBzAHQAIAAiAFQAMQAwADAAMwAuADAAMAAyACIAIAAtAEMAbwBuAGYAaQByAG0AOgAkAGYAYQBsAHMAZQAgAC0AVABpAG0AZQBvAHUAdABTAGUAYwBvAG4AZABzACAAMwAwADAAIAAtAEUAeABlAGMAdQB0AGkAbwBuAEwAbwBnAFAAYQB0AGgAIABDADoAXABBAHQAbwBtAGkAYwBSAGUAZABUAGUAYQBtAFwAYQB0AGMAXwBlAHgAZQBjAHUAdABpAG8AbgAuAGMAcwB2AA==" From cfe1b0c658789acefd93a0389f47e335c4f83469 Mon Sep 17 00:00:00 2001 From: Patrick Date: Thu, 18 Jul 2024 13:05:38 +0200 Subject: [PATCH 04/11] Enabled file_name match name validator to SecurityContentObjectAbstract --- .../security_content_object_abstract.py | 16 ++++++++-------- contentctl/objects/data_source.py | 14 +------------- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py b/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py index 3b081896..90c5376d 100644 --- a/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py @@ -77,16 +77,16 
@@ def contentNameToFileName(cls, content_name:str)->str: .lower() + ".yml" - # @model_validator(mode="after") - # def ensureFileNameMatchesSearchName(self): - # file_name = self.contentNameToFileName(self.name) + @model_validator(mode="after") + def ensureFileNameMatchesSearchName(self): + file_name = self.contentNameToFileName(self.name) - # if (self.file_path is not None and file_name != self.file_path.name): - # raise ValueError(f"The file name MUST be based off the content 'name' field:\n"\ - # f"\t- Expected File Name: {file_name}\n"\ - # f"\t- Actual File Name : {self.file_path.name}") + if (self.file_path is not None and file_name != self.file_path.name): + raise ValueError(f"The file name MUST be based off the content 'name' field:\n"\ + f"\t- Expected File Name: {file_name}\n"\ + f"\t- Actual File Name : {self.file_path.name}") - # return self + return self @field_validator('file_path') @classmethod diff --git a/contentctl/objects/data_source.py b/contentctl/objects/data_source.py index 7cd9922c..08aeeba1 100644 --- a/contentctl/objects/data_source.py +++ b/contentctl/objects/data_source.py @@ -10,20 +10,8 @@ class DataSource(SecurityContentObject): separator: Optional[str] = None configuration: Optional[str] = None supported_TA: Optional[list] = None - event_names: Optional[list] = None fields: Optional[list] = None example_log: Optional[str] = None - event_sources: Optional[list[EventSource]] = None + event_sources: Optional[list] = None - - def model_post_init(self, ctx:dict[str,Any]): - context = ctx.get("output_dto") - - if self.event_names: - self.event_sources = [] - for event_source in context.event_sources: - if any(event['event_name'] == event_source.name for event in self.event_names): - self.event_sources.append(event_source) - - return self \ No newline at end of file From 3720949935877f30c92ebbf5fef349cf28af5ec4 Mon Sep 17 00:00:00 2001 From: Patrick Date: Wed, 24 Jul 2024 13:23:54 +0200 Subject: [PATCH 05/11] Improved handling of data 
sources --- contentctl/actions/build.py | 6 +- contentctl/actions/validate.py | 1 - contentctl/input/director.py | 70 +++++++++---------- contentctl/input/yml_reader.py | 6 +- .../detection_abstract.py | 65 ++++++++--------- contentctl/objects/data_source.py | 3 +- contentctl/objects/enums.py | 1 - contentctl/objects/story.py | 7 +- contentctl/output/data_source_writer.py | 20 +----- 9 files changed, 75 insertions(+), 104 deletions(-) diff --git a/contentctl/actions/build.py b/contentctl/actions/build.py index a1fe906d..80c93d47 100644 --- a/contentctl/actions/build.py +++ b/contentctl/actions/build.py @@ -31,11 +31,11 @@ class Build: def execute(self, input_dto: BuildInputDto) -> DirectorOutputDto: if input_dto.config.build_app: - DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, str(input_dto.config.path) + "/lookups/data_sources.csv") - DataSourceWriter.writeEventSourceCsv(input_dto.director_output_dto.event_sources, str(input_dto.config.path) + "/lookups/event_sources.csv") - updated_conf_files:set[pathlib.Path] = set() conf_output = ConfOutput(input_dto.config) + + DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, str(input_dto.config.path) + "/lookups/data_sources.csv") + updated_conf_files.update(conf_output.writeHeaders()) updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.detections, SecurityContentType.detections)) updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.stories, SecurityContentType.stories)) diff --git a/contentctl/actions/validate.py b/contentctl/actions/validate.py index 962f1343..be65f52b 100644 --- a/contentctl/actions/validate.py +++ b/contentctl/actions/validate.py @@ -37,7 +37,6 @@ def execute(self, input_dto: validate) -> DirectorOutputDto: [], [], [], - [], ) director = Director(director_output_dto) diff --git a/contentctl/input/director.py b/contentctl/input/director.py index b3c2f5d6..4b3388c7 100644 --- 
a/contentctl/input/director.py +++ b/contentctl/input/director.py @@ -58,7 +58,6 @@ class DirectorOutputDto: deployments: list[Deployment] ssa_detections: list[SSADetection] data_sources: list[DataSource] - event_sources: list[EventSource] name_to_content_map: dict[str, SecurityContentObject] = field(default_factory=dict) uuid_to_content_map: dict[UUID, SecurityContentObject] = field(default_factory=dict) @@ -68,17 +67,19 @@ def addContentToDictMappings(self, content: SecurityContentObject): # Since SSA detections may have the same name as ESCU detection, # for this function we prepend 'SSA ' to the name. content_name = f"SSA {content_name}" - if content_name in self.name_to_content_map: + + if content_name in self.name_to_content_map and isinstance(self.name_to_content_map[content_name], type(content)): raise ValueError( f"Duplicate name '{content_name}' with paths:\n" f" - {content.file_path}\n" f" - {self.name_to_content_map[content_name].file_path}" ) - elif content.id in self.uuid_to_content_map: + + if content.id in self.uuid_to_content_map: raise ValueError( f"Duplicate id '{content.id}' with paths:\n" f" - {content.file_path}\n" - f" - {self.name_to_content_map[content_name].file_path}" + f" - {self.uuid_to_content_map[content.id].file_path}" ) if isinstance(content, Lookup): @@ -99,9 +100,10 @@ def addContentToDictMappings(self, content: SecurityContentObject): self.detections.append(content) elif isinstance(content, SSADetection): self.ssa_detections.append(content) + elif isinstance(content, DataSource): + self.data_sources.append(content) else: - raise Exception(f"Unknown security content type: {type(content)}") - + raise Exception(f"Unknown security content type: {type(content)}") self.name_to_content_map[content_name] = content self.uuid_to_content_map[content.id] = content @@ -116,6 +118,10 @@ def __init__(self, output_dto: DirectorOutputDto) -> None: self.output_dto = output_dto self.ssa_detection_builder = SSADetectionBuilder() + 
self.output_dto.addContentToDictMappings(Lookup.model_construct(description= "A lookup file that will contain the data source objects for detections.", + filename=pathlib.Path("data_sources.csv"), + name="data_sources")) + def execute(self, input_dto: validate) -> None: self.input_dto = input_dto self.createSecurityContent(SecurityContentType.deployments) @@ -124,7 +130,6 @@ def execute(self, input_dto: validate) -> None: self.createSecurityContent(SecurityContentType.stories) self.createSecurityContent(SecurityContentType.baselines) self.createSecurityContent(SecurityContentType.investigations) - self.createSecurityContent(SecurityContentType.event_sources) self.createSecurityContent(SecurityContentType.data_sources) self.createSecurityContent(SecurityContentType.playbooks) self.createSecurityContent(SecurityContentType.detections) @@ -136,8 +141,6 @@ def createSecurityContent(self, contentType: SecurityContentType) -> None: os.path.join(self.input_dto.path, "ssa_detections") ) security_content_files = [f for f in files if f.name.startswith("ssa___")] - - elif contentType in [ SecurityContentType.deployments, SecurityContentType.lookups, @@ -148,7 +151,6 @@ def createSecurityContent(self, contentType: SecurityContentType) -> None: SecurityContentType.playbooks, SecurityContentType.detections, SecurityContentType.data_sources, - SecurityContentType.event_sources, ]: files = Utils.get_all_yml_files_from_directory( os.path.join(self.input_dto.path, str(contentType.name)) @@ -171,54 +173,48 @@ def createSecurityContent(self, contentType: SecurityContentType) -> None: modelDict = YmlReader.load_file(file) if contentType == SecurityContentType.lookups: - lookup = Lookup.model_validate(modelDict,context={"output_dto":self.output_dto, "config":self.input_dto}) - self.output_dto.addContentToDictMappings(lookup) + lookup = Lookup.model_validate(modelDict,context={"output_dto":self.output_dto, "config":self.input_dto}) + self.output_dto.addContentToDictMappings(lookup) elif 
contentType == SecurityContentType.macros: - macro = Macro.model_validate(modelDict,context={"output_dto":self.output_dto}) - self.output_dto.addContentToDictMappings(macro) + macro = Macro.model_validate(modelDict,context={"output_dto":self.output_dto}) + self.output_dto.addContentToDictMappings(macro) elif contentType == SecurityContentType.deployments: - deployment = Deployment.model_validate(modelDict,context={"output_dto":self.output_dto}) - self.output_dto.addContentToDictMappings(deployment) + deployment = Deployment.model_validate(modelDict,context={"output_dto":self.output_dto}) + self.output_dto.addContentToDictMappings(deployment) elif contentType == SecurityContentType.playbooks: - playbook = Playbook.model_validate(modelDict,context={"output_dto":self.output_dto}) - self.output_dto.addContentToDictMappings(playbook) + playbook = Playbook.model_validate(modelDict,context={"output_dto":self.output_dto}) + self.output_dto.addContentToDictMappings(playbook) elif contentType == SecurityContentType.baselines: - baseline = Baseline.model_validate(modelDict,context={"output_dto":self.output_dto}) - self.output_dto.addContentToDictMappings(baseline) + baseline = Baseline.model_validate(modelDict,context={"output_dto":self.output_dto}) + self.output_dto.addContentToDictMappings(baseline) elif contentType == SecurityContentType.investigations: - investigation = Investigation.model_validate(modelDict,context={"output_dto":self.output_dto}) - self.output_dto.addContentToDictMappings(investigation) + investigation = Investigation.model_validate(modelDict,context={"output_dto":self.output_dto}) + self.output_dto.addContentToDictMappings(investigation) elif contentType == SecurityContentType.stories: - story = Story.model_validate(modelDict,context={"output_dto":self.output_dto}) - self.output_dto.addContentToDictMappings(story) + story = Story.model_validate(modelDict,context={"output_dto":self.output_dto}) + self.output_dto.addContentToDictMappings(story) elif 
contentType == SecurityContentType.detections: - detection = Detection.model_validate(modelDict,context={"output_dto":self.output_dto, "app":self.input_dto.app}) - self.output_dto.addContentToDictMappings(detection) + detection = Detection.model_validate(modelDict,context={"output_dto":self.output_dto, "app":self.input_dto.app}) + self.output_dto.addContentToDictMappings(detection) elif contentType == SecurityContentType.ssa_detections: - self.constructSSADetection(self.ssa_detection_builder, self.output_dto,str(file)) - ssa_detection = self.ssa_detection_builder.getObject() - if ssa_detection.status in [DetectionStatus.production.value, DetectionStatus.validation.value]: - self.output_dto.addContentToDictMappings(ssa_detection) + self.constructSSADetection(self.ssa_detection_builder, self.output_dto,str(file)) + ssa_detection = self.ssa_detection_builder.getObject() + if ssa_detection.status in [DetectionStatus.production.value, DetectionStatus.validation.value]: + self.output_dto.addContentToDictMappings(ssa_detection) elif contentType == SecurityContentType.data_sources: data_source = DataSource.model_validate( modelDict, context={"output_dto": self.output_dto} ) - self.output_dto.data_sources.append(data_source) - - elif contentType == SecurityContentType.event_sources: - event_source = EventSource.model_validate( - modelDict, context={"output_dto": self.output_dto} - ) - self.output_dto.event_sources.append(event_source) + self.output_dto.addContentToDictMappings(data_source) else: raise Exception(f"Unsupported type: [{contentType}]") diff --git a/contentctl/input/yml_reader.py b/contentctl/input/yml_reader.py index 37714a2c..ba83eca0 100644 --- a/contentctl/input/yml_reader.py +++ b/contentctl/input/yml_reader.py @@ -40,6 +40,10 @@ def load_file(file_path: pathlib.Path, add_fields=True, STRICT_YML_CHECKING=Fals if add_fields == False: return yml_obj - yml_obj['file_path'] = str(file_path) + try: + yml_obj['file_path'] = str(file_path) + except Exception as e: 
+ import code + code.interact(local=locals()) return yml_obj diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index 4b7c6b77..93591505 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -23,9 +23,10 @@ from contentctl.objects.test_group import TestGroup from contentctl.objects.integration_test import IntegrationTest from contentctl.objects.event_source import EventSource +from contentctl.objects.data_source import DataSource #from contentctl.objects.playbook import Playbook -from contentctl.objects.enums import DataSource,ProvidingTechnology +from contentctl.objects.enums import ProvidingTechnology from contentctl.enrichments.cve_enrichment import CveEnrichmentObj @@ -40,8 +41,6 @@ class Detection_Abstract(SecurityContentObject): search: Union[str, dict[str,Any]] = Field(...) how_to_implement: str = Field(..., min_length=4) known_false_positives: str = Field(..., min_length=4) - data_source_objects: Optional[List[DataSource]] = None - event_source_objects: Optional[List[EventSource]] = None enabled_by_default: bool = False file_path: FilePath = Field(...) 
@@ -54,6 +53,8 @@ class Detection_Abstract(SecurityContentObject): # A list of groups of tests, relying on the same data test_groups: Union[list[TestGroup], None] = Field(None,validate_default=True) + data_source_objects: Optional[List[DataSource]] = None + @field_validator("search", mode="before") @classmethod @@ -139,6 +140,7 @@ def datamodel(self)->List[DataModel]: else: return [] + @computed_field @property def source(self)->str: @@ -387,43 +389,34 @@ def model_post_init(self, ctx:dict[str,Any]): raise ValueError(f"Error, failed to replace detection reference in Baseline '{baseline.name}' to detection '{self.name}'") baseline.tags.detections = new_detections - self.data_source_objects = [] - self.event_source_objects = [] - for detection_data_source in self.data_source: - split_data_sources = [ds.strip() for ds in detection_data_source.split('AND')] - for split_data_source in split_data_sources: - data_source_found = False - for data_source_obj in director.data_sources: - if data_source_obj.name in split_data_source: - self.data_source_objects.append(data_source_obj) - data_source_found = True - break - for event_source_obj in director.event_sources: - if event_source_obj.name == split_data_source: - self.event_source_objects.append(event_source_obj) - data_source_found = True - break - - if not data_source_found: - raise ValueError(f"Error, data source object '{split_data_source}' not found.") - - - unique_data_sources = {} - for data_source_obj in self.data_source_objects: - if data_source_obj.name not in unique_data_sources: - unique_data_sources[data_source_obj.name] = data_source_obj - self.data_source_objects = list(unique_data_sources.values()) - - unique_event_sources = {} - for event_source_obj in self.event_source_objects: - if event_source_obj.name not in unique_event_sources: - unique_event_sources[event_source_obj.name] = event_source_obj - self.event_source_objects = list(unique_event_sources.values()) + # Data source may be defined 1 on each 
line, OR they may be defined as + # SOUCE_1 AND ANOTHERSOURCE AND A_THIRD_SOURCE + # if more than 1 data source is required for a detection (for example, because it includes a join) + # Parse and update the list to resolve individual names and remove potential duplicates + updated_data_source_names:set[str] = set() + + for ds in self.data_source: + split_data_sources = {d.strip() for d in ds.split('AND')} + updated_data_source_names.update(split_data_sources) + + sources = sorted(list(updated_data_source_names)) + + matched_data_sources:list[DataSource] = [] + missing_sources: list[str] = [] + for source in sources: + try: + matched_data_sources += DataSource.mapNamesToSecurityContentObjects([source], director) + except Exception as data_source_mapping_exception: + missing_sources.append(source) + if len(missing_sources) > 0: + # This will be changed to ValueError when we have a complete list of data sources + print(f"WARNING: The following exception occurred when mapping the data_source field to DataSource objects:{missing_sources}") + + self.data_source_objects = matched_data_sources for story in self.tags.analytic_story: story.detections.append(self) story.data_sources.extend(self.data_source_objects) - story.event_sources.extend(self.event_source_objects) return self diff --git a/contentctl/objects/data_source.py b/contentctl/objects/data_source.py index 08aeeba1..f25e9668 100644 --- a/contentctl/objects/data_source.py +++ b/contentctl/objects/data_source.py @@ -11,7 +11,8 @@ class DataSource(SecurityContentObject): configuration: Optional[str] = None supported_TA: Optional[list] = None fields: Optional[list] = None + field_mappings: Optional[list] = None + convert_to_log_source: Optional[list] = None example_log: Optional[str] = None - event_sources: Optional[list] = None diff --git a/contentctl/objects/enums.py b/contentctl/objects/enums.py index e7016033..fa294302 100644 --- a/contentctl/objects/enums.py +++ b/contentctl/objects/enums.py @@ -56,7 +56,6 @@ 
class SecurityContentType(enum.Enum): unit_tests = 9 ssa_detections = 10 data_sources = 11 - event_sources = 12 # Bringing these changes back in line will take some time after # the initial merge is complete diff --git a/contentctl/objects/story.py b/contentctl/objects/story.py index 064bfc02..a623a055 100644 --- a/contentctl/objects/story.py +++ b/contentctl/objects/story.py @@ -8,7 +8,6 @@ from contentctl.objects.investigation import Investigation from contentctl.objects.baseline import Baseline from contentctl.objects.data_source import DataSource - from contentctl.objects.event_source import EventSource from contentctl.objects.security_content_object import SecurityContentObject @@ -35,7 +34,7 @@ class Story(SecurityContentObject): investigations: List[Investigation] = [] baselines: List[Baseline] = [] data_sources: List[DataSource] = [] - event_sources: List[EventSource] = [] + def storyAndInvestigationNamesWithApp(self, app_name:str)->List[str]: return [f"{app_name} - {name} - Rule" for name in self.detection_names] + \ @@ -143,7 +142,3 @@ def investigation_names(self)->List[str]: def baseline_names(self)->List[str]: return [baseline.name for baseline in self.baselines] - - - - \ No newline at end of file diff --git a/contentctl/output/data_source_writer.py b/contentctl/output/data_source_writer.py index 02c90b46..ba505905 100644 --- a/contentctl/output/data_source_writer.py +++ b/contentctl/output/data_source_writer.py @@ -2,11 +2,12 @@ from contentctl.objects.data_source import DataSource from contentctl.objects.event_source import EventSource from typing import List +import pathlib class DataSourceWriter: @staticmethod - def writeDataSourceCsv(data_source_objects: List[DataSource], file_path: str): + def writeDataSourceCsv(data_source_objects: List[DataSource], file_path: pathlib.Path): with open(file_path, mode='w', newline='') as file: writer = csv.writer(file) # Write the header @@ -37,20 +38,3 @@ def writeDataSourceCsv(data_source_objects: 
List[DataSource], file_path: str): supported_TA_url, data_source.description, ]) - @staticmethod - def writeEventSourceCsv(event_source_objects: List[EventSource], file_path: str): - with open(file_path, mode='w', newline='') as file: - writer = csv.writer(file) - # Write the header - writer.writerow([ - "name", "id", "author", "description", "fields" - ]) - # Write the data - for event_source in event_source_objects: - writer.writerow([ - event_source.name, - event_source.id, - event_source.author, - event_source.description, - "; ".join(event_source.fields) - ]) \ No newline at end of file From 2eba93e6b9a907bea4de56ee5532bffb40d94d2b Mon Sep 17 00:00:00 2001 From: Patrick Date: Wed, 24 Jul 2024 13:53:43 +0200 Subject: [PATCH 06/11] merged with develop --- contentctl/input/director.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/contentctl/input/director.py b/contentctl/input/director.py index 4b3388c7..fb839740 100644 --- a/contentctl/input/director.py +++ b/contentctl/input/director.py @@ -118,10 +118,6 @@ def __init__(self, output_dto: DirectorOutputDto) -> None: self.output_dto = output_dto self.ssa_detection_builder = SSADetectionBuilder() - self.output_dto.addContentToDictMappings(Lookup.model_construct(description= "A lookup file that will contain the data source objects for detections.", - filename=pathlib.Path("data_sources.csv"), - name="data_sources")) - def execute(self, input_dto: validate) -> None: self.input_dto = input_dto self.createSecurityContent(SecurityContentType.deployments) From 805a7a6a6e6291352805593e28b0e2a618d91ca6 Mon Sep 17 00:00:00 2001 From: pyth0n1c Date: Wed, 24 Jul 2024 16:17:22 -0700 Subject: [PATCH 07/11] Better printout when data sources are not found. These results are printed at the end instead of during individual validations. Next, fixed a serious issue where the NAME of a piece of content could be mapped to the WRONG piece of content. 
--- contentctl/actions/build.py | 2 +- contentctl/actions/initialize.py | 1 - contentctl/input/director.py | 12 +++++++++++- contentctl/input/yml_reader.py | 8 +++----- .../detection_abstract.py | 9 +++++++-- .../security_content_object_abstract.py | 4 ++-- 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/contentctl/actions/build.py b/contentctl/actions/build.py index 80c93d47..769e08ba 100644 --- a/contentctl/actions/build.py +++ b/contentctl/actions/build.py @@ -34,7 +34,7 @@ def execute(self, input_dto: BuildInputDto) -> DirectorOutputDto: updated_conf_files:set[pathlib.Path] = set() conf_output = ConfOutput(input_dto.config) - DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, str(input_dto.config.path) + "/lookups/data_sources.csv") + DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, input_dto.config.path / "lookups" / "data_sources.csv") updated_conf_files.update(conf_output.writeHeaders()) updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.detections, SecurityContentType.detections)) diff --git a/contentctl/actions/initialize.py b/contentctl/actions/initialize.py index 9d3fb337..dc2cd91e 100644 --- a/contentctl/actions/initialize.py +++ b/contentctl/actions/initialize.py @@ -29,7 +29,6 @@ def execute(self, config: test) -> None: ('../templates/deployments/', 'deployments'), ('../templates/detections/', 'detections'), ('../templates/data_sources/', 'data_sources'), - ('../templates/event_sources/', 'event_sources'), ('../templates/macros/','macros'), ('../templates/stories/', 'stories'), ]: diff --git a/contentctl/input/director.py b/contentctl/input/director.py index fb839740..0740abe3 100644 --- a/contentctl/input/director.py +++ b/contentctl/input/director.py @@ -68,7 +68,7 @@ def addContentToDictMappings(self, content: SecurityContentObject): # for this function we prepend 'SSA ' to the name. 
content_name = f"SSA {content_name}" - if content_name in self.name_to_content_map and isinstance(self.name_to_content_map[content_name], type(content)): + if content_name in self.name_to_content_map: raise ValueError( f"Duplicate name '{content_name}' with paths:\n" f" - {content.file_path}\n" @@ -131,6 +131,16 @@ def execute(self, input_dto: validate) -> None: self.createSecurityContent(SecurityContentType.detections) self.createSecurityContent(SecurityContentType.ssa_detections) + + from contentctl.objects.abstract_security_content_objects.detection_abstract import MISSING_SOURCES + if len(MISSING_SOURCES) > 0: + missing_sources_string = "\n 🟡 ".join(sorted(list(MISSING_SOURCES))) + print("WARNING: The following data_sources have been used in detections, but are not yet defined.\n" + "This is not yet an error since not all data_sources have been defined, but will be convered to an error soon:\n 🟡 " + f"{missing_sources_string}") + else: + print("No missing data_sources!") + def createSecurityContent(self, contentType: SecurityContentType) -> None: if contentType == SecurityContentType.ssa_detections: files = Utils.get_all_yml_files_from_directory( diff --git a/contentctl/input/yml_reader.py b/contentctl/input/yml_reader.py index ba83eca0..11bea479 100644 --- a/contentctl/input/yml_reader.py +++ b/contentctl/input/yml_reader.py @@ -40,10 +40,8 @@ def load_file(file_path: pathlib.Path, add_fields=True, STRICT_YML_CHECKING=Fals if add_fields == False: return yml_obj - try: - yml_obj['file_path'] = str(file_path) - except Exception as e: - import code - code.interact(local=locals()) + + yml_obj['file_path'] = str(file_path) + return yml_obj diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index 93591505..dff32294 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ 
b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -29,6 +29,7 @@ from contentctl.objects.enums import ProvidingTechnology from contentctl.enrichments.cve_enrichment import CveEnrichmentObj +MISSING_SOURCES:set[str] = set() class Detection_Abstract(SecurityContentObject): model_config = ConfigDict(use_enum_values=True) @@ -402,12 +403,16 @@ def model_post_init(self, ctx:dict[str,Any]): sources = sorted(list(updated_data_source_names)) matched_data_sources:list[DataSource] = [] - missing_sources: list[str] = [] + missing_sources:list[str] = [] for source in sources: try: matched_data_sources += DataSource.mapNamesToSecurityContentObjects([source], director) except Exception as data_source_mapping_exception: - missing_sources.append(source) + # We gobble this up and add it to a global set so that we + # can print it ONCE at the end of the build of datasources. + # This will be removed later as per the note below + MISSING_SOURCES.add(source) + if len(missing_sources) > 0: # This will be changed to ValueError when we have a complete list of data sources print(f"WARNING: The following exception occurred when mapping the data_source field to DataSource objects:{missing_sources}") diff --git a/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py b/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py index 90c5376d..c68839b3 100644 --- a/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py @@ -125,9 +125,9 @@ def mapNamesToSecurityContentObjects(cls, v: list[str], director:Union[DirectorO errors:list[str] = [] if len(missing_objects) > 0: errors.append(f"Failed to find the following '{cls.__name__}': {missing_objects}") - if len(missing_objects) > 0: + if len(mistyped_objects) > 0: for mistyped_object in mistyped_objects: - 
errors.append(f"'{mistyped_object.name}' expected to have type '{type(Self)}', but actually had type '{type(mistyped_object)}'") + errors.append(f"'{mistyped_object.name}' expected to have type '{cls}', but actually had type '{type(mistyped_object)}'") if len(errors) > 0: error_string = "\n - ".join(errors) From fe8f9df61937b0709387f46a6e51f3e4d77cfc79 Mon Sep 17 00:00:00 2001 From: pyth0n1c Date: Wed, 24 Jul 2024 16:50:53 -0700 Subject: [PATCH 08/11] create data sources lookup and the appropriate CSV at build time for app. --- contentctl/actions/build.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/contentctl/actions/build.py b/contentctl/actions/build.py index 769e08ba..051223bc 100644 --- a/contentctl/actions/build.py +++ b/contentctl/actions/build.py @@ -11,6 +11,7 @@ from contentctl.output.ba_yml_output import BAYmlOutput from contentctl.output.api_json_output import ApiJsonOutput from contentctl.output.data_source_writer import DataSourceWriter +from contentctl.objects.lookup import Lookup import pathlib import json import datetime @@ -34,7 +35,14 @@ def execute(self, input_dto: BuildInputDto) -> DirectorOutputDto: updated_conf_files:set[pathlib.Path] = set() conf_output = ConfOutput(input_dto.config) - DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, input_dto.config.path / "lookups" / "data_sources.csv") + # Construct a special lookup whose CSV is created at runtime and + # written directly into the output folder. It is created with model_construct, + # not model_validate, because the CSV does not exist yet. 
+ data_sources_lookup_csv_path = input_dto.config.getPackageDirectoryPath() / "lookups" / "data_sources.csv" + DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, data_sources_lookup_csv_path) + input_dto.director_output_dto.addContentToDictMappings(Lookup.model_construct(description= "A lookup file that will contain the data source objects for detections.", + filename=data_sources_lookup_csv_path, + name="data_sources")) updated_conf_files.update(conf_output.writeHeaders()) updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.detections, SecurityContentType.detections)) From bf3b6834baf2fc06e6db406e97b02ba7bb33a7cb Mon Sep 17 00:00:00 2001 From: pyth0n1c Date: Thu, 25 Jul 2024 10:24:57 -0700 Subject: [PATCH 09/11] get rid of event_sources folder. Update sysmon_eventid_1.yml with latest version from other repo for contentctl init to work properly --- .../templates/data_sources/Sysmon_EventID.yml | 51 ------------------- .../sysmon_eventid_1.yml} | 51 ++++++++++++++++++- 2 files changed, 50 insertions(+), 52 deletions(-) delete mode 100644 contentctl/templates/data_sources/Sysmon_EventID.yml rename contentctl/templates/{event_sources/Sysmon_EventID_1.yml => data_sources/sysmon_eventid_1.yml} (71%) diff --git a/contentctl/templates/data_sources/Sysmon_EventID.yml b/contentctl/templates/data_sources/Sysmon_EventID.yml deleted file mode 100644 index 3a04aac4..00000000 --- a/contentctl/templates/data_sources/Sysmon_EventID.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Sysmon EventID -id: 848aec1b-90aa-48a9-ae52-31d3a2e79697 -author: Patrick Bareiss, Splunk -source: XmlWinEventLog:Microsoft-Windows-Sysmon/Operational -sourcetype: xmlwineventlog -separator: EventID -configuration: https://github.com/SwiftOnSecurity/sysmon-config -supported_TA: -- name: Splunk Add-on for Sysmon - url: https://splunkbase.splunk.com/app/5709/ - version: 4.0.0 -description: Data source object for Sysmon EventID -event_names: -- data_source: 
event_sources/Sysmon_EventID_1.yml - event_name: Sysmon EventID 1 -- data_source: event_sources/Sysmon_EventID_10.yml - event_name: Sysmon EventID 10 -- data_source: event_sources/Sysmon_EventID_11.yml - event_name: Sysmon EventID 11 -- data_source: event_sources/Sysmon_EventID_12.yml - event_name: Sysmon EventID 12 -- data_source: event_sources/Sysmon_EventID_13.yml - event_name: Sysmon EventID 13 -- data_source: event_sources/Sysmon_EventID_14.yml - event_name: Sysmon EventID 14 -- data_source: event_sources/Sysmon_EventID_15.yml - event_name: Sysmon EventID 15 -- data_source: event_sources/Sysmon_EventID_17.yml - event_name: Sysmon EventID 17 -- data_source: event_sources/Sysmon_EventID_18.yml - event_name: Sysmon EventID 18 -- data_source: event_sources/Sysmon_EventID_20.yml - event_name: Sysmon EventID 20 -- data_source: event_sources/Sysmon_EventID_21.yml - event_name: Sysmon EventID 21 -- data_source: event_sources/Sysmon_EventID_22.yml - event_name: Sysmon EventID 22 -- data_source: event_sources/Sysmon_EventID_23.yml - event_name: Sysmon EventID 23 -- data_source: event_sources/Sysmon_EventID_3.yml - event_name: Sysmon EventID 3 -- data_source: event_sources/Sysmon_EventID_5.yml - event_name: Sysmon EventID 5 -- data_source: event_sources/Sysmon_EventID_6.yml - event_name: Sysmon EventID 6 -- data_source: event_sources/Sysmon_EventID_7.yml - event_name: Sysmon EventID 7 -- data_source: event_sources/Sysmon_EventID_8.yml - event_name: Sysmon EventID 8 -- data_source: event_sources/Sysmon_EventID_9.yml - event_name: Sysmon EventID 9 diff --git a/contentctl/templates/event_sources/Sysmon_EventID_1.yml b/contentctl/templates/data_sources/sysmon_eventid_1.yml similarity index 71% rename from contentctl/templates/event_sources/Sysmon_EventID_1.yml rename to contentctl/templates/data_sources/sysmon_eventid_1.yml index 5a19cd0a..6e279b45 100644 --- a/contentctl/templates/event_sources/Sysmon_EventID_1.yml +++ 
b/contentctl/templates/data_sources/sysmon_eventid_1.yml @@ -1,7 +1,16 @@ name: Sysmon EventID 1 id: b375f4d1-d7ca-4bc0-9103-294825c0af17 +version: 1 +date: '2024-07-18' author: Patrick Bareiss, Splunk -description: Event source object for Sysmon EventID 1 +description: Data source object for Sysmon EventID 1 +source: XmlWinEventLog:Microsoft-Windows-Sysmon/Operational +sourcetype: xmlwineventlog +separator: EventID +supported_TA: +- name: Splunk Add-on for Sysmon + url: https://splunkbase.splunk.com/app/5709/ + version: 4.0.0 fields: - _time - Channel @@ -100,6 +109,46 @@ fields: - user - user_id - vendor_product +field_mappings: + - data_model: cim + data_set: Endpoint.Processes + mapping: + ProcessGuid: Processes.process_guid + ProcessId: Processes.process_id + Image: Processes.process_path + Image|endswith: Processes.process_name + CommandLine: Processes.process + CurrentDirectory: Processes.process_current_directory + User: Processes.user + IntegrityLevel: Processes.process_integrity_level + Hashes: Processes.process_hash + ParentProcessGuid: Processes.parent_process_guid + ParentProcessId: Processes.parent_process_id + ParentImage: Processes.parent_process_name + ParentCommandLine: Processes.parent_process + Computer: Processes.dest + OriginalFileName: Processes.original_file_name +convert_to_log_source: + - data_source: Windows Event Log Security 4688 + mapping: + ProcessId: NewProcessId + Image: NewProcessName + Image|endswith: NewProcessName|endswith + CommandLine: Process_Command_Line + User: SubjectUserSid + ParentProcessId: ProcessId + ParentImage: ParentProcessName + ParentImage|endswith: ParentProcessName|endswith + Computer: Computer + OriginalFileName: NewProcessName|endswith + - data_source: Crowdstrike Process + mapping: + ProcessId: RawProcessId + Image: ImageFileName + CommandLine: CommandLine + User: UserSid + ParentProcessId: ParentProcessId + ParentImage: ParentBaseFileName example_log: "154100x80000000000000004522 Date: Thu, 25 Jul 2024 
14:58:39 -0700 Subject: [PATCH 10/11] Fix error where duplicate data_sources were added to an analytic story if multiple detections referenced the same data_source. This was done by making data_sources a computed_field for Story rather than building it while detection objects are built. Additionally added eq, lt, and hash methods to SecurityContentObject_Abstract so that set operations and sorts can happen easily for all objects. --- .../detection_abstract.py | 16 +++++------ .../security_content_object_abstract.py | 27 ++++++++++++++++++ contentctl/objects/data_source.py | 28 +++++++++++++++++-- contentctl/objects/story.py | 12 +++++++- 4 files changed, 72 insertions(+), 11 deletions(-) diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index dff32294..c9e4a87c 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -37,7 +37,7 @@ class Detection_Abstract(SecurityContentObject): #contentType: SecurityContentType = SecurityContentType.detections type: AnalyticsType = Field(...) status: DetectionStatus = Field(...) - data_source: Optional[List[str]] = None + data_source: list[str] = [] tags: DetectionTags = Field(...) search: Union[str, dict[str,Any]] = Field(...)
how_to_implement: str = Field(..., min_length=4) @@ -54,7 +54,7 @@ class Detection_Abstract(SecurityContentObject): # A list of groups of tests, relying on the same data test_groups: Union[list[TestGroup], None] = Field(None,validate_default=True) - data_source_objects: Optional[List[DataSource]] = None + data_source_objects: list[DataSource] = [] @field_validator("search", mode="before") @@ -420,9 +420,7 @@ def model_post_init(self, ctx:dict[str,Any]): self.data_source_objects = matched_data_sources for story in self.tags.analytic_story: - story.detections.append(self) - story.data_sources.extend(self.data_source_objects) - + story.detections.append(self) return self @@ -446,14 +444,16 @@ def mapDetectionNamesToBaselineObjects(cls, v:list[str], info:ValidationInfo)->L raise ValueError("Error, baselines are constructed automatically at runtime. Please do not include this field.") - name:Union[str,dict] = info.data.get("name",None) + name:Union[str,None] = info.data.get("name",None) if name is None: raise ValueError("Error, cannot get Baselines because the Detection does not have a 'name' defined.") - + director:DirectorOutputDto = info.context.get("output_dto",None) baselines:List[Baseline] = [] for baseline in director.baselines: - if name in baseline.tags.detections: + # This matching is a bit strange, because baseline.tags.detections starts as a list of strings, but + # is eventually updated to a list of Detections as we construct all of the detection objects. 
+ if name in [detection_name for detection_name in baseline.tags.detections if isinstance(detection_name,str)]: baselines.append(baseline) return baselines diff --git a/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py b/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py index c68839b3..430872be 100644 --- a/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py @@ -194,6 +194,33 @@ def __repr__(self)->str: def __str__(self)->str: return(self.__repr__()) + + def __lt__(self, other:object)->bool: + if not isinstance(other,SecurityContentObject_Abstract): + raise Exception(f"SecurityContentObject can only be compared to each other, not to {type(other)}") + return self.name < other.name + + def __eq__(self, other:object)->bool: + if not isinstance(other,SecurityContentObject_Abstract): + raise Exception(f"SecurityContentObject can only be compared to each other, not to {type(other)}") + + if id(self) == id(other) and self.name == other.name and self.id == other.id: + # Yes, this is the same object + return True + + elif id(self) == id(other) or self.name == other.name or self.id == other.id: + raise Exception("Attempted to compare two SecurityContentObjects, but their fields indicate they were not globally unique:" + f"\n\tid(obj1) : {id(self)}" + f"\n\tid(obj2) : {id(other)}" + f"\n\tobj1.name : {self.name}" + f"\n\tobj2.name : {other.name}" + f"\n\tobj1.id : {self.id}" + f"\n\tobj2.id : {other.id}") + else: + return False + + def __hash__(self) -> NonNegativeInt: + return id(self) diff --git a/contentctl/objects/data_source.py b/contentctl/objects/data_source.py index f25e9668..7e31a9a4 100644 --- a/contentctl/objects/data_source.py +++ b/contentctl/objects/data_source.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Union, Optional, List 
-from pydantic import model_validator, Field, FilePath +from typing import Optional, Any +from pydantic import Field, FilePath, model_serializer from contentctl.objects.security_content_object import SecurityContentObject from contentctl.objects.event_source import EventSource @@ -16,3 +16,27 @@ class DataSource(SecurityContentObject): example_log: Optional[str] = None + @model_serializer + def serialize_model(self): + #Call serializer for parent + super_fields = super().serialize_model() + + #All fields custom to this model + model:dict[str,Any] = { + "source": self.source, + "sourcetype": self.sourcetype, + "separator": self.separator, + "configuration": self.configuration, + "supported_TA": self.supported_TA, + "fields": self.fields, + "field_mappings": self.field_mappings, + "convert_to_log_source": self.convert_to_log_source, + "example_log":self.example_log + } + + + #Combine fields from this model with fields from parent + super_fields.update(model) + + #return the model + return super_fields \ No newline at end of file diff --git a/contentctl/objects/story.py b/contentctl/objects/story.py index a623a055..36558388 100644 --- a/contentctl/objects/story.py +++ b/contentctl/objects/story.py @@ -33,7 +33,17 @@ class Story(SecurityContentObject): detections:List[Detection] = [] investigations: List[Investigation] = [] baselines: List[Baseline] = [] - data_sources: List[DataSource] = [] + + + @computed_field + @property + def data_sources(self)-> list[DataSource]: + # Only add a data_source if it does not already exist in the story + data_source_objects:set[DataSource] = set() + for detection in self.detections: + data_source_objects.update(set(detection.data_source_objects)) + + return sorted(list(data_source_objects)) def storyAndInvestigationNamesWithApp(self, app_name:str)->List[str]: From 85bd7c83000f456d8b8247c89fd06f7749a54150 Mon Sep 17 00:00:00 2001 From: pyth0n1c Date: Thu, 25 Jul 2024 15:02:30 -0700 Subject: [PATCH 11/11] bump to version 4.2 for release 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5b0d74cc..28bfa0bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "contentctl" -version = "4.1.5" +version = "4.2.0" description = "Splunk Content Control Tool" authors = ["STRT "] license = "Apache 2.0"