diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 650a5e95..b1bdacd5 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -12,3 +12,8 @@ updates: schedule: interval: "daily" open-pull-requests-limit: 6 + - package-ecosystem: "github-actions" + directory: "/" + schedule: + # Check for updates to GitHub Actions every week + interval: "weekly" \ No newline at end of file diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 00000000..4843361a --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,22 @@ +name: lint & format +on: + pull_request: + types: [opened, reopened, synchronize] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install ruff + run: | + python -m pip install --upgrade pip + pip install ruff + - name: Run lint + run: ruff check --output-format=github contentctl/ + - name: Run Formatter + run: ruff format --check contentctl/ \ No newline at end of file diff --git a/.github/workflows/testEndToEnd.yml b/.github/workflows/testEndToEnd.yml index 29e0958e..6b1a4b20 100644 --- a/.github/workflows/testEndToEnd.yml +++ b/.github/workflows/testEndToEnd.yml @@ -1,8 +1,7 @@ name: testEndToEnd on: - push: pull_request: - types: [opened, reopened] + types: [opened, reopened, synchronize] schedule: - cron: "44 4 * * *" @@ -11,8 +10,8 @@ jobs: strategy: fail-fast: false matrix: - python_version: ["3.11", "3.12"] - operating_system: ["ubuntu-20.04", "ubuntu-22.04", "macos-latest", "macos-14", "windows-2022"] + python_version: ["3.11", "3.12", "3.13"] + operating_system: ["ubuntu-24.04", "macos-15", "windows-2022"] #operating_system: ["ubuntu-20.04", "ubuntu-22.04", "macos-latest"] diff --git a/.github/workflows/test_against_escu.yml b/.github/workflows/test_against_escu.yml index b527a6ee..f29e6a6f 100644 --- a/.github/workflows/test_against_escu.yml +++ b/.github/workflows/test_against_escu.yml @@ -6,9 +6,8 @@ # note yet been fixed in security_content, we may see this workflow fail. 
name: test_against_escu on: - push: pull_request: - types: [opened, reopened] + types: [opened, reopened, synchronize] schedule: - cron: "44 4 * * *" @@ -17,9 +16,9 @@ jobs: strategy: fail-fast: false matrix: - python_version: ["3.11", "3.12"] + python_version: ["3.11", "3.12", "3.13"] - operating_system: ["ubuntu-20.04", "ubuntu-22.04", "macos-latest", "macos-14"] + operating_system: ["ubuntu-24.04", "macos-15"] # Do not test against ESCU until known character encoding issue is resolved # operating_system: ["ubuntu-20.04", "ubuntu-22.04", "macos-latest", "macos-14", "windows-2022"] @@ -36,6 +35,7 @@ jobs: with: path: security_content repository: splunk/security_content + ref: rba_migration #Install the given version of Python we will test against - name: Install Required Python Version diff --git a/.gitignore b/.gitignore index 2e4fcc96..156005c0 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ poetry.lock # usual mac files .DS_Store */.DS_Store +.ruff_cache # custom dist/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..30df3046 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 # Use the ref you want to point at + hooks: + - id: check-json + - id: check-symlinks + - id: check-yaml + - id: detect-aws-credentials + - id: detect-private-key + - id: forbid-submodules + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.9.2 + hooks: + - id: ruff + args: [ --fix ] + - id: ruff-format diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000..8d1a435f --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,5 @@ +{ + "recommendations": [ + "charliermarsh.ruff" + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 8a62413d..74d85aa8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,7 +4,15 @@ "python.testing.cwd": "${workspaceFolder}", "python.languageServer": "Pylance", "python.analysis.typeCheckingMode": "strict", - "editor.defaultFormatter": "ms-python.black-formatter" + "[python]": { + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.fixAll": "explicit", + "source.organizeImports": "explicit" + }, + "editor.defaultFormatter": "charliermarsh.ruff", + }, + "ruff.nativeServer": "on" } \ No newline at end of file diff --git a/contentctl/actions/build.py b/contentctl/actions/build.py index feb0351b..5e3acdb3 100644 --- a/contentctl/actions/build.py +++ b/contentctl/actions/build.py @@ -4,17 +4,17 @@ from dataclasses import dataclass -from contentctl.objects.enums import SecurityContentProduct, SecurityContentType +from contentctl.objects.enums import SecurityContentType from contentctl.input.director import Director, DirectorOutputDto from contentctl.output.conf_output import ConfOutput from contentctl.output.conf_writer import ConfWriter from contentctl.output.api_json_output import ApiJsonOutput from contentctl.output.data_source_writer import DataSourceWriter -from contentctl.objects.lookup import Lookup +from contentctl.objects.lookup import CSVLookup, Lookup_Type import pathlib import json import datetime -from typing import Union +import uuid from contentctl.objects.config import build @@ -34,27 +34,41 @@ def execute(self, input_dto: BuildInputDto) -> DirectorOutputDto: updated_conf_files:set[pathlib.Path] = set() conf_output = ConfOutput(input_dto.config) + + # Construct a path to a YML that does not 
actually exist. + # We mock this "fake" path since the YML does not exist. + # This ensures the checking for the existence of the CSV is correct + data_sources_fake_yml_path = input_dto.config.getPackageDirectoryPath() / "lookups" / "data_sources.yml" + # Construct a special lookup whose CSV is created at runtime and - # written directly into the output folder. It is created with model_construct, - # not model_validate, because the CSV does not exist yet. + # written directly into the lookups folder. We will delete this after a build, + # assuming that it is successful. data_sources_lookup_csv_path = input_dto.config.getPackageDirectoryPath() / "lookups" / "data_sources.csv" - DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, data_sources_lookup_csv_path) - input_dto.director_output_dto.addContentToDictMappings(Lookup.model_construct(description= "A lookup file that will contain the data source objects for detections.", - filename=data_sources_lookup_csv_path, - name="data_sources")) + + + DataSourceWriter.writeDataSourceCsv(input_dto.director_output_dto.data_sources, data_sources_lookup_csv_path) + input_dto.director_output_dto.addContentToDictMappings(CSVLookup.model_construct(name="data_sources", + id=uuid.UUID("b45c1403-6e09-47b0-824f-cf6e44f15ac8"), + version=1, + author=input_dto.config.app.author_name, + date = datetime.date.today(), + description= "A lookup file that will contain the data source objects for detections.", + lookup_type=Lookup_Type.csv, + file_path=data_sources_fake_yml_path)) updated_conf_files.update(conf_output.writeHeaders()) - updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.detections, SecurityContentType.detections)) - updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.stories, SecurityContentType.stories)) - updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.baselines, SecurityContentType.baselines)) - updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.investigations, SecurityContentType.investigations)) - updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.lookups, SecurityContentType.lookups)) - updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.macros, SecurityContentType.macros)) - updated_conf_files.update(conf_output.writeObjects(input_dto.director_output_dto.dashboards, SecurityContentType.dashboards)) + updated_conf_files.update(conf_output.writeLookups(input_dto.director_output_dto.lookups)) + updated_conf_files.update(conf_output.writeDetections(input_dto.director_output_dto.detections)) + updated_conf_files.update(conf_output.writeStories(input_dto.director_output_dto.stories)) + updated_conf_files.update(conf_output.writeBaselines(input_dto.director_output_dto.baselines)) + updated_conf_files.update(conf_output.writeInvestigations(input_dto.director_output_dto.investigations)) + updated_conf_files.update(conf_output.writeMacros(input_dto.director_output_dto.macros)) + updated_conf_files.update(conf_output.writeDashboards(input_dto.director_output_dto.dashboards)) updated_conf_files.update(conf_output.writeMiscellaneousAppFiles()) + #Ensure that the conf file we just generated/update is syntactically valid for conf_file in updated_conf_files: ConfWriter.validateConfFile(conf_file) @@ -67,17 +81,15 @@ def execute(self, input_dto: BuildInputDto) -> DirectorOutputDto: if input_dto.config.build_api: shutil.rmtree(input_dto.config.getAPIPath(), 
ignore_errors=True) input_dto.config.getAPIPath().mkdir(parents=True) - api_json_output = ApiJsonOutput() - for output_objects, output_type in [(input_dto.director_output_dto.detections, SecurityContentType.detections), - (input_dto.director_output_dto.stories, SecurityContentType.stories), - (input_dto.director_output_dto.baselines, SecurityContentType.baselines), - (input_dto.director_output_dto.investigations, SecurityContentType.investigations), - (input_dto.director_output_dto.lookups, SecurityContentType.lookups), - (input_dto.director_output_dto.macros, SecurityContentType.macros), - (input_dto.director_output_dto.deployments, SecurityContentType.deployments)]: - api_json_output.writeObjects(output_objects, input_dto.config.getAPIPath(), input_dto.config.app.label, output_type ) - - + api_json_output = ApiJsonOutput(input_dto.config.getAPIPath(), input_dto.config.app.label) + api_json_output.writeDetections(input_dto.director_output_dto.detections) + api_json_output.writeStories(input_dto.director_output_dto.stories) + api_json_output.writeBaselines(input_dto.director_output_dto.baselines) + api_json_output.writeInvestigations(input_dto.director_output_dto.investigations) + api_json_output.writeLookups(input_dto.director_output_dto.lookups) + api_json_output.writeMacros(input_dto.director_output_dto.macros) + api_json_output.writeDeployments(input_dto.director_output_dto.deployments) + #create version file for sse api version_file = input_dto.config.getAPIPath()/"version.json" diff --git a/contentctl/actions/detection_testing/DetectionTestingManager.py b/contentctl/actions/detection_testing/DetectionTestingManager.py index 5ad5e117..13058e2f 100644 --- a/contentctl/actions/detection_testing/DetectionTestingManager.py +++ b/contentctl/actions/detection_testing/DetectionTestingManager.py @@ -5,7 +5,6 @@ from contentctl.actions.detection_testing.infrastructures.DetectionTestingInfrastructureServer import DetectionTestingInfrastructureServer from urllib.parse import urlparse from copy import deepcopy -from contentctl.objects.enums import DetectionTestingTargetInfrastructure import signal import datetime # from queue import Queue diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py index 8e816025..42cad6c0 100644 --- a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +++ b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py @@ -442,7 +442,7 @@ def test_detection(self, detection: Detection) -> None: self.format_pbar_string( TestReportingType.GROUP, test_group.name, - FinalTestingStates.SKIP.value, + FinalTestingStates.SKIP, start_time=time.time(), set_pbar=False, ) @@ -483,7 +483,7 @@ def test_detection(self, detection: Detection) -> None: self.format_pbar_string( TestReportingType.GROUP, test_group.name, - TestingStates.DONE_GROUP.value, + TestingStates.DONE_GROUP, start_time=setup_results.start_time, set_pbar=False, ) @@ -504,7 +504,7 @@ def setup_test_group(self, test_group: TestGroup) -> SetupTestGroupResults: self.format_pbar_string( TestReportingType.GROUP, test_group.name, - TestingStates.BEGINNING_GROUP.value, + TestingStates.BEGINNING_GROUP, start_time=setup_start_time ) # https://github.com/WoLpH/python-progressbar/issues/164 @@ -544,7 +544,7 @@ def cleanup_test_group( self.format_pbar_string( TestReportingType.GROUP, test_group.name, - TestingStates.DELETING.value, + 
TestingStates.DELETING, start_time=test_group_start_time, ) @@ -632,7 +632,7 @@ def execute_unit_test( self.format_pbar_string( TestReportingType.UNIT, f"{detection.name}:{test.name}", - FinalTestingStates.SKIP.value, + FinalTestingStates.SKIP, start_time=test_start_time, set_pbar=False, ) @@ -664,7 +664,7 @@ def execute_unit_test( self.format_pbar_string( TestReportingType.UNIT, f"{detection.name}:{test.name}", - FinalTestingStates.ERROR.value, + FinalTestingStates.ERROR, start_time=test_start_time, set_pbar=False, ) @@ -724,7 +724,7 @@ def execute_unit_test( res = "ERROR" link = detection.search else: - res = test.result.status.value.upper() # type: ignore + res = test.result.status.upper() # type: ignore link = test.result.get_summary_dict()["sid_link"] self.format_pbar_string( @@ -755,7 +755,7 @@ def execute_unit_test( self.format_pbar_string( TestReportingType.UNIT, f"{detection.name}:{test.name}", - FinalTestingStates.PASS.value, + FinalTestingStates.PASS, start_time=test_start_time, set_pbar=False, ) @@ -766,7 +766,7 @@ def execute_unit_test( self.format_pbar_string( TestReportingType.UNIT, f"{detection.name}:{test.name}", - FinalTestingStates.SKIP.value, + FinalTestingStates.SKIP, start_time=test_start_time, set_pbar=False, ) @@ -777,7 +777,7 @@ def execute_unit_test( self.format_pbar_string( TestReportingType.UNIT, f"{detection.name}:{test.name}", - FinalTestingStates.FAIL.value, + FinalTestingStates.FAIL, start_time=test_start_time, set_pbar=False, ) @@ -788,7 +788,7 @@ def execute_unit_test( self.format_pbar_string( TestReportingType.UNIT, f"{detection.name}:{test.name}", - FinalTestingStates.ERROR.value, + FinalTestingStates.ERROR, start_time=test_start_time, set_pbar=False, ) @@ -821,7 +821,7 @@ def execute_integration_test( test_start_time = time.time() # First, check to see if the test should be skipped (Hunting or Correlation) - if detection.type in [AnalyticsType.Hunting.value, AnalyticsType.Correlation.value]: + if detection.type in [AnalyticsType.Hunting, AnalyticsType.Correlation]: test.skip( f"TEST SKIPPED: detection is type {detection.type} and cannot be integration " "tested at this time" @@ -843,11 +843,11 @@ def execute_integration_test( # Determine the reporting state (we should only encounter SKIP/FAIL/ERROR) state: str if test.result.status == TestResultStatus.SKIP: - state = FinalTestingStates.SKIP.value + state = FinalTestingStates.SKIP elif test.result.status == TestResultStatus.FAIL: - state = FinalTestingStates.FAIL.value + state = FinalTestingStates.FAIL elif test.result.status == TestResultStatus.ERROR: - state = FinalTestingStates.ERROR.value + state = FinalTestingStates.ERROR else: raise ValueError( f"Status for (integration) '{detection.name}:{test.name}' was preemptively set" @@ -891,7 +891,7 @@ def execute_integration_test( self.format_pbar_string( TestReportingType.INTEGRATION, f"{detection.name}:{test.name}", - FinalTestingStates.FAIL.value, + FinalTestingStates.FAIL, start_time=test_start_time, set_pbar=False, ) @@ -935,7 +935,7 @@ def execute_integration_test( if test.result is None: res = "ERROR" else: - res = test.result.status.value.upper() # type: ignore + res = test.result.status.upper() # type: ignore # Get the link to the saved search in this specific instance link = f"https://{self.infrastructure.instance_address}:{self.infrastructure.web_ui_port}" @@ -968,7 +968,7 @@ def execute_integration_test( self.format_pbar_string( TestReportingType.INTEGRATION, f"{detection.name}:{test.name}", - FinalTestingStates.PASS.value, + 
FinalTestingStates.PASS, start_time=test_start_time, set_pbar=False, ) @@ -979,7 +979,7 @@ def execute_integration_test( self.format_pbar_string( TestReportingType.INTEGRATION, f"{detection.name}:{test.name}", - FinalTestingStates.SKIP.value, + FinalTestingStates.SKIP, start_time=test_start_time, set_pbar=False, ) @@ -990,7 +990,7 @@ def execute_integration_test( self.format_pbar_string( TestReportingType.INTEGRATION, f"{detection.name}:{test.name}", - FinalTestingStates.FAIL.value, + FinalTestingStates.FAIL, start_time=test_start_time, set_pbar=False, ) @@ -1001,7 +1001,7 @@ def execute_integration_test( self.format_pbar_string( TestReportingType.INTEGRATION, f"{detection.name}:{test.name}", - FinalTestingStates.ERROR.value, + FinalTestingStates.ERROR, start_time=test_start_time, set_pbar=False, ) @@ -1077,7 +1077,7 @@ def retry_search_until_timeout( self.format_pbar_string( TestReportingType.UNIT, f"{detection.name}:{test.name}", - TestingStates.PROCESSING.value, + TestingStates.PROCESSING, start_time=start_time ) @@ -1086,7 +1086,7 @@ def retry_search_until_timeout( self.format_pbar_string( TestReportingType.UNIT, f"{detection.name}:{test.name}", - TestingStates.SEARCHING.value, + TestingStates.SEARCHING, start_time=start_time, ) @@ -1094,6 +1094,7 @@ def retry_search_until_timeout( job = self.get_conn().search(query=search, **kwargs) results = JSONResultsReader(job.results(output_mode="json")) + # TODO (cmcginley): @ljstella you're removing this ultimately, right? # Consolidate a set of the distinct observable field names observable_fields_set = set([o.name for o in detection.tags.observable]) # keeping this around for later risk_object_fields_set = set([o.name for o in detection.tags.observable if "Victim" in o.role ]) # just the "Risk Objects" @@ -1121,7 +1122,10 @@ def retry_search_until_timeout( missing_risk_objects = risk_object_fields_set - results_fields_set if len(missing_risk_objects) > 0: # Report a failure in such cases - e = Exception(f"The observable field(s) {missing_risk_objects} are missing in the detection results") + e = Exception( + f"The risk object field(s) {missing_risk_objects} are missing in the " + "detection results" + ) test.result.set_job_content( job.content, self.infrastructure, @@ -1137,6 +1141,8 @@ def retry_search_until_timeout( # on a field. In this case, the field will appear but will not contain any values current_empty_fields: set[str] = set() + # TODO (cmcginley): @ljstella is this something we're keeping for testing as + # well? 
for field in observable_fields_set: if result.get(field, 'null') == 'null': if field in risk_object_fields_set: @@ -1289,7 +1295,7 @@ def replay_attack_data_file( self.format_pbar_string( TestReportingType.GROUP, test_group.name, - TestingStates.DOWNLOADING.value, + TestingStates.DOWNLOADING, start_time=test_group_start_time ) @@ -1307,7 +1313,7 @@ def replay_attack_data_file( self.format_pbar_string( TestReportingType.GROUP, test_group.name, - TestingStates.REPLAYING.value, + TestingStates.REPLAYING, start_time=test_group_start_time ) diff --git a/contentctl/actions/detection_testing/progress_bar.py b/contentctl/actions/detection_testing/progress_bar.py index 45e30e06..5b5abd1a 100644 --- a/contentctl/actions/detection_testing/progress_bar.py +++ b/contentctl/actions/detection_testing/progress_bar.py @@ -1,10 +1,10 @@ import time -from enum import Enum +from enum import StrEnum from tqdm import tqdm import datetime -class TestReportingType(str, Enum): +class TestReportingType(StrEnum): """ 5-char identifiers for the type of testing being reported on """ @@ -21,7 +21,7 @@ class TestReportingType(str, Enum): INTEGRATION = "INTEG" -class TestingStates(str, Enum): +class TestingStates(StrEnum): """ Defined testing states """ @@ -40,10 +40,10 @@ class TestingStates(str, Enum): # the longest length of any state -LONGEST_STATE = max(len(w.value) for w in TestingStates) +LONGEST_STATE = max(len(w) for w in TestingStates) -class FinalTestingStates(str, Enum): +class FinalTestingStates(StrEnum): """ The possible final states for a test (for pbar reporting) """ @@ -82,7 +82,7 @@ def format_pbar_string( :returns: a formatted string for use w/ pbar """ # Extract and ljust our various fields - field_one = test_reporting_type.value + field_one = test_reporting_type field_two = test_name.ljust(MAX_TEST_NAME_LENGTH) field_three = state.ljust(LONGEST_STATE) field_four = datetime.timedelta(seconds=round(time.time() - start_time)) diff --git a/contentctl/actions/detection_testing/views/DetectionTestingView.py b/contentctl/actions/detection_testing/views/DetectionTestingView.py index 8ff6e583..98cc7122 100644 --- a/contentctl/actions/detection_testing/views/DetectionTestingView.py +++ b/contentctl/actions/detection_testing/views/DetectionTestingView.py @@ -110,11 +110,11 @@ def getSummaryObject( total_skipped += 1 # Aggregate production status metrics - if detection.status == DetectionStatus.production.value: # type: ignore + if detection.status == DetectionStatus.production: total_production += 1 - elif detection.status == DetectionStatus.experimental.value: # type: ignore + elif detection.status == DetectionStatus.experimental: total_experimental += 1 - elif detection.status == DetectionStatus.deprecated.value: # type: ignore + elif detection.status == DetectionStatus.deprecated: total_deprecated += 1 # Check if the detection is manual_test @@ -178,7 +178,7 @@ def getSummaryObject( # Construct and return the larger results dict result_dict = { "summary": { - "mode": self.config.getModeName(), + "mode": self.config.mode.mode_name, "enable_integration_testing": self.config.enable_integration_testing, "success": overall_success, "total_detections": total_detections, diff --git a/contentctl/actions/new_content.py b/contentctl/actions/new_content.py index 0a54cf11..3d5fa5b6 100644 --- a/contentctl/actions/new_content.py +++ b/contentctl/actions/new_content.py @@ -1,77 +1,115 @@ - - from dataclasses import dataclass import questionary from typing import Any from contentctl.input.new_content_questions import 
NewContentQuestions -from contentctl.output.new_content_yml_output import NewContentYmlOutput from contentctl.objects.config import new, NewContentType import uuid from datetime import datetime import pathlib from contentctl.objects.abstract_security_content_objects.security_content_object_abstract import SecurityContentObject_Abstract from contentctl.output.yml_writer import YmlWriter - +from contentctl.objects.enums import AssetType +from contentctl.objects.constants import SES_OBSERVABLE_TYPE_MAPPING, SES_OBSERVABLE_ROLE_MAPPING class NewContent: + UPDATE_PREFIX = "__UPDATE__" + + DEFAULT_DRILLDOWN_DEF = [ + { + "name": f'View the detection results for - "${UPDATE_PREFIX}FIRST_RISK_OBJECT$" and "${UPDATE_PREFIX}SECOND_RISK_OBJECT$"', + "search": f'%original_detection_search% | search "${UPDATE_PREFIX}FIRST_RISK_OBJECT = "${UPDATE_PREFIX}FIRST_RISK_OBJECT$" second_observable_type_here = "${UPDATE_PREFIX}SECOND_RISK_OBJECT$"', + "earliest_offset": '$info_min_time$', + "latest_offset": '$info_max_time$' + }, + { + "name": f'View risk events for the last 7 days for - "${UPDATE_PREFIX}FIRST_RISK_OBJECT$" and "${UPDATE_PREFIX}SECOND_RISK_OBJECT$"', + "search": f'| from datamodel Risk.All_Risk | search normalized_risk_object IN ("${UPDATE_PREFIX}FIRST_RISK_OBJECT$", "${UPDATE_PREFIX}SECOND_RISK_OBJECT$") starthoursago=168 | stats count min(_time) as firstTime max(_time) as lastTime values(search_name) as "Search Name" values(risk_message) as "Risk Message" values(analyticstories) as "Analytic Stories" values(annotations._all) as "Annotations" values(annotations.mitre_attack.mitre_tactic) as "ATT&CK Tactics" by normalized_risk_object | `security_content_ctime(firstTime)` | `security_content_ctime(lastTime)`', + "earliest_offset": '$info_min_time$', + "latest_offset": '$info_max_time$' + } + ] + - def buildDetection(self)->dict[str,Any]: + def buildDetection(self) -> tuple[dict[str, Any], str]: questions = NewContentQuestions.get_questions_detection() - answers: dict[str,str] = questionary.prompt( - questions, - kbi_msg="User did not answer all of the prompt questions. Exiting...") + answers: dict[str, str] = questionary.prompt( + questions, + kbi_msg="User did not answer all of the prompt questions. 
Exiting...", + ) if not answers: raise ValueError("User didn't answer one or more questions!") - answers.update(answers) - answers['name'] = answers['detection_name'] - del answers['detection_name'] - answers['id'] = str(uuid.uuid4()) - answers['version'] = 1 - answers['date'] = datetime.today().strftime('%Y-%m-%d') - answers['author'] = answers['detection_author'] - del answers['detection_author'] - answers['data_source'] = answers['data_source'] - answers['type'] = answers['detection_type'] - del answers['detection_type'] - answers['status'] = "production" #start everything as production since that's what we INTEND the content to become - answers['description'] = 'UPDATE_DESCRIPTION' - file_name = answers['name'].replace(' ', '_').replace('-','_').replace('.','_').replace('/','_').lower() - answers['search'] = answers['detection_search'] + ' | `' + file_name + '_filter`' - del answers['detection_search'] - answers['how_to_implement'] = 'UPDATE_HOW_TO_IMPLEMENT' - answers['known_false_positives'] = 'UPDATE_KNOWN_FALSE_POSITIVES' - answers['references'] = ['REFERENCE'] - answers['tags'] = dict() - answers['tags']['analytic_story'] = ['UPDATE_STORY_NAME'] - answers['tags']['asset_type'] = 'UPDATE asset_type' - answers['tags']['confidence'] = 'UPDATE value between 1-100' - answers['tags']['impact'] = 'UPDATE value between 1-100' - answers['tags']['message'] = 'UPDATE message' - answers['tags']['mitre_attack_id'] = [x.strip() for x in answers['mitre_attack_ids'].split(',')] - answers['tags']['observable'] = [{'name': 'UPDATE', 'type': 'UPDATE', 'role': ['UPDATE']}] - answers['tags']['product'] = ['Splunk Enterprise','Splunk Enterprise Security','Splunk Cloud'] - answers['tags']['required_fields'] = ['UPDATE'] - answers['tags']['risk_score'] = 'UPDATE (impact * confidence)/100' - answers['tags']['security_domain'] = answers['security_domain'] - del answers["security_domain"] - answers['tags']['cve'] = ['UPDATE WITH CVE(S) IF APPLICABLE'] - - #generate the tests section - answers['tests'] = [ - { - 'name': "True Positive Test", - 'attack_data': [ - { - 'data': "https://github.com/splunk/contentctl/wiki", - "sourcetype": "UPDATE SOURCETYPE", - "source": "UPDATE SOURCE" - } - ] - } - ] - del answers["mitre_attack_ids"] - return answers - def buildStory(self)->dict[str,Any]: + data_source_field = ( + answers["data_source"] if len(answers["data_source"]) > 0 else [f"{NewContent.UPDATE_PREFIX} zero or more data_sources"] + ) + file_name = ( + answers["detection_name"] + .replace(" ", "_") + .replace("-", "_") + .replace(".", "_") + .replace("/", "_") + .lower() + ) + + #Minimum lenght for a mitre tactic is 5 characters: T1000 + if len(answers["mitre_attack_ids"]) >= 5: + mitre_attack_ids = [x.strip() for x in answers["mitre_attack_ids"].split(",")] + else: + #string was too short, so just put a placeholder + mitre_attack_ids = [f"{NewContent.UPDATE_PREFIX} zero or more mitre_attack_ids"] + + output_file_answers: dict[str, Any] = { + "name": answers["detection_name"], + "id": str(uuid.uuid4()), + "version": 1, + "date": datetime.today().strftime("%Y-%m-%d"), + "author": answers["detection_author"], + "status": "production", # start everything as production since that's what we INTEND the content to become + "type": answers["detection_type"], + "description": f"{NewContent.UPDATE_PREFIX} by providing a description of your search", + "data_source": data_source_field, + "search": f"{answers['detection_search']} | `{file_name}_filter`", + "how_to_implement": f"{NewContent.UPDATE_PREFIX} how to implement 
your search", + "known_false_positives": f"{NewContent.UPDATE_PREFIX} known false positives for your search", + "references": [f"{NewContent.UPDATE_PREFIX} zero or more http references to provide more information about your search"], + "drilldown_searches": NewContent.DEFAULT_DRILLDOWN_DEF, + "tags": { + "analytic_story": [f"{NewContent.UPDATE_PREFIX} by providing zero or more analytic stories"], + "asset_type": f"{NewContent.UPDATE_PREFIX} by providing and asset type from {list(AssetType._value2member_map_)}", + "confidence": f"{NewContent.UPDATE_PREFIX} by providing a value between 1-100", + "impact": f"{NewContent.UPDATE_PREFIX} by providing a value between 1-100", + "message": f"{NewContent.UPDATE_PREFIX} by providing a risk message. Fields in your search results can be referenced using $fieldName$", + "mitre_attack_id": mitre_attack_ids, + "observable": [ + {"name": f"{NewContent.UPDATE_PREFIX} the field name of the observable. This is a field that exists in your search results.", "type": f"{NewContent.UPDATE_PREFIX} the type of your observable from the list {list(SES_OBSERVABLE_TYPE_MAPPING.keys())}.", "role": [f"{NewContent.UPDATE_PREFIX} the role from the list {list(SES_OBSERVABLE_ROLE_MAPPING.keys())}"]} + ], + "product": [ + "Splunk Enterprise", + "Splunk Enterprise Security", + "Splunk Cloud", + ], + "security_domain": answers["security_domain"], + "cve": [f"{NewContent.UPDATE_PREFIX} with CVE(s) if applicable"], + }, + "tests": [ + { + "name": "True Positive Test", + "attack_data": [ + { + "data": f"{NewContent.UPDATE_PREFIX} the data file to replay. Go to https://github.com/splunk/contentctl/wiki for information about the format of this field", + "sourcetype": f"{NewContent.UPDATE_PREFIX} the sourcetype of your data file.", + "source": f"{NewContent.UPDATE_PREFIX} the source of your datafile", + } + ], + } + ], + } + + if answers["detection_type"] not in ["TTP", "Anomaly", "Correlation"]: + del output_file_answers["drilldown_searches"] + + return output_file_answers, answers['detection_kind'] + + def buildStory(self) -> dict[str, Any]: questions = NewContentQuestions.get_questions_story() answers = questionary.prompt( questions, @@ -96,12 +134,11 @@ def buildStory(self)->dict[str,Any]: del answers['usecase'] answers['tags']['cve'] = ['UPDATE WITH CVE(S) IF APPLICABLE'] return answers - def execute(self, input_dto: new) -> None: if input_dto.type == NewContentType.detection: - content_dict = self.buildDetection() - subdirectory = pathlib.Path('detections') / content_dict.pop('detection_kind') + content_dict, detection_kind = self.buildDetection() + subdirectory = pathlib.Path('detections') / detection_kind elif input_dto.type == NewContentType.story: content_dict = self.buildStory() subdirectory = pathlib.Path('stories') @@ -111,23 +148,3 @@ def execute(self, input_dto: new) -> None: full_output_path = input_dto.path / subdirectory / SecurityContentObject_Abstract.contentNameToFileName(content_dict.get('name')) YmlWriter.writeYmlFile(str(full_output_path), content_dict) - - - def writeObjectNewContent(self, object: dict, subdirectory_name: str, type: NewContentType) -> None: - if type == NewContentType.detection: - file_path = os.path.join(self.output_path, 'detections', subdirectory_name, self.convertNameToFileName(object['name'], object['tags']['product'])) - output_folder = pathlib.Path(self.output_path)/'detections'/subdirectory_name - #make sure the output folder exists for this detection - output_folder.mkdir(exist_ok=True) - - YmlWriter.writeDetection(file_path, object) 
- print("Successfully created detection " + file_path) - - elif type == NewContentType.story: - file_path = os.path.join(self.output_path, 'stories', self.convertNameToFileName(object['name'], object['tags']['product'])) - YmlWriter.writeStory(file_path, object) - print("Successfully created story " + file_path) - - else: - raise(Exception(f"Object Must be Story or Detection, but is not: {object}")) - diff --git a/contentctl/actions/test.py b/contentctl/actions/test.py index 716ecd71..b3437cef 100644 --- a/contentctl/actions/test.py +++ b/contentctl/actions/test.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import List -from contentctl.objects.config import test_common +from contentctl.objects.config import test_common, Selected, Changes from contentctl.objects.enums import DetectionTestingMode, DetectionStatus, AnalyticsType from contentctl.objects.detection import Detection @@ -78,10 +78,9 @@ def execute(self, input_dto: TestInputDto) -> bool: input_dto=manager_input_dto, output_dto=output_dto ) - mode = input_dto.config.getModeName() if len(input_dto.detections) == 0: print( - f"With Detection Testing Mode '{mode}', there were [0] detections found to test." + f"With Detection Testing Mode '{input_dto.config.mode.mode_name}', there were [0] detections found to test." "\nAs such, we will quit immediately." ) # Directly call stop so that the summary.yml will be generated. Of course it will not @@ -89,8 +88,8 @@ def execute(self, input_dto: TestInputDto) -> bool: # detections were tested. file.stop() else: - print(f"MODE: [{mode}] - Test [{len(input_dto.detections)}] detections") - if mode in [DetectionTestingMode.changes.value, DetectionTestingMode.selected.value]: + print(f"MODE: [{input_dto.config.mode.mode_name}] - Test [{len(input_dto.detections)}] detections") + if isinstance(input_dto.config.mode, Selected) or isinstance(input_dto.config.mode, Changes): files_string = '\n- '.join( [str(pathlib.Path(detection.file_path).relative_to(input_dto.config.path)) for detection in input_dto.detections] ) diff --git a/contentctl/actions/validate.py b/contentctl/actions/validate.py index 9d394d07..c2756f75 100644 --- a/contentctl/actions/validate.py +++ b/contentctl/actions/validate.py @@ -6,6 +6,7 @@ from contentctl.enrichments.attack_enrichment import AttackEnrichment from contentctl.enrichments.cve_enrichment import CveEnrichment from contentctl.objects.atomic import AtomicEnrichment +from contentctl.objects.lookup import FileBackedLookup from contentctl.helper.utils import Utils from contentctl.objects.data_source import DataSource from contentctl.helper.splunk_app import SplunkApp @@ -64,7 +65,7 @@ def ensure_no_orphaned_files_in_lookups(self, repo_path:pathlib.Path, director_o lookupsDirectory = repo_path/"lookups" # Get all of the files referneced by Lookups - usedLookupFiles:list[pathlib.Path] = [lookup.filename for lookup in director_output_dto.lookups if lookup.filename is not None] + [lookup.file_path for lookup in director_output_dto.lookups if lookup.file_path is not None] + usedLookupFiles:list[pathlib.Path] = [lookup.filename for lookup in director_output_dto.lookups if isinstance(lookup, FileBackedLookup)] + [lookup.file_path for lookup in director_output_dto.lookups if lookup.file_path is not None] # Get all of the mlmodel and csv files in the lookups directory csvAndMlmodelFiles = Utils.get_security_content_files_from_directory(lookupsDirectory, allowedFileExtensions=[".yml",".csv",".mlmodel"], fileExtensionsToReturn=[".csv",".mlmodel"]) diff --git 
a/contentctl/contentctl.py b/contentctl/contentctl.py index efef5853..05d9b952 100644 --- a/contentctl/contentctl.py +++ b/contentctl/contentctl.py @@ -1,31 +1,39 @@ -import traceback +import pathlib import sys +import traceback import warnings -import pathlib + import tyro -from contentctl.actions.initialize import Initialize -from contentctl.objects.config import init, validate, build, new, deploy_acs, test, test_servers, inspect, report, test_common, release_notes -from contentctl.actions.validate import Validate -from contentctl.actions.new_content import NewContent +from contentctl.actions.build import Build, BuildInputDto, DirectorOutputDto +from contentctl.actions.deploy_acs import Deploy from contentctl.actions.detection_testing.GitService import GitService -from contentctl.actions.build import ( - BuildInputDto, - DirectorOutputDto, - Build, -) -from contentctl.actions.test import Test -from contentctl.actions.test import TestInputDto -from contentctl.actions.reporting import ReportingInputDto, Reporting +from contentctl.actions.initialize import Initialize from contentctl.actions.inspect import Inspect -from contentctl.input.yml_reader import YmlReader -from contentctl.actions.deploy_acs import Deploy +from contentctl.actions.new_content import NewContent from contentctl.actions.release_notes import ReleaseNotes +from contentctl.actions.reporting import Reporting, ReportingInputDto +from contentctl.actions.test import Test, TestInputDto +from contentctl.actions.validate import Validate +from contentctl.input.yml_reader import YmlReader +from contentctl.objects.config import ( + build, + deploy_acs, + init, + inspect, + new, + release_notes, + report, + test, + test_common, + test_servers, + validate, +) # def print_ascii_art(): # print( # """ -# Running Splunk Security Content Control Tool (contentctl) +# Running Splunk Security Content Control Tool (contentctl) # ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ # ⠀⠀⠀⠀⠀⠀⠀⠀⠀⢶⠛⡇⠀⠀⠀⠀⠀⠀⣠⣦⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ # ⠀⠀⠀⠀⠀⠀⠀⠀⣀⠼⠖⠛⠋⠉⠉⠓⠢⣴⡻⣾⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ @@ -53,114 +61,137 @@ # ) - - -def init_func(config:test): +def init_func(config: test): Initialize().execute(config) -def validate_func(config:validate)->DirectorOutputDto: +def validate_func(config: validate) -> DirectorOutputDto: validate = Validate() return validate.execute(config) -def report_func(config:report)->None: + +def report_func(config: report) -> None: # First, perform validation. Remember that the validate # configuration is actually a subset of the build configuration director_output_dto = validate_func(config) - - r = Reporting() - return r.execute(ReportingInputDto(director_output_dto=director_output_dto, - config=config)) - -def build_func(config:build)->DirectorOutputDto: + r = Reporting() + return r.execute( + ReportingInputDto(director_output_dto=director_output_dto, config=config) + ) + + +def build_func(config: build) -> DirectorOutputDto: # First, perform validation. 
Remember that the validate # configuration is actually a subset of the build configuration director_output_dto = validate_func(config) builder = Build() return builder.execute(BuildInputDto(director_output_dto, config)) -def inspect_func(config:inspect)->str: - #Make sure that we have built the most recent version of the app + +def inspect_func(config: inspect) -> str: + # Make sure that we have built the most recent version of the app _ = build_func(config) inspect_token = Inspect().execute(config) return inspect_token - -def release_notes_func(config:release_notes)->None: + +def release_notes_func(config: release_notes) -> None: ReleaseNotes().release_notes(config) -def new_func(config:new): - NewContent().execute(config) +def new_func(config: new): + NewContent().execute(config) -def deploy_acs_func(config:deploy_acs): +def deploy_acs_func(config: deploy_acs): print("Building and inspecting app...") token = inspect_func(config) print("App successfully built and inspected.") print("Deploying app...") Deploy().execute(config, token) -def test_common_func(config:test_common): + +def test_common_func(config: test_common): if type(config) == test: - #construct the container Infrastructure objects + # construct the container Infrastructure objects config.getContainerInfrastructureObjects() - #otherwise, they have already been passed as servers + # otherwise, they have already been passed as servers director_output_dto = build_func(config) - gitServer = GitService(director=director_output_dto,config=config) + gitServer = GitService(director=director_output_dto, config=config) detections_to_test = gitServer.getContent() - - test_input_dto = TestInputDto(detections_to_test, config) - + t = Test() t.filter_tests(test_input_dto) - + if config.plan_only: - #Emit the test plan and quit. Do not actually run the test - config.dumpCICDPlanAndQuit(gitServer.getHash(),test_input_dto.detections) - return - + # Emit the test plan and quit. Do not actually run the test + config.dumpCICDPlanAndQuit(gitServer.getHash(), test_input_dto.detections) + return + success = t.execute(test_input_dto) - + if success: - #Everything passed! + # Everything passed! print("All tests have run successfully or been marked as 'skipped'") return raise Exception("There was at least one unsuccessful test") + +CONTENTCTL_5_WARNING = """ +***************************************************************************** +WARNING - THIS IS AN ALPHA BUILD OF CONTENTCTL 5. +THERE HAVE BEEN NUMEROUS CHANGES IN CONTENTCTL (ESPECIALLY TO YML FORMATS). +YOU ALMOST CERTAINLY DO NOT WANT TO USE THIS BUILD. +IF YOU ENCOUNTER ERRORS, PLEASE USE THE LATEST CURRENTLY SUPPORTED RELEASE: + +CONTENTCTL==4.4.7 + +YOU HAVE BEEN WARNED! +***************************************************************************** +""" + + def main(): + print(CONTENTCTL_5_WARNING) try: configFile = pathlib.Path("contentctl.yml") - + # We MUST load a config (with testing info) object so that we can # properly construct the command line, including 'contentctl test' parameters.
if not configFile.is_file(): - if "init" not in sys.argv and "--help" not in sys.argv and "-h" not in sys.argv: - raise Exception(f"'{configFile}' not found in the current directory.\n" - "Please ensure you are in the correct directory or run 'contentctl init' to create a new content pack.") - + if ( + "init" not in sys.argv + and "--help" not in sys.argv + and "-h" not in sys.argv + ): + raise Exception( + f"'{configFile}' not found in the current directory.\n" + "Please ensure you are in the correct directory or run 'contentctl init' to create a new content pack." + ) + if "--help" in sys.argv or "-h" in sys.argv: - print("Warning - contentctl.yml is missing from this directory. The configuration values showed at the default and are informational only.\n" - "Please ensure that contentctl.yml exists by manually creating it or running 'contentctl init'") + print( + "Warning - contentctl.yml is missing from this directory. The configuration values showed at the default and are informational only.\n" + "Please ensure that contentctl.yml exists by manually creating it or running 'contentctl init'" + ) # Otherwise generate a stub config file. # It will be used during init workflow t = test() config_obj = t.model_dump() - + else: - #The file exists, so load it up! - config_obj = YmlReader().load_file(configFile) + # The file exists, so load it up! + config_obj = YmlReader().load_file(configFile, add_fields=False) t = test.model_validate(config_obj) except Exception as e: print(f"Error validating 'contentctl.yml':\n{str(e)}") sys.exit(1) - - + # For ease of generating the constructor, we want to allow construction # of an object from default values WITHOUT requiring all fields to be declared # with defaults OR in the config file. As such, we construct the model rather @@ -169,22 +200,19 @@ def main(): models = tyro.extras.subcommand_type_from_defaults( { - "init":init.model_validate(config_obj), + "init": init.model_validate(config_obj), "validate": validate.model_validate(config_obj), "report": report.model_validate(config_obj), - "build":build.model_validate(config_obj), + "build": build.model_validate(config_obj), "inspect": inspect.model_construct(**t.__dict__), - "new":new.model_validate(config_obj), - "test":test.model_validate(config_obj), - "test_servers":test_servers.model_construct(**t.__dict__), + "new": new.model_validate(config_obj), + "test": test.model_validate(config_obj), + "test_servers": test_servers.model_construct(**t.__dict__), "release_notes": release_notes.model_construct(**config_obj), - "deploy_acs": deploy_acs.model_construct(**t.__dict__) + "deploy_acs": deploy_acs.model_construct(**t.__dict__), } ) - - - config = None try: # Since some model(s) were constructed and not model_validated, we have to catch @@ -192,7 +220,6 @@ def main(): with warnings.catch_warnings(action="ignore"): config = tyro.cli(models) - if type(config) == init: t.__dict__.update(config.__dict__) init_func(t) @@ -219,21 +246,29 @@ def main(): print(e) sys.exit(1) except Exception as e: + print(CONTENTCTL_5_WARNING) + if config is None: - print("There was a serious issue where the config file could not be created.\n" - "The entire stack trace is provided below (please include it if filing a bug report).\n") + print( + "There was a serious issue where the config file could not be created.\n" + "The entire stack trace is provided below (please include it if filing a bug report).\n" + ) traceback.print_exc() elif config.verbose: - print("Verbose error logging is ENABLED.\n" - "The entire stack trace 
has been provided below (please include it if filing a bug report):\n") + print( + "Verbose error logging is ENABLED.\n" + "The entire stack trace has been provided below (please include it if filing a bug report):\n" + ) traceback.print_exc() else: - print("Verbose error logging is DISABLED.\n" - "Please use the --verbose command line argument if you need more context for your error or file a bug report.") + print( + "Verbose error logging is DISABLED.\n" + "Please use the --verbose command line argument if you need more context for your error or file a bug report." + ) print(e) - + sys.exit(1) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/contentctl/helper/utils.py b/contentctl/helper/utils.py index 261ecb64..e0649f2d 100644 --- a/contentctl/helper/utils.py +++ b/contentctl/helper/utils.py @@ -247,20 +247,6 @@ def validate_git_pull_request(repo_path: str, pr_number: int) -> str: return hash - # @staticmethod - # def check_required_fields( - # thisField: str, definedFields: dict, requiredFields: list[str] - # ): - # missing_fields = [ - # field for field in requiredFields if field not in definedFields - # ] - # if len(missing_fields) > 0: - # raise ( - # ValueError( - # f"Could not validate - please resolve other errors resulting in missing fields {missing_fields}" - # ) - # ) - @staticmethod def verify_file_exists( file_path: str, verbose_print=False, timeout_seconds: int = 10 diff --git a/contentctl/input/director.py b/contentctl/input/director.py index e18dc596..8462d61e 100644 --- a/contentctl/input/director.py +++ b/contentctl/input/director.py @@ -14,7 +14,7 @@ from contentctl.objects.playbook import Playbook from contentctl.objects.deployment import Deployment from contentctl.objects.macro import Macro -from contentctl.objects.lookup import Lookup +from contentctl.objects.lookup import LookupAdapter, Lookup from contentctl.objects.atomic import AtomicEnrichment from contentctl.objects.security_content_object import SecurityContentObject from contentctl.objects.data_source import DataSource @@ -58,13 +58,12 @@ def addContentToDictMappings(self, content: SecurityContentObject): f" - {content.file_path}\n" f" - {self.name_to_content_map[content_name].file_path}" ) - + if content.id in self.uuid_to_content_map: raise ValueError( f"Duplicate id '{content.id}' with paths:\n" f" - {content.file_path}\n" - f" - {self.uuid_to_content_map[content.id].file_path}" - ) + f" - {self.uuid_to_content_map[content.id].file_path}") if isinstance(content, Lookup): self.lookups.append(content) @@ -157,7 +156,8 @@ def createSecurityContent(self, contentType: SecurityContentType) -> None: modelDict = YmlReader.load_file(file) if contentType == SecurityContentType.lookups: - lookup = Lookup.model_validate(modelDict, context={"output_dto":self.output_dto, "config":self.input_dto}) + lookup = LookupAdapter.validate_python(modelDict, context={"output_dto":self.output_dto, "config":self.input_dto}) + #lookup = Lookup.model_validate(modelDict, context={"output_dto":self.output_dto, "config":self.input_dto}) self.output_dto.addContentToDictMappings(lookup) elif contentType == SecurityContentType.macros: diff --git a/contentctl/input/new_content_questions.py b/contentctl/input/new_content_questions.py index 02b20f46..c3ee5343 100644 --- a/contentctl/input/new_content_questions.py +++ b/contentctl/input/new_content_questions.py @@ -48,7 +48,7 @@ def get_questions_detection(cls) -> list[dict[str,Any]]: { 'type': 'checkbox', 'message': 'Your data source', - 'name': 
'data_source', + 'name': 'data_sources', #In the future, we should dynamically populate this from the DataSource Objects we have parsed from the data_sources directory 'choices': sorted(DataSource._value2member_map_ ) @@ -57,7 +57,7 @@ def get_questions_detection(cls) -> list[dict[str,Any]]: "type": "text", "message": "enter search (spl)", "name": "detection_search", - "default": "| UPDATE_SPL", + "default": "| __UPDATE__ SPL", }, { "type": "text", diff --git a/contentctl/input/yml_reader.py b/contentctl/input/yml_reader.py index 11bea479..49dfb812 100644 --- a/contentctl/input/yml_reader.py +++ b/contentctl/input/yml_reader.py @@ -1,15 +1,12 @@ from typing import Dict, Any - import yaml - - import sys import pathlib class YmlReader(): @staticmethod - def load_file(file_path: pathlib.Path, add_fields=True, STRICT_YML_CHECKING=False) -> Dict[str,Any]: + def load_file(file_path: pathlib.Path, add_fields:bool=True, STRICT_YML_CHECKING:bool=False) -> Dict[str,Any]: try: file_handler = open(file_path, 'r', encoding="utf-8") @@ -27,8 +24,16 @@ def load_file(file_path: pathlib.Path, add_fields=True, STRICT_YML_CHECKING=Fals print(f"Error loading YML file {file_path}: {str(e)}") sys.exit(1) try: - #yml_obj = list(yaml.safe_load_all(file_handler))[0] - yml_obj = yaml.load(file_handler, Loader=yaml.CSafeLoader) + #Ideally we should use + # from contentctl.actions.new_content import NewContent + # and use NewContent.UPDATE_PREFIX, + # but there is a circular dependency right now which makes that difficult. + # We have instead hardcoded UPDATE_PREFIX + UPDATE_PREFIX = "__UPDATE__" + data = file_handler.read() + if UPDATE_PREFIX in data: + raise Exception(f"The file {file_path} contains the value '{UPDATE_PREFIX}'. Please fill out any unpopulated fields as required.") + yml_obj = yaml.load(data, Loader=yaml.CSafeLoader) except yaml.YAMLError as exc: print(exc) sys.exit(1) diff --git a/contentctl/objects/abstract_security_content_objects/detection_abstract.py b/contentctl/objects/abstract_security_content_objects/detection_abstract.py index dc0350d5..c216eb20 100644 --- a/contentctl/objects/abstract_security_content_objects/detection_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/detection_abstract.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Union, Optional, List, Any, Annotated import re import pathlib -from enum import Enum +from enum import StrEnum from pydantic import ( field_validator, @@ -16,7 +16,7 @@ ) from contentctl.objects.macro import Macro -from contentctl.objects.lookup import Lookup +from contentctl.objects.lookup import Lookup, FileBackedLookup, KVStoreLookup if TYPE_CHECKING: from contentctl.input.director import DirectorOutputDto from contentctl.objects.baseline import Baseline @@ -35,6 +35,9 @@ from contentctl.objects.test_group import TestGroup from contentctl.objects.integration_test import IntegrationTest from contentctl.objects.data_source import DataSource + +from contentctl.objects.rba import RBAObject + from contentctl.objects.base_test_result import TestResultStatus from contentctl.objects.drilldown import Drilldown, DRILLDOWN_SEARCH_PLACEHOLDER from contentctl.objects.enums import ProvidingTechnology @@ -51,13 +54,11 @@ # Those AnalyticsTypes that we do not test via contentctl SKIPPED_ANALYTICS_TYPES: set[str] = { - AnalyticsType.Correlation.value + AnalyticsType.Correlation } -# TODO (#266): disable the use_enum_values configuration class Detection_Abstract(SecurityContentObject): - model_config = ConfigDict(use_enum_values=True) name:str = 
Field(...,max_length=CONTENTCTL_MAX_SEARCH_NAME_LENGTH) #contentType: SecurityContentType = SecurityContentType.detections type: AnalyticsType = Field(...) @@ -67,6 +68,7 @@ class Detection_Abstract(SecurityContentObject): search: str = Field(...) how_to_implement: str = Field(..., min_length=4) known_false_positives: str = Field(..., min_length=4) + rba: Optional[RBAObject] = Field(default=None) explanation: None | str = Field( default=None, exclude=True, #Don't serialize this value when dumping the object @@ -78,6 +80,7 @@ class Detection_Abstract(SecurityContentObject): "serialized in analyticstories_detections.j2", ) + enabled_by_default: bool = False file_path: FilePath = Field(...) # For model construction to first attempt construction of the leftmost object. @@ -101,7 +104,7 @@ def get_conf_stanza_name(self, app:CustomApp)->str: def get_action_dot_correlationsearch_dot_label(self, app:CustomApp, max_stanza_length:int=ES_MAX_STANZA_LENGTH)->str: stanza_name = self.get_conf_stanza_name(app) stanza_name_after_saving_in_es = ES_SEARCH_STANZA_NAME_FORMAT_AFTER_CLONING_IN_PRODUCT_TEMPLATE.format( - security_domain_value = self.tags.security_domain.value, + security_domain_value = self.tags.security_domain, search_name = stanza_name ) @@ -210,7 +213,7 @@ def adjust_tests_and_groups(self) -> None: # https://docs.pydantic.dev/latest/api/config/#pydantic.config.ConfigDict.populate_by_name # Skip tests for non-production detections - if self.status != DetectionStatus.production.value: # type: ignore + if self.status != DetectionStatus.production: self.skip_all_tests(f"TEST SKIPPED: Detection is non-production ({self.status})") # Skip tests for detecton types like Correlation which are not supported via contentctl @@ -263,7 +266,7 @@ def test_status(self) -> TestResultStatus | None: @computed_field @property def datamodel(self) -> List[DataModel]: - return [dm for dm in DataModel if dm.value in self.search] + return [dm for dm in DataModel if dm in self.search] @@ -282,10 +285,8 @@ def annotations(self) -> dict[str, Union[List[str], int, str]]: annotations_dict: dict[str, str | list[str] | int] = {} annotations_dict["analytic_story"] = [story.name for story in self.tags.analytic_story] - annotations_dict["confidence"] = self.tags.confidence if len(self.tags.cve or []) > 0: annotations_dict["cve"] = self.tags.cve - annotations_dict["impact"] = self.tags.impact annotations_dict["type"] = self.type annotations_dict["type_list"] = [self.type] # annotations_dict["version"] = self.version @@ -308,13 +309,13 @@ def annotations(self) -> dict[str, Union[List[str], int, str]]: def mappings(self) -> dict[str, List[str]]: mappings: dict[str, Any] = {} if len(self.tags.cis20) > 0: - mappings["cis20"] = [tag.value for tag in self.tags.cis20] + mappings["cis20"] = [tag for tag in self.tags.cis20] if len(self.tags.kill_chain_phases) > 0: - mappings['kill_chain_phases'] = [phase.value for phase in self.tags.kill_chain_phases] + mappings['kill_chain_phases'] = [phase for phase in self.tags.kill_chain_phases] if len(self.tags.mitre_attack_id) > 0: mappings['mitre_attack'] = self.tags.mitre_attack_id if len(self.tags.nist) > 0: - mappings['nist'] = [category.value for category in self.tags.nist] + mappings['nist'] = [category for category in self.tags.nist] # No need to sort the dict! It has been constructed in-order. 
# However, if this logic is changed, then consider reordering or @@ -361,66 +362,87 @@ def nes_fields(self) -> Optional[str]: def providing_technologies(self) -> List[ProvidingTechnology]: return ProvidingTechnology.getProvidingTechFromSearch(self.search) - # TODO (#247): Refactor the risk property of detection_abstract + @computed_field @property def risk(self) -> list[dict[str, Any]]: risk_objects: list[dict[str, str | int]] = [] - # TODO (#246): "User Name" type should map to a "user" risk object and not "other" - risk_object_user_types = {'user', 'username', 'email address'} - risk_object_system_types = {'device', 'endpoint', 'hostname', 'ip address'} - process_threat_object_types = {'process name', 'process'} - file_threat_object_types = {'file name', 'file', 'file hash'} - url_threat_object_types = {'url string', 'url'} - ip_threat_object_types = {'ip address'} - - for entity in self.tags.observable: + + for entity in self.rba.risk_objects: risk_object: dict[str, str | int] = dict() - if 'Victim' in entity.role and entity.type.lower() in risk_object_user_types: - risk_object['risk_object_type'] = 'user' - risk_object['risk_object_field'] = entity.name - risk_object['risk_score'] = self.tags.risk_score - risk_objects.append(risk_object) - - elif 'Victim' in entity.role and entity.type.lower() in risk_object_system_types: - risk_object['risk_object_type'] = 'system' - risk_object['risk_object_field'] = entity.name - risk_object['risk_score'] = self.tags.risk_score - risk_objects.append(risk_object) - - elif 'Attacker' in entity.role and entity.type.lower() in process_threat_object_types: - risk_object['threat_object_field'] = entity.name - risk_object['threat_object_type'] = "process" - risk_objects.append(risk_object) - - elif 'Attacker' in entity.role and entity.type.lower() in file_threat_object_types: - risk_object['threat_object_field'] = entity.name - risk_object['threat_object_type'] = "file_name" - risk_objects.append(risk_object) - - elif 'Attacker' in entity.role and entity.type.lower() in ip_threat_object_types: - risk_object['threat_object_field'] = entity.name - risk_object['threat_object_type'] = "ip_address" - risk_objects.append(risk_object) - - elif 'Attacker' in entity.role and entity.type.lower() in url_threat_object_types: - risk_object['threat_object_field'] = entity.name - risk_object['threat_object_type'] = "url" - risk_objects.append(risk_object) + risk_object['risk_object_type'] = entity.type + risk_object['risk_object_field'] = entity.field + risk_object['risk_score'] = entity.score + risk_objects.append(risk_object) + + for entity in self.rba.threat_objects: + threat_object: dict[str, str] = dict() + threat_object['threat_object_field'] = entity.field + threat_object['threat_object_type'] = entity.type + risk_objects.append(threat_object) + return risk_objects + + + # TODO Remove observable code + # @computed_field + # @property + # def risk(self) -> list[dict[str, Any]]: + # risk_objects: list[dict[str, str | int]] = [] + # # TODO (#246): "User Name" type should map to a "user" risk object and not "other" + # risk_object_user_types = {'user', 'username', 'email address'} + # risk_object_system_types = {'device', 'endpoint', 'hostname', 'ip address'} + # process_threat_object_types = {'process name', 'process'} + # file_threat_object_types = {'file name', 'file', 'file hash'} + # url_threat_object_types = {'url string', 'url'} + # ip_threat_object_types = {'ip address'} + + # for entity in self.tags.observable: + # risk_object: dict[str, str | int] = dict() + 
# if 'Victim' in entity.role and entity.type.lower() in risk_object_user_types: + # risk_object['risk_object_type'] = 'user' + # risk_object['risk_object_field'] = entity.name + # risk_object['risk_score'] = self.tags.risk_score + # risk_objects.append(risk_object) + + # elif 'Victim' in entity.role and entity.type.lower() in risk_object_system_types: + # risk_object['risk_object_type'] = 'system' + # risk_object['risk_object_field'] = entity.name + # risk_object['risk_score'] = self.tags.risk_score + # risk_objects.append(risk_object) + + # elif 'Attacker' in entity.role and entity.type.lower() in process_threat_object_types: + # risk_object['threat_object_field'] = entity.name + # risk_object['threat_object_type'] = "process" + # risk_objects.append(risk_object) + + # elif 'Attacker' in entity.role and entity.type.lower() in file_threat_object_types: + # risk_object['threat_object_field'] = entity.name + # risk_object['threat_object_type'] = "file_name" + # risk_objects.append(risk_object) + + # elif 'Attacker' in entity.role and entity.type.lower() in ip_threat_object_types: + # risk_object['threat_object_field'] = entity.name + # risk_object['threat_object_type'] = "ip_address" + # risk_objects.append(risk_object) + + # elif 'Attacker' in entity.role and entity.type.lower() in url_threat_object_types: + # risk_object['threat_object_field'] = entity.name + # risk_object['threat_object_type'] = "url" + # risk_objects.append(risk_object) - elif 'Attacker' in entity.role: - risk_object['threat_object_field'] = entity.name - risk_object['threat_object_type'] = entity.type.lower() - risk_objects.append(risk_object) + # elif 'Attacker' in entity.role: + # risk_object['threat_object_field'] = entity.name + # risk_object['threat_object_type'] = entity.type.lower() + # risk_objects.append(risk_object) - else: - risk_object['risk_object_type'] = 'other' - risk_object['risk_object_field'] = entity.name - risk_object['risk_score'] = self.tags.risk_score - risk_objects.append(risk_object) - continue + # else: + # risk_object['risk_object_type'] = 'other' + # risk_object['risk_object_field'] = entity.name + # risk_object['risk_score'] = self.tags.risk_score + # risk_objects.append(risk_object) + # continue - return risk_objects + # return risk_objects @computed_field @property @@ -435,7 +457,7 @@ def metadata(self) -> dict[str, str|float]: # break the `inspect` action. 
return { 'detection_id': str(self.id), - 'deprecated': '1' if self.status == DetectionStatus.deprecated.value else '0', # type: ignore + 'deprecated': '1' if self.status == DetectionStatus.deprecated else '0', # type: ignore 'detection_version': str(self.version), 'publish_time': datetime.datetime(self.date.year,self.date.month,self.date.day,0,0,0,0,tzinfo=datetime.timezone.utc).timestamp() } @@ -456,6 +478,11 @@ def serialize_model(self): "source": self.source, "nes_fields": self.nes_fields, } + if self.rba is not None: + model["risk_severity"] = self.rba.severity + model['tags']['risk_score'] = self.rba.risk_score + else: + model['tags']['risk_score'] = 0 # Only a subset of macro fields are required: all_macros: list[dict[str, str | list[str]]] = [] @@ -473,17 +500,17 @@ def serialize_model(self): all_lookups: list[dict[str, str | int | None]] = [] for lookup in self.lookups: - if lookup.collection is not None: + if isinstance(lookup, KVStoreLookup): all_lookups.append( { "name": lookup.name, "description": lookup.description, "collection": lookup.collection, "case_sensitive_match": None, - "fields_list": lookup.fields_list + "fields_list": lookup.fields_to_fields_list_conf_format } ) - elif lookup.filename is not None: + elif isinstance(lookup, FileBackedLookup): all_lookups.append( { "name": lookup.name, @@ -491,9 +518,8 @@ def serialize_model(self): "filename": lookup.filename.name, "default_match": "true" if lookup.default_match else "false", "case_sensitive_match": "true" if lookup.case_sensitive_match else "false", - "match_type": lookup.match_type, - "min_matches": lookup.min_matches, - "fields_list": lookup.fields_list + "match_type": lookup.match_type_to_conf_format, + "min_matches": lookup.min_matches } ) model['lookups'] = all_lookups # type: ignore @@ -570,7 +596,7 @@ def model_post_init(self, __context: Any) -> None: # This is presently a requirement when 1 or more drilldowns are added to a detection. # Note that this is only required for production searches that are not hunting - if self.type == AnalyticsType.Hunting.value or self.status != DetectionStatus.production.value: + if self.type == AnalyticsType.Hunting or self.status != DetectionStatus.production: #No additional check need to happen on the potential drilldowns. pass else: @@ -713,14 +739,14 @@ def only_enabled_if_production_status(cls, v: Any, info: ValidationInfo) -> bool if status != DetectionStatus.production: errors.append( f"status is '{status.name}'. Detections that are enabled by default MUST be " - f"'{DetectionStatus.production.value}'" + f"'{DetectionStatus.production}'" ) if searchType not in [AnalyticsType.Anomaly, AnalyticsType.Correlation, AnalyticsType.TTP]: errors.append( - f"type is '{searchType.value}'. Detections that are enabled by default MUST be one" + f"type is '{searchType}'. 
Detections that are enabled by default MUST be one" " of the following types: " - f"{[AnalyticsType.Anomaly.value, AnalyticsType.Correlation.value, AnalyticsType.TTP.value]}") + f"{[AnalyticsType.Anomaly, AnalyticsType.Correlation, AnalyticsType.TTP]}") if len(errors) > 0: error_message = "\n - ".join(errors) raise ValueError(f"Detection is 'enabled_by_default: true' however \n - {error_message}") @@ -729,7 +755,7 @@ def only_enabled_if_production_status(cls, v: Any, info: ValidationInfo) -> bool @model_validator(mode="after") def addTags_nist(self): - if self.type == AnalyticsType.TTP.value: + if self.type == AnalyticsType.TTP: self.tags.nist = [NistCategory.DE_CM] else: self.tags.nist = [NistCategory.DE_AE] @@ -757,50 +783,93 @@ def ensureThrottlingFieldsExist(self): @model_validator(mode="after") - def ensureProperObservablesExist(self): + def ensureProperRBAConfig(self): """ - If a detections is PRODUCTION and either TTP or ANOMALY, then it MUST have an Observable with the VICTIM role. - + If a detection has an RBA deployment and is PRODUCTION, then it must have an RBA config, with at least one risk object + Returns: - self: Returns itself if the valdiation passes + self: Returns itself if the validation passes """ - # NOTE: we ignore the type error around self.status because we are using Pydantic's - # use_enum_values configuration - # https://docs.pydantic.dev/latest/api/config/#pydantic.config.ConfigDict.populate_by_name - if self.status not in [DetectionStatus.production.value]: # type: ignore - # Only perform this validation on production detections - return self - if self.type not in [AnalyticsType.TTP.value, AnalyticsType.Anomaly.value]: - # Only perform this validation on TTP and Anomaly detections - return self - - # Detection is required to have a victim - roles: list[str] = [] - for observable in self.tags.observable: - roles.extend(observable.role) + + if self.deployment.alert_action.rba is None or self.deployment.alert_action.rba.enabled is False: + # confirm we don't have an RBA config + if self.rba is None: + return self + else: + raise ValueError( + "Detection does not have a matching RBA deployment config, the RBA portion should be omitted." + ) + else: + if self.rba is None: + raise ValueError( + "Detection is expected to have an RBA object based on its deployment config" + ) + else: + if len(self.rba.risk_objects) > 0: # type: ignore + return self + else: + raise ValueError( + "Detection expects an RBA config with at least one risk object." + ) - if roles.count("Victim") == 0: - raise ValueError( - "Error, there must be AT LEAST 1 Observable with the role 'Victim' declared in " - "Detection.tags.observables. However, none were found." - ) - # Exactly one victim was found - return self + # TODO - Remove old observable code + # @model_validator(mode="after") + # def ensureProperObservablesExist(self): + # """ + # If a detections is PRODUCTION and either TTP or ANOMALY, then it MUST have an Observable with the VICTIM role. 
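For illustration, a minimal sketch of the rba shape that the ensureProperRBAConfig validator above expects; the message, risk_objects, and threat_objects names come from this change, while the concrete values and the plain-dict form are assumptions for the example.

# Hypothetical rba config for a production detection whose deployment enables RBA;
# it satisfies ensureProperRBAConfig because at least one risk object is declared.
example_rba = {
    "message": "Suspicious process launched by $user$",
    "risk_objects": [{"field": "user", "type": "user", "score": 56}],
    "threat_objects": [{"field": "parent_process_name", "type": "process"}],
}

# Given a config like this, the refactored risk property earlier in this file would emit:
#   [{"risk_object_type": "user", "risk_object_field": "user", "risk_score": 56},
#    {"threat_object_field": "parent_process_name", "threat_object_type": "process"}]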
+ + # Returns: + # self: Returns itself if the valdiation passes + # """ + # # NOTE: we ignore the type error around self.status because we are using Pydantic's + # # use_enum_values configuration + # # https://docs.pydantic.dev/latest/api/config/#pydantic.config.ConfigDict.populate_by_name + # if self.status not in [DetectionStatus.production.value]: # type: ignore + # # Only perform this validation on production detections + # return self + + # if self.type not in [AnalyticsType.TTP.value, AnalyticsType.Anomaly.value]: + # # Only perform this validation on TTP and Anomaly detections + # return self + + # # Detection is required to have a victim + # roles: list[str] = [] + # for observable in self.tags.observable: + # roles.extend(observable.role) + + # if roles.count("Victim") == 0: + # raise ValueError( + # "Error, there must be AT LEAST 1 Observable with the role 'Victim' declared in " + # "Detection.tags.observables. However, none were found." + # ) + + # # Exactly one victim was found + # return self @model_validator(mode="after") - def search_observables_exist_validate(self): - observable_fields = [ob.name.lower() for ob in self.tags.observable] + def search_rba_fields_exist_validate(self): + # Return immediately if RBA isn't required + if (self.deployment.alert_action.rba.enabled is False or self.deployment.alert_action.rba is None) and self.rba is None: #type: ignore + return self + + # Raise error if RBA isn't present + if self.rba is None: + raise ValueError( + "RBA is required for this detection based on its deployment config" + ) + risk_fields = [ob.field.lower() for ob in self.rba.risk_objects] + threat_fields = [ob.field.lower() for ob in self.rba.threat_objects] + rba_fields = risk_fields + threat_fields - # All $field$ fields from the message must appear in the search field_match_regex = r"\$([^\s.]*)\$" missing_fields: set[str] - if self.tags.message: - matches = re.findall(field_match_regex, self.tags.message.lower()) + if self.rba.message: + matches = re.findall(field_match_regex, self.rba.message.lower()) message_fields = [match.replace("$", "").lower() for match in matches] - missing_fields = set([field for field in observable_fields if field not in self.search.lower()]) + missing_fields = set([field for field in rba_fields if field not in self.search.lower()]) else: message_fields = [] missing_fields = set() @@ -808,10 +877,9 @@ def search_observables_exist_validate(self): error_messages: list[str] = [] if len(missing_fields) > 0: error_messages.append( - "The following fields are declared as observables, but do not exist in the " + "The following fields are declared in the rba config, but do not exist in the " f"search: {missing_fields}" ) - missing_fields = set([field for field in message_fields if field not in self.search.lower()]) if len(missing_fields) > 0: error_messages.append( @@ -819,19 +887,59 @@ def search_observables_exist_validate(self): f"the search: {missing_fields}" ) - # NOTE: we ignore the type error around self.status because we are using Pydantic's - # use_enum_values configuration - # https://docs.pydantic.dev/latest/api/config/#pydantic.config.ConfigDict.populate_by_name - if len(error_messages) > 0 and self.status == DetectionStatus.production.value: # type: ignore + if len(error_messages) > 0 and self.status == DetectionStatus.production: + msg = ( - "Use of fields in observables/messages that do not appear in search:\n\t- " + "Use of fields in rba/messages that do not appear in search:\n\t- " "\n\t- ".join(error_messages) ) raise 
ValueError(msg) - - # Found everything return self + # TODO: Remove old observable code + # @model_validator(mode="after") + # def search_observables_exist_validate(self): + # observable_fields = [ob.name.lower() for ob in self.tags.observable] + + # # All $field$ fields from the message must appear in the search + # field_match_regex = r"\$([^\s.]*)\$" + + # missing_fields: set[str] + # if self.tags.message: + # matches = re.findall(field_match_regex, self.tags.message.lower()) + # message_fields = [match.replace("$", "").lower() for match in matches] + # missing_fields = set([field for field in observable_fields if field not in self.search.lower()]) + # else: + # message_fields = [] + # missing_fields = set() + + # error_messages: list[str] = [] + # if len(missing_fields) > 0: + # error_messages.append( + # "The following fields are declared as observables, but do not exist in the " + # f"search: {missing_fields}" + # ) + + # missing_fields = set([field for field in message_fields if field not in self.search.lower()]) + # if len(missing_fields) > 0: + # error_messages.append( + # "The following fields are used as fields in the message, but do not exist in " + # f"the search: {missing_fields}" + # ) + + # # NOTE: we ignore the type error around self.status because we are using Pydantic's + # # use_enum_values configuration + # # https://docs.pydantic.dev/latest/api/config/#pydantic.config.ConfigDict.populate_by_name + # if len(error_messages) > 0 and self.status == DetectionStatus.production.value: # type: ignore + # msg = ( + # "Use of fields in observables/messages that do not appear in search:\n\t- " + # "\n\t- ".join(error_messages) + # ) + # raise ValueError(msg) + + # # Found everything + # return self + @field_validator("tests", mode="before") def ensure_yml_test_is_unittest(cls, v:list[dict]): """The typing for the tests field allows it to be one of @@ -878,7 +986,7 @@ def tests_validate( info: ValidationInfo ) -> list[UnitTest | IntegrationTest | ManualTest]: # Only production analytics require tests - if info.data.get("status", "") != DetectionStatus.production.value: + if info.data.get("status", "") != DetectionStatus.production: return v # All types EXCEPT Correlation MUST have test(s). 
Any other type, including newly defined @@ -991,7 +1099,7 @@ def get_summary( value = getattr(self, field) # Enums and Path objects cannot be serialized directly, so we convert it to a string - if isinstance(value, Enum) or isinstance(value, pathlib.Path): + if isinstance(value, StrEnum) or isinstance(value, pathlib.Path): value = str(value) # Alias any fields as needed @@ -1013,7 +1121,7 @@ def get_summary( # Initialize the dict as a mapping of strings to str/bool result: dict[str, Union[str, bool]] = { "name": test.name, - "test_type": test.test_type.value + "test_type": test.test_type } # If result is not None, get a summary of the test result w/ the requested fields diff --git a/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py b/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py index f93602f1..f231f5f3 100644 --- a/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py +++ b/contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py @@ -31,10 +31,8 @@ NO_FILE_NAME = "NO_FILE_NAME" -# TODO (#266): disable the use_enum_values configuration class SecurityContentObject_Abstract(BaseModel, abc.ABC): - model_config = ConfigDict(use_enum_values=True,validate_default=True) - + model_config = ConfigDict(validate_default=True,extra="forbid") name: str = Field(...,max_length=99) author: str = Field(...,max_length=255) date: datetime.date = Field(...) @@ -162,10 +160,10 @@ def getDeploymentFromType(typeField: Union[str, None], info: ValidationInfo) -> raise ValueError("Cannot set deployment - DirectorOutputDto not passed to Detection Constructor in context") type_to_deployment_name_map = { - AnalyticsType.TTP.value: "ESCU Default Configuration TTP", - AnalyticsType.Hunting.value: "ESCU Default Configuration Hunting", - AnalyticsType.Correlation.value: "ESCU Default Configuration Correlation", - AnalyticsType.Anomaly.value: "ESCU Default Configuration Anomaly", + AnalyticsType.TTP: "ESCU Default Configuration TTP", + AnalyticsType.Hunting: "ESCU Default Configuration Hunting", + AnalyticsType.Correlation: "ESCU Default Configuration Correlation", + AnalyticsType.Anomaly: "ESCU Default Configuration Anomaly", "Baseline": "ESCU Default Configuration Baseline" } converted_type_field = type_to_deployment_name_map[typeField] diff --git a/contentctl/objects/alert_action.py b/contentctl/objects/alert_action.py index f2f745d4..d2855292 100644 --- a/contentctl/objects/alert_action.py +++ b/contentctl/objects/alert_action.py @@ -1,5 +1,5 @@ from __future__ import annotations -from pydantic import BaseModel, model_serializer +from pydantic import BaseModel, model_serializer, ConfigDict from typing import Optional from contentctl.objects.deployment_email import DeploymentEmail @@ -9,6 +9,7 @@ from contentctl.objects.deployment_phantom import DeploymentPhantom class AlertAction(BaseModel): + model_config = ConfigDict(extra="forbid") email: Optional[DeploymentEmail] = None notable: Optional[DeploymentNotable] = None rba: Optional[DeploymentRBA] = DeploymentRBA() diff --git a/contentctl/objects/atomic.py b/contentctl/objects/atomic.py index a723304d..7e79227c 100644 --- a/contentctl/objects/atomic.py +++ b/contentctl/objects/atomic.py @@ -41,6 +41,7 @@ class InputArgumentType(StrEnum): Url = "Url" class AtomicExecutor(BaseModel): + model_config = ConfigDict(extra="forbid") name: str elevation_required: Optional[bool] = False #Appears to be optional command: Optional[str] = 
None diff --git a/contentctl/objects/base_test.py b/contentctl/objects/base_test.py index 20e681cf..a47ed574 100644 --- a/contentctl/objects/base_test.py +++ b/contentctl/objects/base_test.py @@ -1,13 +1,13 @@ -from enum import Enum +from enum import StrEnum from typing import Union from abc import ABC, abstractmethod -from pydantic import BaseModel +from pydantic import BaseModel,ConfigDict from contentctl.objects.base_test_result import BaseTestResult -class TestType(str, Enum): +class TestType(StrEnum): """ Types of tests """ @@ -21,6 +21,7 @@ def __str__(self) -> str: # TODO (#224): enforce distinct test names w/in detections class BaseTest(BaseModel, ABC): + model_config = ConfigDict(extra="forbid") """ A test case for a detection """ diff --git a/contentctl/objects/base_test_result.py b/contentctl/objects/base_test_result.py index d29f93cb..6f9ce11a 100644 --- a/contentctl/objects/base_test_result.py +++ b/contentctl/objects/base_test_result.py @@ -1,5 +1,5 @@ from typing import Union, Any -from enum import Enum +from enum import StrEnum from pydantic import ConfigDict, BaseModel from splunklib.data import Record # type: ignore @@ -10,7 +10,7 @@ # TODO (#267): Align test reporting more closely w/ status enums (as it relates to "untested") # TODO (PEX-432): add status "UNSET" so that we can make sure the result is always of this enum # type; remove mypy ignores associated w/ these typing issues once we do -class TestResultStatus(str, Enum): +class TestResultStatus(StrEnum): """Enum for test status (e.g. pass/fail)""" # Test failed (detection did NOT fire appropriately) FAIL = "fail" @@ -113,7 +113,7 @@ def get_summary_dict( # Exceptions and enums cannot be serialized, so convert to str if isinstance(getattr(self, field), Exception): summary_dict[field] = str(getattr(self, field)) - elif isinstance(getattr(self, field), Enum): + elif isinstance(getattr(self, field), StrEnum): summary_dict[field] = str(getattr(self, field)) else: summary_dict[field] = getattr(self, field) diff --git a/contentctl/objects/baseline.py b/contentctl/objects/baseline.py index 5dc59d8f..f66b5b2b 100644 --- a/contentctl/objects/baseline.py +++ b/contentctl/objects/baseline.py @@ -1,7 +1,10 @@ from __future__ import annotations -from typing import Annotated, Optional, List,Any -from pydantic import field_validator, ValidationInfo, Field, model_serializer +from typing import Annotated, List,Any, TYPE_CHECKING +if TYPE_CHECKING: + from contentctl.input.director import DirectorOutputDto + +from pydantic import field_validator, ValidationInfo, Field, model_serializer, computed_field from contentctl.objects.deployment import Deployment from contentctl.objects.security_content_object import SecurityContentObject from contentctl.objects.enums import DataModel @@ -9,21 +12,34 @@ from contentctl.objects.config import CustomApp - +from contentctl.objects.lookup import Lookup from contentctl.objects.constants import CONTENTCTL_MAX_SEARCH_NAME_LENGTH,CONTENTCTL_BASELINE_STANZA_NAME_FORMAT_TEMPLATE class Baseline(SecurityContentObject): name:str = Field(...,max_length=CONTENTCTL_MAX_SEARCH_NAME_LENGTH) type: Annotated[str,Field(pattern="^Baseline$")] = Field(...) - datamodel: Optional[List[DataModel]] = None search: str = Field(..., min_length=4) how_to_implement: str = Field(..., min_length=4) known_false_positives: str = Field(..., min_length=4) tags: BaselineTags = Field(...) 
- + lookups: list[Lookup] = Field([], validate_default=True) # enrichment deployment: Deployment = Field({}) - + + + @field_validator('lookups', mode="before") + @classmethod + def getBaselineLookups(cls, v:list[str], info:ValidationInfo) -> list[Lookup]: + ''' + This function has been copied and renamed from the Detection_Abstract class + ''' + director:DirectorOutputDto = info.context.get("output_dto",None) + search: str | None = info.data.get("search",None) + if search is None: + raise ValueError("Search was None - is this file missing the search field?") + + lookups = Lookup.get_lookups(search, director) + return lookups def get_conf_stanza_name(self, app:CustomApp)->str: stanza_name = CONTENTCTL_BASELINE_STANZA_NAME_FORMAT_TEMPLATE.format(app_label=app.label, detection_name=self.name) @@ -34,6 +50,10 @@ def get_conf_stanza_name(self, app:CustomApp)->str: def getDeployment(cls, v:Any, info:ValidationInfo)->Deployment: return Deployment.getDeployment(v,info) + @computed_field + @property + def datamodel(self) -> List[DataModel]: + return [dm for dm in DataModel if dm in self.search] @model_serializer def serialize_model(self): diff --git a/contentctl/objects/baseline_tags.py b/contentctl/objects/baseline_tags.py index ea979664..db5f8048 100644 --- a/contentctl/objects/baseline_tags.py +++ b/contentctl/objects/baseline_tags.py @@ -1,5 +1,5 @@ from __future__ import annotations -from pydantic import BaseModel, Field, field_validator, ValidationInfo, model_serializer +from pydantic import BaseModel, Field, field_validator, ValidationInfo, model_serializer, ConfigDict from typing import List, Any, Union from contentctl.objects.story import Story @@ -12,12 +12,12 @@ class BaselineTags(BaseModel): + model_config = ConfigDict(extra="forbid") analytic_story: list[Story] = Field(...) #deployment: Deployment = Field('SET_IN_GET_DEPLOYMENT_FUNCTION') # TODO (#223): can we remove str from the possible types here? detections: List[Union[Detection,str]] = Field(...) product: List[SecurityContentProductName] = Field(...,min_length=1) - required_fields: List[str] = Field(...,min_length=1) security_domain: SecurityDomain = Field(...) @@ -33,7 +33,6 @@ def serialize_model(self): "analytic_story": [story.name for story in self.analytic_story], "detections": [detection.name for detection in self.detections if isinstance(detection,Detection)], "product": self.product, - "required_fields":self.required_fields, "security_domain":self.security_domain, "deployments": None } diff --git a/contentctl/objects/config.py b/contentctl/objects/config.py index 659d1113..35801bd3 100644 --- a/contentctl/objects/config.py +++ b/contentctl/objects/config.py @@ -33,9 +33,9 @@ SPLUNKBASE_URL = "https://splunkbase.splunk.com/app/{uid}/release/{version}/download" -# TODO (#266): disable the use_enum_values configuration class App_Base(BaseModel,ABC): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True, extra='forbid') uid: Optional[int] = Field(default=None) title: str = Field(description="Human-readable name used by the app. This can have special characters.") appid: Optional[APPID_TYPE]= Field(default=None,description="Internal name used by your app. 
" @@ -59,9 +59,8 @@ def ensureAppPathExists(self, config:test, stage_file:bool=False): config.getLocalAppDir().mkdir(parents=True) -# TODO (#266): disable the use_enum_values configuration class TestApp(App_Base): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) hardcoded_path: Optional[Union[FilePath,HttpUrl]] = Field(default=None, description="This may be a relative or absolute link to a file OR an HTTP URL linking to your app.") @@ -99,9 +98,8 @@ def getApp(self, config:test,stage_file:bool=False)->str: return str(destination) -# TODO (#266): disable the use_enum_values configuration class CustomApp(App_Base): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) # Fields required for app.conf based on # https://docs.splunk.com/Documentation/Splunk/9.0.4/Admin/Appconf uid: int = Field(ge=2, lt=100000, default_factory=lambda:random.randint(20000,100000)) @@ -159,9 +157,8 @@ def getApp(self, config:test, stage_file=True)->str: verbose_print=True) return str(destination) -# TODO (#266): disable the use_enum_values configuration class Config_Base(BaseModel): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) path: DirectoryPath = Field(default=DirectoryPath("."), description="The root of your app.") app:CustomApp = Field(default_factory=CustomApp) @@ -175,7 +172,7 @@ def serialize_path(path: DirectoryPath)->str: return str(path) class init(Config_Base): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) bare: bool = Field(default=False, description="contentctl normally provides some some example content " "(macros, stories, data_sources, and/or analytic stories). This option disables " "initialization with that additional contnet. Note that even if --bare is used, it " @@ -184,9 +181,8 @@ class init(Config_Base): "the deployment/ directory (since it is not yet easily customizable).") -# TODO (#266): disable the use_enum_values configuration class validate(Config_Base): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) enrichments: bool = Field(default=False, description="Enable MITRE, APP, and CVE Enrichments. 
"\ "This is useful when outputting a release build "\ "and validating these values, but should otherwise "\ @@ -241,9 +237,8 @@ def getReportingPath(self)->pathlib.Path: return self.path/"reporting/" -# TODO (#266): disable the use_enum_values configuration class build(validate): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) build_path: DirectoryPath = Field(default=DirectoryPath("dist/"), title="Target path for all build outputs") @field_serializer('build_path',when_used='always') @@ -395,17 +390,15 @@ class new(Config_Base): type: NewContentType = Field(default=NewContentType.detection, description="Specify the type of content you would like to create.") -# TODO (#266): disable the use_enum_values configuration class deploy_acs(inspect): - model_config = ConfigDict(use_enum_values=True,validate_default=False, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=False, arbitrary_types_allowed=True) #ignore linter error splunk_cloud_jwt_token: str = Field(exclude=True, description="Splunk JWT used for performing ACS operations on a Splunk Cloud Instance") splunk_cloud_stack: str = Field(description="The name of your Splunk Cloud Stack") -# TODO (#266): disable the use_enum_values configuration class Infrastructure(BaseModel): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) splunk_app_username:str = Field(default="admin", description="Username for logging in to your Splunk Server") splunk_app_password:str = Field(exclude=True, default="password", description="Password for logging in to your Splunk Server.") instance_address:str = Field(..., description="Address of your splunk server.") @@ -415,15 +408,13 @@ class Infrastructure(BaseModel): instance_name: str = Field(...) -# TODO (#266): disable the use_enum_values configuration class Container(Infrastructure): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) instance_address:str = Field(default="localhost", description="Address of your splunk server.") -# TODO (#266): disable the use_enum_values configuration class ContainerSettings(BaseModel): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) leave_running: bool = Field(default=True, description="Leave container running after it is first " "set up to speed up subsequent test runs.") num_containers: PositiveInt = Field(default=1, description="Number of containers to start in parallel. " @@ -444,18 +435,19 @@ def getContainers(self)->List[Container]: class All(BaseModel): #Doesn't need any extra logic + mode_name:str = "All" pass -# TODO (#266): disable the use_enum_values configuration class Changes(BaseModel): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) + mode_name: str = "Changes" target_branch:str = Field(...,description="The target branch to diff against. 
Note that this includes uncommitted changes in the working directory as well.") -# TODO (#266): disable the use_enum_values configuration class Selected(BaseModel): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) + mode_name:str = "Selected" files:List[FilePath] = Field(...,description="List of detection files to test, separated by spaces.") @field_serializer('files',when_used='always') @@ -684,12 +676,12 @@ def serialize_path(paths: List[FilePath])->List[str]: class test_common(build): mode:Union[Changes, Selected, All] = Field(All(), union_mode='left_to_right') post_test_behavior: PostTestBehavior = Field(default=PostTestBehavior.pause_on_failure, description="Controls what to do when a test completes.\n\n" - f"'{PostTestBehavior.always_pause.value}' - the state of " + f"'{PostTestBehavior.always_pause}' - the state of " "the test will always pause after a test, allowing the user to log into the " "server and experiment with the search and data before it is removed.\n\n" - f"'{PostTestBehavior.pause_on_failure.value}' - pause execution ONLY when a test fails. The user may press ENTER in the terminal " + f"'{PostTestBehavior.pause_on_failure}' - pause execution ONLY when a test fails. The user may press ENTER in the terminal " "running the test to move on to the next test.\n\n" - f"'{PostTestBehavior.never_pause.value}' - never stop testing, even if a test fails.\n\n" + f"'{PostTestBehavior.never_pause}' - never stop testing, even if a test fails.\n\n" "***SPECIAL NOTE FOR CI/CD*** 'never_pause' MUST be used for a test to " "run in an unattended manner or in a CI/CD system - otherwise a single failed test " "will result in the testing never finishing as the tool waits for input.") @@ -706,7 +698,7 @@ class test_common(build): " interactive command line workflow that can display progress bars and status information frequently. " "Unfortunately it is incompatible with, or may cause poorly formatted logs, in many CI/CD systems or other unattended environments. " "If you are running contentctl in CI/CD, then please set this argument to True. Note that if you are running in a CI/CD context, " - f"you also MUST set post_test_behavior to {PostTestBehavior.never_pause.value}. Otherwiser, a failed detection will cause" + f"you also MUST set post_test_behavior to {PostTestBehavior.never_pause}. 
Otherwiser, a failed detection will cause" "the CI/CD running to pause indefinitely.") apps: List[TestApp] = Field(default=DEFAULT_APPS, exclude=False, description="List of apps to install in test environment") @@ -715,7 +707,7 @@ class test_common(build): def dumpCICDPlanAndQuit(self, githash: str, detections:List[Detection]): output_file = self.path / "test_plan.yml" self.mode = Selected(files=sorted([detection.file_path for detection in detections], key=lambda path: str(path))) - self.post_test_behavior = PostTestBehavior.never_pause.value + self.post_test_behavior = PostTestBehavior.never_pause #required so that CI/CD does not get too much output or hang self.disable_tqdm = True @@ -782,12 +774,12 @@ def ensureCommonInformationModel(self)->Self: def suppressTQDM(self)->Self: if self.disable_tqdm: tqdm.tqdm.__init__ = partialmethod(tqdm.tqdm.__init__, disable=True) - if self.post_test_behavior != PostTestBehavior.never_pause.value: + if self.post_test_behavior != PostTestBehavior.never_pause: raise ValueError(f"You have disabled tqdm, presumably because you are " f"running in CI/CD or another unattended context.\n" f"However, post_test_behavior is set to [{self.post_test_behavior}].\n" f"If that is the case, then you MUST set post_test_behavior " - f"to [{PostTestBehavior.never_pause.value}].\n" + f"to [{PostTestBehavior.never_pause}].\n" "Otherwise, if a detection fails in CI/CD, your CI/CD runner will hang forever.") return self @@ -817,18 +809,8 @@ def checkPlanOnlyUse(self)->Self: return self - def getModeName(self)->str: - if isinstance(self.mode, All): - return DetectionTestingMode.all.value - elif isinstance(self.mode, Changes): - return DetectionTestingMode.changes.value - else: - return DetectionTestingMode.selected.value - - -# TODO (#266): disable the use_enum_values configuration class test(test_common): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) container_settings:ContainerSettings = ContainerSettings() test_instances: List[Container] = Field([], exclude = True, validate_default=True) splunk_api_username: Optional[str] = Field(default=None, exclude = True,description="Splunk API username used for running appinspect or installating apps from Splunkbase") @@ -893,9 +875,8 @@ def getAppFilePath(self): TEST_ARGS_ENV = "CONTENTCTL_TEST_INFRASTRUCTURES" -# TODO (#266): disable the use_enum_values configuration class test_servers(test_common): - model_config = ConfigDict(use_enum_values=True,validate_default=True, arbitrary_types_allowed=True) + model_config = ConfigDict(validate_default=True, arbitrary_types_allowed=True) test_instances:List[Infrastructure] = Field([],description="Test against one or more preconfigured servers.", validate_default=True) server_info:Optional[str] = Field(None, validate_default=True, description='String of pre-configured servers to use for testing. The list MUST be in the format:\n' 'address,username,web_ui_port,hec_port,api_port;address_2,username_2,web_ui_port_2,hec_port_2,api_port_2' diff --git a/contentctl/objects/constants.py b/contentctl/objects/constants.py index c295ec86..f0f0d8f3 100644 --- a/contentctl/objects/constants.py +++ b/contentctl/objects/constants.py @@ -79,6 +79,7 @@ "Actions on Objectives": 7 } +# TODO (cmcginley): @ljstella should this be removed? 
also referenced in new_content.py SES_OBSERVABLE_ROLE_MAPPING = { "Other": -1, "Unknown": 0, @@ -93,6 +94,7 @@ "Observer": 9 } +# TODO (cmcginley): @ljstella should this be removed? also referenced in new_content.py SES_OBSERVABLE_TYPE_MAPPING = { "Unknown": 0, "Hostname": 1, @@ -135,6 +137,7 @@ "Impact": "TA0040" } +# TODO (cmcginley): is this just for the transition testing? RBA_OBSERVABLE_ROLE_MAPPING = { "Attacker": 0, "Victim": 1 @@ -149,7 +152,7 @@ # errors, if its name is longer than 99 characters. # When an saved search is cloned in Enterprise Security User Interface, # it is wrapped in the following: -# {Detection.tags.security_domain.value} - {SEARCH_STANZA_NAME} - Rule +# {Detection.tags.security_domain} - {SEARCH_STANZA_NAME} - Rule # Similarly, when we generate the search stanza name in contentctl, it # is app.label - detection.name - Rule # However, in product the search name is: diff --git a/contentctl/objects/correlation_search.py b/contentctl/objects/correlation_search.py index c64eed6b..9ce66a76 100644 --- a/contentctl/objects/correlation_search.py +++ b/contentctl/objects/correlation_search.py @@ -2,7 +2,7 @@ import time import json from typing import Any -from enum import Enum +from enum import StrEnum, IntEnum from functools import cached_property from pydantic import ConfigDict, BaseModel, computed_field, Field, PrivateAttr @@ -29,7 +29,6 @@ from contentctl.objects.detection import Detection from contentctl.objects.risk_event import RiskEvent from contentctl.objects.notable_event import NotableEvent -from contentctl.objects.observable import Observable # Suppress logging by default; enable for local testing @@ -76,7 +75,7 @@ def get_logger() -> logging.Logger: return logger -class SavedSearchKeys(str, Enum): +class SavedSearchKeys(StrEnum): """ Various keys into the SavedSearch content """ @@ -89,7 +88,7 @@ class SavedSearchKeys(str, Enum): DISBALED_KEY = "disabled" -class Indexes(str, Enum): +class Indexes(StrEnum): """ Indexes we search against """ @@ -98,7 +97,7 @@ class Indexes(str, Enum): NOTABLE_INDEX = "notable" -class TimeoutConfig(int, Enum): +class TimeoutConfig(IntEnum): """ Configuration values for the exponential backoff timer """ @@ -115,7 +114,7 @@ class TimeoutConfig(int, Enum): # TODO (#226): evaluate sane defaults for timeframe for integration testing (e.g. 5y is good # now, but maybe not always...); maybe set latest/earliest to None? 
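A short sketch of why the explicit .value accesses elsewhere in this change can be dropped once these classes derive from StrEnum and IntEnum (Python 3.11+): members are str and int subclasses, so they compare and format as their plain values. The NOTABLE_INDEX value is taken from the surrounding context; the TimeoutConfig value is an assumption for the example.

from enum import IntEnum, StrEnum

class Indexes(StrEnum):
    NOTABLE_INDEX = "notable"

class TimeoutConfig(IntEnum):
    BASE_SLEEP = 60  # illustrative value only

# Equality and string formatting work directly on the members, so .value is unnecessary:
assert Indexes.NOTABLE_INDEX == "notable"
assert f"index={Indexes.NOTABLE_INDEX}" == "index=notable"
assert TimeoutConfig.BASE_SLEEP < 61  # IntEnum members behave as plain ints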
-class ScheduleConfig(str, Enum): +class ScheduleConfig(StrEnum): """ Configuraton values for the saved search schedule """ @@ -145,24 +144,24 @@ def __init__(self, response_reader: ResponseReader) -> None: def __iter__(self) -> "ResultIterator": return self - def __next__(self) -> dict: + def __next__(self) -> dict[Any, Any]: # Use a reader for JSON format so we can iterate over our results for result in self.results_reader: # log messages, or raise if error if isinstance(result, Message): # convert level string to level int - level_name = result.type.strip().upper() + level_name = result.type.strip().upper() # type: ignore level: int = logging.getLevelName(level_name) # log message at appropriate level and raise if needed - message = f"SPLUNK: {result.message}" + message = f"SPLUNK: {result.message}" # type: ignore self.logger.log(level, message) if level == logging.ERROR: raise ServerError(message) # if dict, just return elif isinstance(result, dict): - return result + return result # type: ignore # raise for any unexpected types else: @@ -310,9 +309,11 @@ def earliest_time(self) -> str: The earliest time configured for the saved search """ if self.saved_search is not None: - return self.saved_search.content[SavedSearchKeys.EARLIEST_TIME_KEY.value] + return self.saved_search.content[SavedSearchKeys.EARLIEST_TIME_KEY] # type: ignore else: - raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + raise ClientError( + "Something unexpected went wrong in initialization; saved_search was not populated" + ) @property def latest_time(self) -> str: @@ -320,9 +321,11 @@ def latest_time(self) -> str: The latest time configured for the saved search """ if self.saved_search is not None: - return self.saved_search.content[SavedSearchKeys.LATEST_TIME_KEY.value] + return self.saved_search.content[SavedSearchKeys.LATEST_TIME_KEY] # type: ignore else: - raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + raise ClientError( + "Something unexpected went wrong in initialization; saved_search was not populated" + ) @property def cron_schedule(self) -> str: @@ -330,9 +333,11 @@ def cron_schedule(self) -> str: The cron schedule configured for the saved search """ if self.saved_search is not None: - return self.saved_search.content[SavedSearchKeys.CRON_SCHEDULE_KEY.value] + return self.saved_search.content[SavedSearchKeys.CRON_SCHEDULE_KEY] # type: ignore else: - raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + raise ClientError( + "Something unexpected went wrong in initialization; saved_search was not populated" + ) @property def enabled(self) -> bool: @@ -340,12 +345,14 @@ def enabled(self) -> bool: Whether the saved search is enabled """ if self.saved_search is not None: - if int(self.saved_search.content[SavedSearchKeys.DISBALED_KEY.value]): + if int(self.saved_search.content[SavedSearchKeys.DISBALED_KEY]): # type: ignore return False else: return True else: - raise ClientError("Something unexpected went wrong in initialization; saved_search was not populated") + raise ClientError( + "Something unexpected went wrong in initialization; saved_search was not populated" + ) @ property def has_risk_analysis_action(self) -> bool: @@ -368,7 +375,7 @@ def _get_risk_analysis_action(content: dict[str, Any]) -> RiskAnalysisAction | N :param content: a dict of strings to values :returns: a RiskAnalysisAction, or None if none exists """ - if 
int(content[SavedSearchKeys.RISK_ACTION_KEY.value]): + if int(content[SavedSearchKeys.RISK_ACTION_KEY]): try: return RiskAnalysisAction.parse_from_dict(content) except ValueError as e: @@ -383,23 +390,10 @@ def _get_notable_action(content: dict[str, Any]) -> NotableAction | None: :returns: a NotableAction, or None if none exists """ # grab notable details if present - if int(content[SavedSearchKeys.NOTABLE_ACTION_KEY.value]): + if int(content[SavedSearchKeys.NOTABLE_ACTION_KEY]): return NotableAction.parse_from_dict(content) return None - @staticmethod - def _get_relevant_observables(observables: list[Observable]) -> list[Observable]: - """ - Given a list of observables, identify the subset of those relevant for risk matching - :param observables: the Observable objects to filter - :returns: the filtered list of relevant observables - """ - relevant = [] - for observable in observables: - if not RiskEvent.ignore_observable(observable): - relevant.append(observable) - return relevant - def _parse_risk_and_notable_actions(self) -> None: """Parses the risk/notable metadata we care about from self.saved_search.content @@ -463,9 +457,9 @@ def disable(self, refresh: bool = True) -> None: def update_timeframe( self, - earliest_time: str = ScheduleConfig.EARLIEST_TIME.value, - latest_time: str = ScheduleConfig.LATEST_TIME.value, - cron_schedule: str = ScheduleConfig.CRON_SCHEDULE.value, + earliest_time: str = ScheduleConfig.EARLIEST_TIME, + latest_time: str = ScheduleConfig.LATEST_TIME, + cron_schedule: str = ScheduleConfig.CRON_SCHEDULE, refresh: bool = True ) -> None: """Updates the correlation search timeframe to work with test data @@ -481,9 +475,9 @@ def update_timeframe( """ # update the SavedSearch accordingly data = { - SavedSearchKeys.EARLIEST_TIME_KEY.value: earliest_time, - SavedSearchKeys.LATEST_TIME_KEY.value: latest_time, - SavedSearchKeys.CRON_SCHEDULE_KEY.value: cron_schedule + SavedSearchKeys.EARLIEST_TIME_KEY: earliest_time, + SavedSearchKeys.LATEST_TIME_KEY: latest_time, + SavedSearchKeys.CRON_SCHEDULE_KEY: cron_schedule } self.logger.info(data) self.logger.info(f"Updating timeframe for '{self.name}': {data}") @@ -495,7 +489,7 @@ def update_timeframe( if refresh: self.refresh() - def force_run(self, refresh=True) -> None: + def force_run(self, refresh: bool = True) -> None: """Forces a detection run Enables the detection, adjusts the cron schedule to run every 1 minute, and widens the earliest/latest window @@ -506,7 +500,7 @@ def force_run(self, refresh=True) -> None: if not self.enabled: self.enable(refresh=False) else: - self.logger.warn(f"Detection '{self.name}' was already enabled") + self.logger.warning(f"Detection '{self.name}' was already enabled") if refresh: self.refresh() @@ -554,10 +548,10 @@ def get_risk_events(self, force_update: bool = False) -> list[RiskEvent]: for result in result_iterator: # sanity check that this result from the iterator is a risk event and not some # other metadata - if result["index"] == Indexes.RISK_INDEX.value: + if result["index"] == Indexes.RISK_INDEX: try: parsed_raw = json.loads(result["_raw"]) - event = RiskEvent.parse_obj(parsed_raw) + event = RiskEvent.model_validate(parsed_raw) except Exception: self.logger.error(f"Failed to parse RiskEvent from search result: {result}") raise @@ -619,10 +613,10 @@ def get_notable_events(self, force_update: bool = False) -> list[NotableEvent]: for result in result_iterator: # sanity check that this result from the iterator is a notable event and not some # other metadata - if result["index"] == 
Indexes.NOTABLE_INDEX.value: + if result["index"] == Indexes.NOTABLE_INDEX: try: parsed_raw = json.loads(result["_raw"]) - event = NotableEvent.parse_obj(parsed_raw) + event = NotableEvent.model_validate(parsed_raw) except Exception: self.logger.error(f"Failed to parse NotableEvent from search result: {result}") raise @@ -646,24 +640,21 @@ def validate_risk_events(self) -> None: """Validates the existence of any expected risk events First ensure the risk event exists, and if it does validate its risk message and make sure - any events align with the specified observables. Also adds the risk index to the purge list + any events align with the specified risk object. Also adds the risk index to the purge list if risk events existed :param elapsed_sleep_time: an int representing the amount of time slept thus far waiting to check the risks/notables :returns: an IntegrationTestResult on failure; None on success """ - # Create a mapping of the relevant observables to counters - observables = CorrelationSearch._get_relevant_observables(self.detection.tags.observable) - observable_counts: dict[str, int] = {str(x): 0 for x in observables} - - # NOTE: we intentionally want this to be an error state and not a failure state, as - # ultimately this validation should be handled during the build process - if len(observables) != len(observable_counts): - raise ClientError( - f"At least two observables in '{self.detection.name}' have the same name; " - "each observable for a detection should be unique." + # Ensure the rba object is defined + if self.detection.rba is None: + raise ValidationFailed( + f"Unexpected error: Detection '{self.detection.name}' has no RBA objects associated" + " with it; cannot validate." ) + risk_object_counts: dict[int, int] = {id(x): 0 for x in self.detection.rba.risk_objects} + # Get the risk events; note that we use the cached risk events, expecting they were # saved by a prior call to risk_event_exists events = self.get_risk_events() @@ -673,63 +664,66 @@ def validate_risk_events(self) -> None: for event in events: c += 1 self.logger.debug( - f"Validating risk event ({event.risk_object}, {event.risk_object_type}): " + f"Validating risk event ({event.es_risk_object}, {event.es_risk_object_type}): " f"{c}/{len(events)}" ) event.validate_against_detection(self.detection) - # Update observable count based on match - matched_observable = event.get_matched_observable(self.detection.tags.observable) + # Update risk object count based on match + matched_risk_object = event.get_matched_risk_object(self.detection.rba.risk_objects) self.logger.debug( - f"Matched risk event (object={event.risk_object}, type={event.risk_object_type}) " - f"to observable (name={matched_observable.name}, type={matched_observable.type}, " - f"role={matched_observable.role}) using the source field " + f"Matched risk event (object={event.es_risk_object}, type={event.es_risk_object_type}) " + f"to detection's risk object (name={matched_risk_object.field}, " + f"type={matched_risk_object.type.value}) using the source field " f"'{event.source_field_name}'" ) - observable_counts[str(matched_observable)] += 1 + risk_object_counts[id(matched_risk_object)] += 1 - # Report any observables which did not have at least one match to a risk event - for observable in observables: + # Report any risk objects which did not have at least one match to a risk event + for risk_object in self.detection.rba.risk_objects: self.logger.debug( - f"Matched observable (name={observable.name}, type={observable.type}, " - 
f"role={observable.role}) to {observable_counts[str(observable)]} risk events." + f"Matched risk object (name={risk_object.field}, type={risk_object.type.value} " + f"to {risk_object_counts[id(risk_object)]} risk events." ) - if observable_counts[str(observable)] == 0: + if risk_object_counts[id(risk_object)] == 0: raise ValidationFailed( - f"Observable (name={observable.name}, type={observable.type}, " - f"role={observable.role}) was not matched to any risk events." + f"Risk object (name={risk_object.field}, type={risk_object.type.value}) " + "was not matched to any risk events." ) # TODO (#250): Re-enable and refactor code that validates the specific risk counts # Validate risk events in aggregate; we should have an equal amount of risk events for each - # relevant observable, and the total count should match the total number of events + # relevant risk object, and the total count should match the total number of events # individual_count: int | None = None # total_count = 0 - # for observable_str in observable_counts: + # for risk_object_id in risk_object_counts: # self.logger.debug( - # f"Observable <{observable_str}> match count: {observable_counts[observable_str]}" + # f"Risk object <{risk_object_id}> match count: {risk_object_counts[risk_object_id]}" # ) # # Grab the first value encountered if not set yet # if individual_count is None: - # individual_count = observable_counts[observable_str] + # individual_count = risk_object_counts[risk_object_id] # else: - # # Confirm that the count for the current observable matches the count of the others - # if observable_counts[observable_str] != individual_count: + # # Confirm that the count for the current risk object matches the count of the + # # others + # if risk_object_counts[risk_object_id] != individual_count: # raise ValidationFailed( - # f"Count of risk events matching observable <\"{observable_str}\"> " - # f"({observable_counts[observable_str]}) does not match the count of those " - # f"matching other observables ({individual_count})." + # f"Count of risk events matching detection's risk object <\"{risk_object_id}\"> " + # f"({risk_object_counts[risk_object_id]}) does not match the count of those " + # f"matching other risk objects ({individual_count})." # ) - # # Aggregate total count of events matched to observables - # total_count += observable_counts[observable_str] + # # Aggregate total count of events matched to risk objects + # total_count += risk_object_counts[risk_object_id] - # # Raise if the the number of events doesn't match the number of those matched to observables + # # Raise if the the number of events doesn't match the number of those matched to risk + # # objects # if len(events) != total_count: # raise ValidationFailed( # f"The total number of risk events {len(events)} does not match the number of " - # f"risk events we were able to match against observables ({total_count})." + # "risk events we were able to match against risk objects from the detection " + # f"({total_count})." 
# ) # TODO (PEX-434): implement deeper notable validation @@ -746,7 +740,7 @@ def validate_notable_events(self) -> None: # NOTE: it would be more ideal to switch this to a system which gets the handle of the saved search job and polls # it for completion, but that seems more tricky - def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: bool = False) -> IntegrationTestResult: + def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP, raise_on_exc: bool = False) -> IntegrationTestResult: """Execute the integration test Executes an integration test for this CorrelationSearch. First, ensures no matching risk/notables already exist @@ -760,10 +754,10 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo """ # max_sleep must be greater than the base value we must wait for the scheduled searchjob to run (jobs run every # 60s) - if max_sleep < TimeoutConfig.BASE_SLEEP.value: + if max_sleep < TimeoutConfig.BASE_SLEEP: raise ClientError( f"max_sleep value of {max_sleep} is less than the base sleep required " - f"({TimeoutConfig.BASE_SLEEP.value})" + f"({TimeoutConfig.BASE_SLEEP})" ) # initialize result as None @@ -774,7 +768,7 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo num_tries = 0 # set the initial base sleep time - time_to_sleep = TimeoutConfig.BASE_SLEEP.value + time_to_sleep = TimeoutConfig.BASE_SLEEP try: # first make sure the indexes are currently empty and the detection is starting from a disabled state @@ -783,11 +777,11 @@ def test(self, max_sleep: int = TimeoutConfig.MAX_SLEEP.value, raise_on_exc: boo ) self.update_pbar(TestingStates.PRE_CLEANUP) if self.risk_event_exists(): - self.logger.warn( + self.logger.warning( f"Risk events matching '{self.name}' already exist; marking for deletion" ) if self.notable_event_exists(): - self.logger.warn( + self.logger.warning( f"Notable events matching '{self.name}' already exist; marking for deletion" ) self.cleanup() @@ -934,11 +928,11 @@ def _search(self, query: str) -> ResultIterator: :param query: the SPL string to run """ self.logger.debug(f"Executing query: `{query}`") - job = self.service.search(query, exec_mode="blocking") + job = self.service.search(query, exec_mode="blocking") # type: ignore # query the results, catching any HTTP status code errors try: - response_reader: ResponseReader = job.results(output_mode="json") + response_reader: ResponseReader = job.results(output_mode="json") # type: ignore except HTTPError as e: # e.g. -> HTTP 400 Bad Request -- b'{"messages":[{"type":"FATAL","text":"Error in \'delete\' command: You # have insufficient privileges to delete events."}]}' @@ -946,7 +940,7 @@ def _search(self, query: str) -> ResultIterator: self.logger.error(message) raise ServerError(message) - return ResultIterator(response_reader) + return ResultIterator(response_reader) # type: ignore def _delete_index(self, index: str) -> None: """Deletes events in a given index @@ -979,7 +973,7 @@ def _delete_index(self, index: str) -> None: message = f"No result returned showing deletion in index {index}" raise ServerError(message) - def cleanup(self, delete_test_index=False) -> None: + def cleanup(self, delete_test_index: bool = False) -> None: """Cleans up after an integration test First, disable the detection; then dump the risk, notable, and (optionally) test indexes. 
The test index is @@ -999,9 +993,9 @@ def cleanup(self, delete_test_index=False) -> None: if delete_test_index: self.indexes_to_purge.add(self.test_index) # type: ignore if self._risk_events is not None: - self.indexes_to_purge.add(Indexes.RISK_INDEX.value) + self.indexes_to_purge.add(Indexes.RISK_INDEX) if self._notable_events is not None: - self.indexes_to_purge.add(Indexes.NOTABLE_INDEX.value) + self.indexes_to_purge.add(Indexes.NOTABLE_INDEX) # delete the indexes for index in self.indexes_to_purge: diff --git a/contentctl/objects/data_source.py b/contentctl/objects/data_source.py index 868bdd51..2ed9c80c 100644 --- a/contentctl/objects/data_source.py +++ b/contentctl/objects/data_source.py @@ -1,8 +1,7 @@ from __future__ import annotations from typing import Optional, Any -from pydantic import Field, HttpUrl, model_serializer, BaseModel +from pydantic import Field, HttpUrl, model_serializer, BaseModel, ConfigDict from contentctl.objects.security_content_object import SecurityContentObject -from contentctl.objects.event_source import EventSource class TA(BaseModel): @@ -15,10 +14,10 @@ class DataSource(SecurityContentObject): separator: Optional[str] = None configuration: Optional[str] = None supported_TA: list[TA] = [] - fields: Optional[list] = None - field_mappings: Optional[list] = None - convert_to_log_source: Optional[list] = None - example_log: Optional[str] = None + fields: None | list = None + field_mappings: None | list = None + convert_to_log_source: None | list = None + example_log: None | str = None @model_serializer diff --git a/contentctl/objects/deployment.py b/contentctl/objects/deployment.py index 832c048d..6e2cc6d2 100644 --- a/contentctl/objects/deployment.py +++ b/contentctl/objects/deployment.py @@ -1,5 +1,5 @@ from __future__ import annotations -from pydantic import Field, computed_field,ValidationInfo, model_serializer, NonNegativeInt +from pydantic import Field, computed_field,ValidationInfo, model_serializer, NonNegativeInt, ConfigDict from typing import Any import uuid import datetime @@ -10,14 +10,7 @@ from contentctl.objects.enums import DeploymentType -class Deployment(SecurityContentObject): - #id: str = None - #date: str = None - #author: str = None - #description: str = None - #contentType: SecurityContentType = SecurityContentType.deployments - - +class Deployment(SecurityContentObject): scheduling: DeploymentScheduling = Field(...) alert_action: AlertAction = AlertAction() type: DeploymentType = Field(...) 
@@ -72,7 +65,6 @@ def serialize_model(self): "tags": self.tags } - #Combine fields from this model with fields from parent model.update(super_fields) diff --git a/contentctl/objects/deployment_email.py b/contentctl/objects/deployment_email.py index a607502c..1d1269fe 100644 --- a/contentctl/objects/deployment_email.py +++ b/contentctl/objects/deployment_email.py @@ -1,8 +1,9 @@ from __future__ import annotations -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict class DeploymentEmail(BaseModel): + model_config = ConfigDict(extra="forbid") message: str subject: str to: str \ No newline at end of file diff --git a/contentctl/objects/deployment_notable.py b/contentctl/objects/deployment_notable.py index b6e2c463..7f064b43 100644 --- a/contentctl/objects/deployment_notable.py +++ b/contentctl/objects/deployment_notable.py @@ -1,8 +1,9 @@ from __future__ import annotations -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict from typing import List class DeploymentNotable(BaseModel): + model_config = ConfigDict(extra="forbid") rule_description: str rule_title: str nes_fields: List[str] \ No newline at end of file diff --git a/contentctl/objects/deployment_phantom.py b/contentctl/objects/deployment_phantom.py index 11df2feb..1d4a9975 100644 --- a/contentctl/objects/deployment_phantom.py +++ b/contentctl/objects/deployment_phantom.py @@ -1,8 +1,9 @@ from __future__ import annotations -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict class DeploymentPhantom(BaseModel): + model_config = ConfigDict(extra="forbid") cam_workers : str label : str phantom_server : str diff --git a/contentctl/objects/deployment_rba.py b/contentctl/objects/deployment_rba.py index b3412b3f..58917c70 100644 --- a/contentctl/objects/deployment_rba.py +++ b/contentctl/objects/deployment_rba.py @@ -1,6 +1,7 @@ from __future__ import annotations -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict class DeploymentRBA(BaseModel): + model_config = ConfigDict(extra="forbid") enabled: bool = False \ No newline at end of file diff --git a/contentctl/objects/deployment_scheduling.py b/contentctl/objects/deployment_scheduling.py index 6c5a75a8..b21673d8 100644 --- a/contentctl/objects/deployment_scheduling.py +++ b/contentctl/objects/deployment_scheduling.py @@ -1,8 +1,9 @@ from __future__ import annotations -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict class DeploymentScheduling(BaseModel): + model_config = ConfigDict(extra="forbid") cron_schedule: str earliest_time: str latest_time: str diff --git a/contentctl/objects/deployment_slack.py b/contentctl/objects/deployment_slack.py index 294836e2..03cf5ebb 100644 --- a/contentctl/objects/deployment_slack.py +++ b/contentctl/objects/deployment_slack.py @@ -1,7 +1,8 @@ from __future__ import annotations -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict class DeploymentSlack(BaseModel): + model_config = ConfigDict(extra="forbid") channel: str message: str \ No newline at end of file diff --git a/contentctl/objects/detection_tags.py b/contentctl/objects/detection_tags.py index b1d489f4..aea02bfe 100644 --- a/contentctl/objects/detection_tags.py +++ b/contentctl/objects/detection_tags.py @@ -4,8 +4,6 @@ from pydantic import ( BaseModel, Field, - NonNegativeInt, - PositiveInt, computed_field, UUID4, HttpUrl, @@ -27,7 +25,6 @@ Cis18Value, AssetType, SecurityDomain, - RiskSeverity, KillChainPhase, NistCategory, SecurityContentProductName @@ 
-35,57 +32,29 @@ from contentctl.objects.atomic import AtomicEnrichment, AtomicTest from contentctl.objects.annotated_types import MITRE_ATTACK_ID_TYPE, CVE_TYPE -# TODO (#266): disable the use_enum_values configuration + class DetectionTags(BaseModel): # detection spec - model_config = ConfigDict(use_enum_values=True, validate_default=False) + + model_config = ConfigDict(validate_default=False, extra='forbid') analytic_story: list[Story] = Field(...) asset_type: AssetType = Field(...) - - confidence: NonNegativeInt = Field(..., le=100) - impact: NonNegativeInt = Field(..., le=100) - - @computed_field - @property - def risk_score(self) -> int: - return round((self.confidence * self.impact)/100) - - @computed_field - @property - def severity(self)->RiskSeverity: - if 0 <= self.risk_score <= 20: - return RiskSeverity.INFORMATIONAL - elif 20 < self.risk_score <= 40: - return RiskSeverity.LOW - elif 40 < self.risk_score <= 60: - return RiskSeverity.MEDIUM - elif 60 < self.risk_score <= 80: - return RiskSeverity.HIGH - elif 80 < self.risk_score <= 100: - return RiskSeverity.CRITICAL - else: - raise Exception(f"Error getting severity - risk_score must be between 0-100, but was actually {self.risk_score}") - + group: list[str] = [] mitre_attack_id: List[MITRE_ATTACK_ID_TYPE] = [] nist: list[NistCategory] = [] + # TODO (cmcginley): observable should be removed as well, yes? # TODO (#249): Add pydantic validator to ensure observables are unique within a detection observable: List[Observable] = [] - message: str = Field(...) product: list[SecurityContentProductName] = Field(..., min_length=1) - required_fields: list[str] = Field(min_length=1) throttling: Optional[Throttling] = None security_domain: SecurityDomain = Field(...) cve: List[CVE_TYPE] = [] atomic_guid: List[AtomicTest] = [] - # enrichment mitre_attack_enrichments: List[MitreAttackEnrichment] = Field([], validate_default=True) - confidence_id: Optional[PositiveInt] = Field(None, ge=1, le=3) - impact_id: Optional[PositiveInt] = Field(None, ge=1, le=5) - evidence_str: Optional[str] = None @computed_field @property @@ -114,7 +83,7 @@ def cis20(self) -> list[Cis18Value]: # TODO (#268): Validate manual_test has length > 0 if not None manual_test: Optional[str] = None - + # The following validator is temporarily disabled pending further discussions # @validator('message') # def validate_message(cls,v,values): @@ -152,15 +121,11 @@ def serialize_model(self): # Since this field has no parent, there is no need to call super() serialization function return { "analytic_story": [story.name for story in self.analytic_story], - "asset_type": self.asset_type.value, + "asset_type": self.asset_type, "cis20": self.cis20, "kill_chain_phases": self.kill_chain_phases, "nist": self.nist, - "observable": self.observable, - "message": self.message, - "risk_score": self.risk_score, "security_domain": self.security_domain, - "risk_severity": self.severity, "mitre_attack_id": self.mitre_attack_id, "mitre_attack_enrichments": self.mitre_attack_enrichments } diff --git a/contentctl/objects/drilldown.py b/contentctl/objects/drilldown.py index 3fe41e7c..b5604748 100644 --- a/contentctl/objects/drilldown.py +++ b/contentctl/objects/drilldown.py @@ -23,6 +23,7 @@ class Drilldown(BaseModel): "but it is NOT the default value and must be supplied explicitly.", min_length= 1) + # TODO (cmcginley): @ljstella the drilldowns will need to be updated @classmethod def constructDrilldownsFromDetection(cls, detection: Detection) -> list[Drilldown]: victim_observables = [o for o in 
detection.tags.observable if o.role[0] == "Victim"] diff --git a/contentctl/objects/enums.py b/contentctl/objects/enums.py index 333ef358..8070d4a4 100644 --- a/contentctl/objects/enums.py +++ b/contentctl/objects/enums.py @@ -1,15 +1,15 @@ from __future__ import annotations from typing import List -import enum +from enum import StrEnum, IntEnum -class AnalyticsType(str, enum.Enum): +class AnalyticsType(StrEnum): TTP = "TTP" Anomaly = "Anomaly" Hunting = "Hunting" Correlation = "Correlation" -class DeploymentType(str, enum.Enum): +class DeploymentType(StrEnum): TTP = "TTP" Anomaly = "Anomaly" Hunting = "Hunting" @@ -18,7 +18,7 @@ class DeploymentType(str, enum.Enum): Embedded = "Embedded" -class DataModel(str,enum.Enum): +class DataModel(StrEnum): ENDPOINT = "Endpoint" NETWORK_TRAFFIC = "Network_Traffic" AUTHENTICATION = "Authentication" @@ -40,11 +40,11 @@ class DataModel(str,enum.Enum): SPLUNK_AUDIT = "Splunk_Audit" -class PlaybookType(str, enum.Enum): +class PlaybookType(StrEnum): INVESTIGATION = "Investigation" RESPONSE = "Response" -class SecurityContentType(enum.Enum): +class SecurityContentType(IntEnum): detections = 1 baselines = 2 stories = 3 @@ -68,20 +68,15 @@ class SecurityContentType(enum.Enum): # json_objects = "json_objects" -class SecurityContentProduct(enum.Enum): - SPLUNK_APP = 1 - API = 3 - CUSTOM = 4 - -class SecurityContentProductName(str, enum.Enum): +class SecurityContentProductName(StrEnum): SPLUNK_ENTERPRISE = "Splunk Enterprise" SPLUNK_ENTERPRISE_SECURITY = "Splunk Enterprise Security" SPLUNK_CLOUD = "Splunk Cloud" SPLUNK_SECURITY_ANALYTICS_FOR_AWS = "Splunk Security Analytics for AWS" SPLUNK_BEHAVIORAL_ANALYTICS = "Splunk Behavioral Analytics" -class SecurityContentInvestigationProductName(str, enum.Enum): +class SecurityContentInvestigationProductName(StrEnum): SPLUNK_ENTERPRISE = "Splunk Enterprise" SPLUNK_ENTERPRISE_SECURITY = "Splunk Enterprise Security" SPLUNK_CLOUD = "Splunk Cloud" @@ -90,33 +85,20 @@ class SecurityContentInvestigationProductName(str, enum.Enum): SPLUNK_PHANTOM = "Splunk Phantom" -class DetectionStatus(enum.Enum): - production = "production" - deprecated = "deprecated" - experimental = "experimental" - validation = "validation" - - -class DetectionStatusSSA(enum.Enum): +class DetectionStatus(StrEnum): production = "production" deprecated = "deprecated" experimental = "experimental" validation = "validation" -class LogLevel(enum.Enum): +class LogLevel(StrEnum): NONE = "NONE" ERROR = "ERROR" INFO = "INFO" -class AlertActions(enum.Enum): - notable = "notable" - rba = "rba" - email = "email" - - -class StoryCategory(str, enum.Enum): +class StoryCategory(StrEnum): ABUSE = "Abuse" ADVERSARY_TACTICS = "Adversary Tactics" BEST_PRACTICES = "Best Practices" @@ -139,37 +121,18 @@ class StoryCategory(str, enum.Enum): UNAUTHORIZED_SOFTWARE = "Unauthorized Software" -class PostTestBehavior(str, enum.Enum): +class PostTestBehavior(StrEnum): always_pause = "always_pause" pause_on_failure = "pause_on_failure" never_pause = "never_pause" -class DetectionTestingMode(str, enum.Enum): +class DetectionTestingMode(StrEnum): selected = "selected" all = "all" changes = "changes" -class DetectionTestingTargetInfrastructure(str, enum.Enum): - container = "container" - server = "server" - - -class InstanceState(str, enum.Enum): - starting = "starting" - running = "running" - error = "error" - stopping = "stopping" - stopped = "stopped" - - -class SigmaConverterTarget(enum.Enum): - CIM = 1 - RAW = 2 - OCSF = 3 - ALL = 4 - # It's unclear why we use a mix of constants 
and enums. The following list was taken from: # contentctl/contentctl/helper/constants.py. # We convect it to an enum here @@ -183,7 +146,7 @@ class SigmaConverterTarget(enum.Enum): # "Command And Control": 6, # "Actions on Objectives": 7 # } -class KillChainPhase(str, enum.Enum): +class KillChainPhase(StrEnum): UNKNOWN ="Unknown" RECONNAISSANCE = "Reconnaissance" WEAPONIZATION = "Weaponization" @@ -194,7 +157,7 @@ class KillChainPhase(str, enum.Enum): ACTIONS_ON_OBJECTIVES = "Actions on Objectives" -class DataSource(str,enum.Enum): +class DataSource(StrEnum): OSQUERY_ES_PROCESS_EVENTS = "OSQuery ES Process Events" POWERSHELL_4104 = "Powershell 4104" SYSMON_EVENT_ID_1 = "Sysmon EventID 1" @@ -234,7 +197,7 @@ class DataSource(str,enum.Enum): WINDOWS_SECURITY_5145 = "Windows Security 5145" WINDOWS_SYSTEM_7045 = "Windows System 7045" -class ProvidingTechnology(str, enum.Enum): +class ProvidingTechnology(StrEnum): AMAZON_SECURITY_LAKE = "Amazon Security Lake" AMAZON_WEB_SERVICES_CLOUDTRAIL = "Amazon Web Services - Cloudtrail" AZURE_AD = "Azure AD" @@ -302,7 +265,7 @@ def getProvidingTechFromSearch(search_string:str)->List[ProvidingTechnology]: return sorted(list(matched_technologies)) -class Cis18Value(str,enum.Enum): +class Cis18Value(StrEnum): CIS_0 = "CIS 0" CIS_1 = "CIS 1" CIS_2 = "CIS 2" @@ -323,7 +286,7 @@ class Cis18Value(str,enum.Enum): CIS_17 = "CIS 17" CIS_18 = "CIS 18" -class SecurityDomain(str, enum.Enum): +class SecurityDomain(StrEnum): ENDPOINT = "endpoint" NETWORK = "network" THREAT = "threat" @@ -331,7 +294,7 @@ class SecurityDomain(str, enum.Enum): ACCESS = "access" AUDIT = "audit" -class AssetType(str, enum.Enum): +class AssetType(StrEnum): AWS_ACCOUNT = "AWS Account" AWS_EKS_KUBERNETES_CLUSTER = "AWS EKS Kubernetes cluster" AWS_FEDERATED_ACCOUNT = "AWS Federated Account" @@ -382,7 +345,7 @@ class AssetType(str, enum.Enum): WEB_APPLICATION = "Web Application" WINDOWS = "Windows" -class NistCategory(str, enum.Enum): +class NistCategory(StrEnum): ID_AM = "ID.AM" ID_BE = "ID.BE" ID_GV = "ID.GV" @@ -406,7 +369,7 @@ class NistCategory(str, enum.Enum): RC_IM = "RC.IM" RC_CO = "RC.CO" -class RiskSeverity(str,enum.Enum): +class RiskSeverity(StrEnum): # Levels taken from the following documentation link # https://docs.splunk.com/Documentation/ES/7.3.2/User/RiskScoring # 20 - info (0-20 for us) diff --git a/contentctl/objects/event_source.py b/contentctl/objects/event_source.py deleted file mode 100644 index 0ed61979..00000000 --- a/contentctl/objects/event_source.py +++ /dev/null @@ -1,11 +0,0 @@ -from __future__ import annotations -from typing import Union, Optional, List -from pydantic import BaseModel, Field - -from contentctl.objects.security_content_object import SecurityContentObject - -class EventSource(SecurityContentObject): - fields: Optional[list[str]] = None - field_mappings: Optional[list[dict]] = None - convert_to_log_source: Optional[list[dict]] = None - example_log: Optional[str] = None diff --git a/contentctl/objects/investigation.py b/contentctl/objects/investigation.py index 293e3331..0d35a9db 100644 --- a/contentctl/objects/investigation.py +++ b/contentctl/objects/investigation.py @@ -12,17 +12,13 @@ ) from contentctl.objects.config import CustomApp -# TODO (#266): disable the use_enum_values configuration class Investigation(SecurityContentObject): - model_config = ConfigDict(use_enum_values=True,validate_default=False) + model_config = ConfigDict(validate_default=False) type: str = Field(...,pattern="^Investigation$") - datamodel: list[DataModel] = Field(...) 
name:str = Field(...,max_length=CONTENTCTL_MAX_SEARCH_NAME_LENGTH) search: str = Field(...) how_to_implement: str = Field(...) known_false_positives: str = Field(...) - - tags: InvestigationTags # enrichment @@ -38,6 +34,11 @@ def inputs(self)->List[str]: return inputs + @computed_field + @property + def datamodel(self) -> List[DataModel]: + return [dm for dm in DataModel if dm in self.search] + @computed_field @property def lowercase_name(self)->str: diff --git a/contentctl/objects/investigation_tags.py b/contentctl/objects/investigation_tags.py index 6db99eff..c4b812e6 100644 --- a/contentctl/objects/investigation_tags.py +++ b/contentctl/objects/investigation_tags.py @@ -1,13 +1,13 @@ from __future__ import annotations from typing import List -from pydantic import BaseModel, Field, field_validator, ValidationInfo, model_serializer +from pydantic import BaseModel, Field, field_validator, ValidationInfo, model_serializer,ConfigDict from contentctl.objects.story import Story from contentctl.objects.enums import SecurityContentInvestigationProductName, SecurityDomain class InvestigationTags(BaseModel): + model_config = ConfigDict(extra="forbid") analytic_story: List[Story] = Field([],min_length=1) product: List[SecurityContentInvestigationProductName] = Field(...,min_length=1) - required_fields: List[str] = Field(min_length=1) security_domain: SecurityDomain = Field(...) @@ -23,7 +23,6 @@ def serialize_model(self): model= { "analytic_story": [story.name for story in self.analytic_story], "product": self.product, - "required_fields": self.required_fields, "security_domain": self.security_domain, } diff --git a/contentctl/objects/lookup.py b/contentctl/objects/lookup.py index e37e60e9..8d42c55e 100644 --- a/contentctl/objects/lookup.py +++ b/contentctl/objects/lookup.py @@ -1,10 +1,13 @@ from __future__ import annotations -from pydantic import field_validator, ValidationInfo, model_validator, FilePath, model_serializer, Field, NonNegativeInt -from typing import TYPE_CHECKING, Optional, Any, Union + +from pydantic import field_validator, ValidationInfo, model_validator, FilePath, model_serializer, Field, NonNegativeInt, computed_field, TypeAdapter +from enum import StrEnum, auto +from typing import TYPE_CHECKING, Optional, Any, Union, Literal, Annotated, Self import re import csv -import uuid -import datetime +import abc +from functools import cached_property +import pathlib if TYPE_CHECKING: from contentctl.input.director import DirectorOutputDto from contentctl.objects.config import validate @@ -15,32 +18,41 @@ LOOKUPS_TO_IGNORE.add("ut_shannon_lookup") #In the URL toolbox app which is recommended for ESCU LOOKUPS_TO_IGNORE.add("identity_lookup_expanded") #Shipped with the Asset and Identity Framework LOOKUPS_TO_IGNORE.add("cim_corporate_web_domain_lookup") #Shipped with the Asset and Identity Framework +LOOKUPS_TO_IGNORE.add("cim_corporate_email_domain_lookup") #Shipped with the Enterprise Security +LOOKUPS_TO_IGNORE.add("cim_cloud_domain_lookup") #Shipped with the Enterprise Security + LOOKUPS_TO_IGNORE.add("alexa_lookup_by_str") #Shipped with the Asset and Identity Framework LOOKUPS_TO_IGNORE.add("interesting_ports_lookup") #Shipped with the Asset and Identity Framework +LOOKUPS_TO_IGNORE.add("asset_lookup_by_str") #Shipped with the Asset and Identity Framework LOOKUPS_TO_IGNORE.add("admon_groups_def") #Shipped with the SA-admon addon +LOOKUPS_TO_IGNORE.add("identity_lookup_expanded") #Shipped with the Enterprise Security #Special case for the Detection "Exploit Public Facing Application 
via Apache Commons Text" LOOKUPS_TO_IGNORE.add("=") LOOKUPS_TO_IGNORE.add("other_lookups") +class Lookup_Type(StrEnum): + csv = auto() + kvstore = auto() + mlmodel = auto() + + + # TODO (#220): Split Lookup into 2 classes -class Lookup(SecurityContentObject): - - collection: Optional[str] = None - fields_list: Optional[str] = None - filename: Optional[FilePath] = None +class Lookup(SecurityContentObject, abc.ABC): default_match: Optional[bool] = None - match_type: Optional[str] = None - min_matches: Optional[int] = None - case_sensitive_match: Optional[bool] = None - # TODO: Add id field to all lookup ymls - id: uuid.UUID = Field(default_factory=uuid.uuid4) - date: datetime.date = Field(datetime.date.today()) - author: str = Field("NO AUTHOR DEFINED",max_length=255) - version: NonNegativeInt = 1 + # Per the documentation for transforms.conf, EXACT should not be specified in this list, + # so we include only WILDCARD and CIDR + match_type: list[Annotated[str, Field(pattern=r"(^WILDCARD|CIDR)\(.+\)$")]] = Field(default=[]) + min_matches: None | NonNegativeInt = Field(default=None) + max_matches: None | Annotated[NonNegativeInt, Field(ge=1, le=1000)] = Field(default=None) + case_sensitive_match: None | bool = Field(default=None) + + + @model_serializer def serialize_model(self): #Call parent serializer @@ -48,13 +60,12 @@ def serialize_model(self): #All fields custom to this model model= { - "filename": self.filename.name if self.filename is not None else None, + "default_match": "true" if self.default_match is True else "false", - "match_type": self.match_type, + "match_type": self.match_type_to_conf_format, "min_matches": self.min_matches, + "max_matches": self.max_matches, "case_sensitive_match": "true" if self.case_sensitive_match is True else "false", - "collection": self.collection, - "fields_list": self.fields_list } #return the model @@ -72,31 +83,91 @@ def fix_lookup_path(cls, data:Any, info: ValidationInfo)->Any: return data - def model_post_init(self, ctx:dict[str,Any]): - if not self.filename: - return - import pathlib - filenamePath = pathlib.Path(self.filename) - - if filenamePath.suffix not in [".csv", ".mlmodel"]: - raise ValueError(f"All Lookup files must be CSV files and end in .csv. 
The following file does not: '{filenamePath}'") + @computed_field + @cached_property + def match_type_to_conf_format(self)->str: + return ', '.join(self.match_type) + + @staticmethod + def get_lookups(text_field: str, director:DirectorOutputDto, ignore_lookups:set[str]=LOOKUPS_TO_IGNORE)->list[Lookup]: + # Comprehensively match all kinds of lookups, including inputlookup and outputlookup + inputLookupsToGet = set(re.findall(r'[^\w]inputlookup(?:\s*(?:(?:append|strict|start|max)\s*=\s*(?:true|t|false|f))){0,4}\s+([\w]+)', text_field, re.IGNORECASE)) + outputLookupsToGet = set(re.findall(r'[^\w]outputlookup(?:\s*(?:(?:append|create_empty|override_if_empty|max|key_field|allow_updates|createinapp|create_context|output_format)\s*=\s*[^\s]*))*\s+([\w]+)',text_field,re.IGNORECASE)) + lookupsToGet = set(re.findall(r'[^\w](?:(?Self: + if not self.filename.exists(): + raise ValueError(f"Expected lookup filename {self.filename} does not exist") + return self + + @computed_field + @cached_property + def filename(self)->FilePath: + if self.file_path is None: + raise ValueError(f"Cannot get the filename of the lookup {self.lookup_type} because the YML file_path attribute is None") #type: ignore + + csv_file = self.file_path.parent / f"{self.file_path.stem}.{self.lookup_type}" #type: ignore + return csv_file + + @computed_field + @cached_property + def app_filename(self)->FilePath: + ''' + We may consider two options: + 1. Always apply the datetime stamp to the end of the file. This makes the code easier + 2. Only apply the datetime stamp if it is version > 1. This makes the code a small fraction + more complicated, but preserves longstanding CSV that have not been modified in a long time + ''' + return pathlib.Path(f"{self.filename.stem}_{self.date.year}{self.date.month:02}{self.date.day:02}.{self.lookup_type}") #type: ignore + +class CSVLookup(FileBackedLookup): + lookup_type:Literal[Lookup_Type.csv] + + @model_serializer + def serialize_model(self): + #Call parent serializer + super_fields = super().serialize_model() + + #All fields custom to this model + model= { + "filename": self.app_filename.name + } + + #return the model + model.update(super_fields) + return model + + @model_validator(mode="after") + def ensure_correct_csv_structure(self)->Self: # https://docs.python.org/3/library/csv.html#csv.DictReader # Column Names (fieldnames) determine by the number of columns in the first row. # If a row has MORE fields than fieldnames, they will be dumped in a list under the key 'restkey' - this should throw an Exception # If a row has LESS fields than fieldnames, then the field should contain None by default. This should also throw an exception. csv_errors:list[str] = [] - with open(filenamePath, "r") as csv_fp: + with open(self.filename, "r") as csv_fp: RESTKEY = "extra_fields_in_a_row" csv_dict = csv.DictReader(csv_fp, restkey=RESTKEY) if csv_dict.fieldnames is None: - raise ValueError(f"Error validating the CSV referenced by the lookup: {filenamePath}:\n\t" + raise ValueError(f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t" "Unable to read fieldnames from CSV. 
Is the CSV empty?\n" " Please try opening the file with a CSV Editor to ensure that it is correct.") # Remember that row 1 has the headers and we do not iterate over it in the loop below @@ -113,41 +184,52 @@ def model_post_init(self, ctx:dict[str,Any]): f"but instead had [{column_index}].") if len(csv_errors) > 0: err_string = '\n\t'.join(csv_errors) - raise ValueError(f"Error validating the CSV referenced by the lookup: {filenamePath}:\n\t{err_string}\n" + raise ValueError(f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t{err_string}\n" f" Please try opening the file with a CSV Editor to ensure that it is correct.") - return - - - @field_validator('match_type') + return self + + + +class KVStoreLookup(Lookup): + lookup_type: Literal[Lookup_Type.kvstore] + fields: list[str] = Field(description="The names of the fields/headings for the KVStore.", min_length=1) + + @field_validator("fields", mode='after') @classmethod - def match_type_valid(cls, v: Union[str,None], info: ValidationInfo): - if not v: - #Match type can be None and that's okay - return v + def ensure_key(cls, values: list[str]): + if values[0] != "_key": + raise ValueError(f"fields MUST begin with '_key', not '{values[0]}'") + return values - if not (v.startswith("WILDCARD(") or v.endswith(")")) : - raise ValueError(f"All match_types must take the format 'WILDCARD(field_name)'. The following file does not: '{v}'") - return v + @computed_field + @cached_property + def collection(self)->str: + return self.name + @computed_field + @cached_property + def fields_to_fields_list_conf_format(self)->str: + return ', '.join(self.fields) - #Ensure that exactly one of location or filename are defined - @model_validator(mode='after') - def ensure_mutually_exclusive_fields(self)->Lookup: - if self.filename is not None and self.collection is not None: - raise ValueError("filename and collection cannot be defined in the lookup file. Exactly one must be defined.") - elif self.filename is None and self.collection is None: - raise ValueError("Neither filename nor collection were defined in the lookup file. 
Exactly one must " - "be defined.") + @model_serializer + def serialize_model(self): + #Call parent serializer + super_fields = super().serialize_model() + #All fields custom to this model + model= { + "collection": self.collection, + "fields_list": self.fields_to_fields_list_conf_format + } + + #return the model + model.update(super_fields) + return model - return self +class MlModel(FileBackedLookup): + lookup_type: Literal[Lookup_Type.mlmodel] - - @staticmethod - def get_lookups(text_field: str, director:DirectorOutputDto, ignore_lookups:set[str]=LOOKUPS_TO_IGNORE)->list[Lookup]: - lookups_to_get = set(re.findall(r'[^output]lookup (?:update=true)?(?:append=t)?\s*([^\s]*)', text_field)) - lookups_to_ignore = set([lookup for lookup in lookups_to_get if any(to_ignore in lookups_to_get for to_ignore in ignore_lookups)]) - lookups_to_get -= lookups_to_ignore - return Lookup.mapNamesToSecurityContentObjects(list(lookups_to_get), director) - \ No newline at end of file + +LookupAdapter = TypeAdapter(Annotated[CSVLookup | KVStoreLookup | MlModel, Field(discriminator="lookup_type")]) + diff --git a/contentctl/objects/macro.py b/contentctl/objects/macro.py index ba5faa8f..8c25dff7 100644 --- a/contentctl/objects/macro.py +++ b/contentctl/objects/macro.py @@ -48,7 +48,6 @@ def serialize_model(self): return model @staticmethod - def get_macros(text_field:str, director:DirectorOutputDto , ignore_macros:set[str]=MACROS_TO_IGNORE)->list[Macro]: #Remove any comments, allowing there to be macros (which have a single backtick) inside those comments #If a comment ENDS in a macro, for example ```this is a comment with a macro `macro_here```` @@ -59,10 +58,10 @@ def get_macros(text_field:str, director:DirectorOutputDto , ignore_macros:set[st "This may have occurred when a macro was commented out.\n" "Please ammend your search to remove the substring '````'") - # replace all the macros with a space + # Replace all the comments with a space. This prevents a comment from looking like a macro to the parser below text_field = re.sub(r"\`\`\`[\s\S]*?\`\`\`", " ", text_field) - + # Find all the macros, which start and end with a '`' character macros_to_get = re.findall(r'`([^\s]+)`', text_field) #If macros take arguments, stop at the first argument. We just want the name of the macro macros_to_get = set([macro[:macro.find('(')] if macro.find('(') != -1 else macro for macro in macros_to_get]) diff --git a/contentctl/objects/mitre_attack_enrichment.py b/contentctl/objects/mitre_attack_enrichment.py index 401774e9..4a09209a 100644 --- a/contentctl/objects/mitre_attack_enrichment.py +++ b/contentctl/objects/mitre_attack_enrichment.py @@ -83,9 +83,9 @@ def standardize_contributors(cls, contributors:list[str] | None) -> list[str]: return [] return contributors -# TODO (#266): disable the use_enum_values configuration class MitreAttackEnrichment(BaseModel): - ConfigDict(use_enum_values=True) + + ConfigDict(extra='forbid') mitre_attack_id: MITRE_ATTACK_ID_TYPE = Field(...) mitre_attack_technique: str = Field(...) mitre_attack_tactics: List[MitreTactics] = Field(...) 
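
The LookupAdapter introduced above is a pydantic TypeAdapter over a discriminated union, so the lookup_type value in a lookup YML selects which concrete class (CSVLookup, KVStoreLookup, or MlModel) validates the payload. A self-contained sketch of the same pattern, not part of the patch and using stand-in models rather than the real contentctl classes (which require many more fields):

from typing import Annotated, Literal, Union
from pydantic import BaseModel, Field, TypeAdapter

# Stand-in models; the real CSVLookup/KVStoreLookup inherit from SecurityContentObject.
class CSVStandIn(BaseModel):
    lookup_type: Literal["csv"]
    name: str

class KVStoreStandIn(BaseModel):
    lookup_type: Literal["kvstore"]
    name: str
    fields: list[str]

LookupStandInAdapter = TypeAdapter(
    Annotated[Union[CSVStandIn, KVStoreStandIn], Field(discriminator="lookup_type")]
)

# The discriminator field decides which model validates the payload.
obj = LookupStandInAdapter.validate_python(
    {"lookup_type": "kvstore", "name": "example_lookup", "fields": ["_key", "dest"]}
)
assert isinstance(obj, KVStoreStandIn)
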
diff --git a/contentctl/objects/observable.py b/contentctl/objects/observable.py index daf7a70b..35eb535a 100644 --- a/contentctl/objects/observable.py +++ b/contentctl/objects/observable.py @@ -1,8 +1,10 @@ -from pydantic import BaseModel, field_validator +from pydantic import BaseModel, field_validator, ConfigDict from contentctl.objects.constants import SES_OBSERVABLE_TYPE_MAPPING, RBA_OBSERVABLE_ROLE_MAPPING +# TODO (cmcginley): should this class be removed? class Observable(BaseModel): + model_config = ConfigDict(extra="forbid") name: str type: str role: list[str] diff --git a/contentctl/objects/playbook_tags.py b/contentctl/objects/playbook_tags.py index fd4a21e6..10d90ac1 100644 --- a/contentctl/objects/playbook_tags.py +++ b/contentctl/objects/playbook_tags.py @@ -1,6 +1,6 @@ from __future__ import annotations from typing import TYPE_CHECKING, Optional, List -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field,ConfigDict import enum from contentctl.objects.detection import Detection @@ -36,6 +36,7 @@ class DefendTechnique(str,enum.Enum): D3_SRA = "D3-SRA" D3_RUAA = "D3-RUAA" class PlaybookTag(BaseModel): + model_config = ConfigDict(extra="forbid") analytic_story: Optional[list] = None detections: Optional[list] = None platform_tags: list[str] = Field(...,min_length=0) @@ -46,5 +47,8 @@ class PlaybookTag(BaseModel): use_cases: list[PlaybookUseCase] = Field([],min_length=0) defend_technique_id: Optional[List[DefendTechnique]] = None + labels:list[str] = [] + playbook_outputs:list[str] = [] + detection_objects: list[Detection] = [] \ No newline at end of file diff --git a/contentctl/objects/rba.py b/contentctl/objects/rba.py new file mode 100644 index 00000000..d1581e0f --- /dev/null +++ b/contentctl/objects/rba.py @@ -0,0 +1,90 @@ +from enum import Enum +from pydantic import BaseModel, computed_field, Field +from abc import ABC +from typing import Set, Annotated +from contentctl.objects.enums import RiskSeverity + + +RiskScoreValue_Type = Annotated[int, Field(ge=1, le=100)] + +class RiskObjectType(str, Enum): + SYSTEM = "system" + USER = "user" + OTHER = "other" + +class ThreatObjectType(str, Enum): + CERTIFICATE_COMMON_NAME = "certificate_common_name" + CERTIFICATE_ORGANIZATION = "certificate_organization" + CERTIFICATE_SERIAL = "certificate_serial" + CERTIFICATE_UNIT = "certificate_unit" + COMMAND = "command" + DOMAIN = "domain" + EMAIL_ADDRESS = "email_address" + EMAIL_SUBJECT = "email_subject" + FILE_HASH = "file_hash" + FILE_NAME = "file_name" + FILE_PATH = "file_path" + HTTP_USER_AGENT = "http_user_agent" + IP_ADDRESS = "ip_address" + PROCESS = "process" + PROCESS_NAME = "process_name" + PARENT_PROCESS = "parent_process" + PARENT_PROCESS_NAME = "parent_process_name" + PROCESS_HASH = "process_hash" + REGISTRY_PATH = "registry_path" + REGISTRY_VALUE_NAME = "registry_value_name" + REGISTRY_VALUE_TEXT = "registry_value_text" + SERVICE = "service" + SIGNATURE = "signature" + SYSTEM = "system" + TLS_HASH = "tls_hash" + URL = "url" + +class RiskObject(BaseModel): + field: str + type: RiskObjectType + score: RiskScoreValue_Type + + def __hash__(self): + return hash((self.field, self.type, self.score)) + +class ThreatObject(BaseModel): + field: str + type: ThreatObjectType + + def __hash__(self): + return hash((self.field, self.type)) + +class RBAObject(BaseModel, ABC): + message: str + risk_objects: Annotated[Set[RiskObject], Field(min_length=1)] + threat_objects: Set[ThreatObject] + + + + @computed_field + @property + def risk_score(self)->RiskScoreValue_Type: 
+ # First get the maximum score associated with + # a risk object. If there are no objects, then + # we should throw an exception. + if len(self.risk_objects) == 0: + raise Exception("There must be at least one Risk Object present to get Severity.") + return max([risk_object.score for risk_object in self.risk_objects]) + + @computed_field + @property + def severity(self)->RiskSeverity: + if 0 <= self.risk_score <= 20: + return RiskSeverity.INFORMATIONAL + elif 20 < self.risk_score <= 40: + return RiskSeverity.LOW + elif 40 < self.risk_score <= 60: + return RiskSeverity.MEDIUM + elif 60 < self.risk_score <= 80: + return RiskSeverity.HIGH + elif 80 < self.risk_score <= 100: + return RiskSeverity.CRITICAL + else: + raise Exception(f"Error getting severity - risk_score must be between 0-100, but was actually {self.risk_score}") + diff --git a/contentctl/objects/risk_event.py b/contentctl/objects/risk_event.py index de98bd0b..71ef3ed0 100644 --- a/contentctl/objects/risk_event.py +++ b/contentctl/objects/risk_event.py @@ -4,48 +4,7 @@ from pydantic import ConfigDict, BaseModel, Field, PrivateAttr, field_validator, computed_field from contentctl.objects.errors import ValidationFailed from contentctl.objects.detection import Detection -from contentctl.objects.observable import Observable - -# TODO (#259): Map our observable types to more than user/system -# TODO (#247): centralize this mapping w/ usage of SES_OBSERVABLE_TYPE_MAPPING (see -# observable.py) and the ad hoc mapping made in detection_abstract.py (see the risk property func) -TYPE_MAP: dict[str, list[str]] = { - "system": [ - "Hostname", - "IP Address", - "Endpoint" - ], - "user": [ - "User", - "User Name", - "Email Address", - "Email" - ], - "hash_values": [], - "network_artifacts": [], - "host_artifacts": [], - "tools": [], - "other": [ - "Process", - "URL String", - "Unknown", - "Process Name", - "MAC Address", - "File Name", - "File Hash", - "Resource UID", - "Uniform Resource Locator", - "File", - "Geo Location", - "Container", - "Registry Key", - "Registry Value", - "Other" - ] -} - -# Roles that should not generate risks -IGNORE_ROLES: list[str] = ["Attacker"] +from contentctl.objects.rba import RiskObject class RiskEvent(BaseModel): @@ -55,10 +14,12 @@ class RiskEvent(BaseModel): search_name: str # The subject of the risk event (e.g. a username, process name, system name, account ID, etc.) - risk_object: int | str + # (not to be confused w/ the risk object from the detection) + es_risk_object: int | str - # The type of the risk object (e.g. user, system, or other) - risk_object_type: str + # The type of the risk object from ES (e.g. 
user, system, or other) (not to be confused w/ + # the risk object from the detection) + es_risk_object_type: str # The level of risk associated w/ the risk event risk_score: int @@ -79,11 +40,11 @@ class RiskEvent(BaseModel): ) # Contributing events search query (we use this to derive the corresponding field from the - # observables) + # detection's risk object definition) contributing_events_search: str - # Private attribute caching the observable this RiskEvent is mapped to - _matched_observable: Observable | None = PrivateAttr(default=None) + # Private attribute caching the risk object this RiskEvent is mapped to + _matched_risk_object: RiskObject | None = PrivateAttr(default=None) # Allowing fields that aren't explicitly defined to be passed since some of the risk event's # fields vary depending on the SPL which generated them @@ -108,7 +69,7 @@ def _convert_str_value_to_singleton(cls, v: str | list[str]) -> list[str]: def source_field_name(self) -> str: """ A cached derivation of the source field name the risk event corresponds to in the relevant - event(s). Useful for mapping back to an observable in the detection. + event(s). Useful for mapping back to a risk object in the detection. """ pattern = re.compile( r"\| savedsearch \"" + self.search_name + r"\" \| search (?P[^=]+)=.+" @@ -128,13 +89,6 @@ def validate_against_detection(self, detection: Detection) -> None: :param detection: the detection associated w/ this risk event :raises: ValidationFailed """ - # Check risk_score - if self.risk_score != detection.tags.risk_score: - raise ValidationFailed( - f"Risk score observed in risk event ({self.risk_score}) does not match risk score in " - f"detection ({detection.tags.risk_score})." - ) - # Check analyticstories self.validate_analyticstories(detection) @@ -151,8 +105,15 @@ def validate_against_detection(self, detection: Detection) -> None: # Check risk_message self.validate_risk_message(detection) - # Check several conditions against the observables - self.validate_risk_against_observables(detection.tags.observable) + # Ensure the rba object is defined + if detection.rba is None: + raise ValidationFailed( + f"Unexpected error: Detection '{detection.name}' has no RBA objects associated " + "with it; cannot validate." + ) + + # Check several conditions against the detection's risk objects + self.validate_risk_against_risk_objects(detection.rba.risk_objects) def validate_mitre_ids(self, detection: Detection) -> None: """ @@ -177,7 +138,7 @@ def validate_analyticstories(self, detection: Detection) -> None: if sorted(self.analyticstories) != sorted(detection_analytic_story): raise ValidationFailed( f"Analytic stories in risk event ({self.analyticstories}) do not match those" - f" in detection ({detection.tags.analytic_story})." + f" in detection ({[x.name for x in detection.tags.analytic_story]})." ) def validate_risk_message(self, detection: Detection) -> None: @@ -186,10 +147,20 @@ def validate_risk_message(self, detection: Detection) -> None: :param detection: the detection associated w/ this risk event :raises: ValidationFailed """ + # Ensure the rba object is defined + if detection.rba is None: + raise ValidationFailed( + f"Unexpected error: Detection '{detection.name}' has no RBA objects associated " + "with it; cannot validate." 
+ ) + # Extract the field replacement tokens ("$...$") field_replacement_pattern = re.compile(r"\$\S+\$") - tokens = field_replacement_pattern.findall(detection.tags.message) + tokens = field_replacement_pattern.findall(detection.rba.message) + # TODO (#346): could expand this to get the field values from the raw events and check + # to see that all expected strings ARE in the risk message (as opposed to checking only + # that unexpected strings aren't) # Check for the presence of each token in the message from the risk event for token in tokens: if token in self.risk_message: @@ -205,7 +176,7 @@ def validate_risk_message(self, detection: Detection) -> None: escaped_source_message_with_placeholder: str = re.escape( field_replacement_pattern.sub( tmp_placeholder, - detection.tags.message + detection.rba.message ) ) placeholder_replacement_pattern = re.compile(tmp_placeholder) @@ -221,114 +192,86 @@ def validate_risk_message(self, detection: Detection) -> None: raise ValidationFailed( "Risk message in event does not match the pattern set by the detection. Message in " f"risk event: \"{self.risk_message}\". Message in detection: " - f"\"{detection.tags.message}\"." + f"\"{detection.rba.message}\"." ) - def validate_risk_against_observables(self, observables: list[Observable]) -> None: + def validate_risk_against_risk_objects(self, risk_objects: set[RiskObject]) -> None: """ - Given the observables from the associated detection, validate the risk event against those - observables - :param observables: the Observable objects from the detection + Given the risk objects from the associated detection, validate the risk event against those + risk objects + :param risk_objects: the risk objects from the detection :raises: ValidationFailed """ - # Get the matched observable; will raise validation errors if no match can be made or if - # risk is missing values associated w/ observables - matched_observable = self.get_matched_observable(observables) + # Get the matched risk object; will raise validation errors if no match can be made or if + # risk is missing values associated w/ risk objects + matched_risk_object = self.get_matched_risk_object(risk_objects) - # The risk object type should match our mapping of observable types to risk types - expected_type = RiskEvent.observable_type_to_risk_type(matched_observable.type) - if self.risk_object_type != expected_type: + # The risk object type from the risk event should match our mapping of internal risk object + # types + if self.es_risk_object_type != matched_risk_object.type.value: raise ValidationFailed( - f"The risk object type ({self.risk_object_type}) does not match the expected type " - f"based on the matched observable ({matched_observable.type}->{expected_type}): " - f"risk=(object={self.risk_object}, type={self.risk_object_type}, " - f"source_field_name={self.source_field_name}), " - f"observable=(name={matched_observable.name}, type={matched_observable.type}, " - f"role={matched_observable.role})" + f"The risk object type from the risk event ({self.es_risk_object_type}) does not match" + " the expected type based on the matched risk object " + f"({matched_risk_object.type.value}): risk event=(object={self.es_risk_object}, " + f"type={self.es_risk_object_type}, source_field_name={self.source_field_name}), " + f"risk object=(name={matched_risk_object.field}, " + f"type={matched_risk_object.type.value})" ) - @staticmethod - def observable_type_to_risk_type(observable_type: str) -> str: - """ - Given a string representing the observable type, use
our mapping to convert it to the - expected type in the risk event - :param observable_type: the type of the observable - :returns: a string (the risk object type) - :raises ValueError: if the observable type has not yet been mapped to a risk object type - """ - # Iterate over the map and search the lists for a match - for risk_type in TYPE_MAP: - if observable_type in TYPE_MAP[risk_type]: - return risk_type - - raise ValueError( - f"Observable type {observable_type} does not have a mapping to a risk type in TYPE_MAP" - ) + # Check risk_score + if self.risk_score != matched_risk_object.score: + raise ValidationFailed( + f"Risk score observed in risk event ({self.risk_score}) does not match risk score in " + f"matched risk object from detection ({matched_risk_object.score})." + ) - @staticmethod - def ignore_observable(observable: Observable) -> bool: - """ - Given an observable, determine based on its roles if it should be ignored in risk/observable - matching (e.g. Attacker role observables should not generate risk events) - :param observable: the Observable object we are checking the roles of - :returns: a bool indicating whether this observable should be ignored or not - """ - ignore = False - for role in observable.role: - if role in IGNORE_ROLES: - ignore = True - break - return ignore - - def get_matched_observable(self, observables: list[Observable]) -> Observable: + def get_matched_risk_object(self, risk_objects: set[RiskObject]) -> RiskObject: """ - Given a list of observables, return the one this risk event matches - :param observables: the list of Observable objects we are checking against - :returns: the matched Observable object + Given a set of risk objects, return the one this risk event matches + :param risk_objects: the set of risk objects we are checking against + :returns: the matched risk object :raises ValidationFailed: if a match could not be made or if an expected field (based on - one of the observables) could not be found in the risk event + one of the risk objects) could not be found in the risk event """ # Return the cached match if already found - if self._matched_observable is not None: - return self._matched_observable + if self._matched_risk_object is not None: + return self._matched_risk_object - matched_observable: Observable | None = None + matched_risk_object: RiskObject | None = None # Iterate over the obervables and check for a match - for observable in observables: + for risk_object in risk_objects: # TODO (#252): Refactor and re-enable per-field validation of risk events - # Each the field name used in each observable shoud be present in the risk event - # if not hasattr(self, observable.name): + # The field name used in each risk object should be present in the risk event + # if not hasattr(self, risk_object.field): # raise ValidationFailed( - # f"Observable field \"{observable.name}\" not found in risk event."
# ) - # Try to match the risk_object against a specific observable for the obervables with - # a valid role (some, like Attacker, shouldn't get converted to risk events) - if self.source_field_name == observable.name: - if matched_observable is not None: + # Try to match the risk_object against a specific risk object + if self.source_field_name == risk_object.field: + # TODO (#347): enforce that field names are not repeated across risk objects as + # part of build/validate + if matched_risk_object is not None: raise ValueError( - "Unexpected conditon: we don't expect the source event field " - "corresponding to an observables field name to be repeated." + "Unexpected condition: we don't expect multiple risk objects to use the " + "same field name, so we should not be able to match the risk event to " + "multiple risk objects." ) - # Report any risk events we find that shouldn't be there - if RiskEvent.ignore_observable(observable): - raise ValidationFailed( - "Risk event matched an observable with an invalid role: " - f"(name={observable.name}, type={observable.type}, role={observable.role})") - # NOTE: we explicitly do not break early as we want to check each observable - matched_observable = observable + # NOTE: we explicitly do not break early as we want to check each risk object + matched_risk_object = risk_object - # Ensure we were able to match the risk event to a specific observable - if matched_observable is None: + # Ensure we were able to match the risk event to a specific risk object + if matched_risk_object is None: raise ValidationFailed( - f"Unable to match risk event (object={self.risk_object}, type=" - f"{self.risk_object_type}, source_field_name={self.source_field_name}) to an " - "observable; please check for errors in the observable roles/types for this " - "detection, as well as the risk event build process in contentctl." + f"Unable to match risk event (object={self.es_risk_object}, type=" + f"{self.es_risk_object_type}, source_field_name={self.source_field_name}) to a " + "risk object in the detection; please check for errors in the risk object types for this " + "detection, as well as the risk event build process in contentctl (e.g. threat " + "objects aren't being converted to risk objects somehow)." ) - # Cache and return the matched observable - self._matched_observable = matched_observable - return self._matched_observable + # Cache and return the matched risk object + self._matched_risk_object = matched_risk_object + return self._matched_risk_object diff --git a/contentctl/objects/story_tags.py b/contentctl/objects/story_tags.py index 42eb2f37..e1bd45dc 100644 --- a/contentctl/objects/story_tags.py +++ b/contentctl/objects/story_tags.py @@ -18,9 +18,8 @@ class StoryUseCase(str,Enum): OTHER = "Other" -# TODO (#266): disable the use_enum_values configuration class StoryTags(BaseModel): - model_config = ConfigDict(extra='forbid', use_enum_values=True) + model_config = ConfigDict(extra='forbid') category: List[StoryCategory] = Field(...,min_length=1) product: List[SecurityContentProductName] = Field(...,min_length=1) usecase: StoryUseCase = Field(...)
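
Taken together, the new rba.py module and the risk_event.py changes above move risk scoring from confidence/impact on the detection tags to per-risk-object scores: risk_score is the maximum score across a detection's risk objects, and severity is banded from that score. A rough usage sketch against the module as shown above, not part of the patch; the field names and scores are illustrative, and a concrete subclass is used because RBAObject is declared as an ABC:

from contentctl.objects.enums import RiskSeverity
from contentctl.objects.rba import (
    RBAObject, RiskObject, RiskObjectType, ThreatObject, ThreatObjectType,
)

# Trivial concrete subclass for illustration, since RBAObject itself is abstract.
class ExampleRBA(RBAObject):
    pass

rba = ExampleRBA(
    message="Example risk message for $user$ on $dest$",
    risk_objects={
        RiskObject(field="dest", type=RiskObjectType.SYSTEM, score=50),
        RiskObject(field="user", type=RiskObjectType.USER, score=70),
    },
    threat_objects={
        ThreatObject(field="parent_process_name", type=ThreatObjectType.PARENT_PROCESS_NAME),
    },
)

# risk_score is the max across the risk objects; severity is banded from it (60 < 70 <= 80 -> high).
assert rba.risk_score == 70
assert rba.severity == RiskSeverity.HIGH
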
diff --git a/contentctl/objects/test_attack_data.py b/contentctl/objects/test_attack_data.py index 2c53df0b..5d5f9c80 100644 --- a/contentctl/objects/test_attack_data.py +++ b/contentctl/objects/test_attack_data.py @@ -1,8 +1,9 @@ from __future__ import annotations -from pydantic import BaseModel, HttpUrl, FilePath, Field +from pydantic import BaseModel, HttpUrl, FilePath, Field, ConfigDict class TestAttackData(BaseModel): + model_config = ConfigDict(extra="forbid") data: HttpUrl | FilePath = Field(...) # TODO - should source and sourcetype should be mapped to a list # of supported source and sourcetypes in a given environment? diff --git a/contentctl/objects/unit_test_baseline.py b/contentctl/objects/unit_test_baseline.py index 9ba49336..66a60594 100644 --- a/contentctl/objects/unit_test_baseline.py +++ b/contentctl/objects/unit_test_baseline.py @@ -1,9 +1,10 @@ -from pydantic import BaseModel +from pydantic import BaseModel,ConfigDict from typing import Union class UnitTestBaseline(BaseModel): + model_config = ConfigDict(extra="forbid") name: str file: str pass_condition: str diff --git a/contentctl/output/api_json_output.py b/contentctl/output/api_json_output.py index d81b8162..87760373 100644 --- a/contentctl/output/api_json_output.py +++ b/contentctl/output/api_json_output.py @@ -1,246 +1,259 @@ +from __future__ import annotations +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from contentctl.objects.detection import Detection + from contentctl.objects.lookup import Lookup + from contentctl.objects.macro import Macro + from contentctl.objects.story import Story + from contentctl.objects.baseline import Baseline + from contentctl.objects.investigation import Investigation + from contentctl.objects.deployment import Deployment + import os -import json import pathlib from contentctl.output.json_writer import JsonWriter -from contentctl.objects.enums import SecurityContentType -from contentctl.objects.abstract_security_content_objects.security_content_object_abstract import ( - SecurityContentObject_Abstract, -) class ApiJsonOutput: + output_path: pathlib.Path + app_label: str + + def __init__(self, output_path:pathlib.Path, app_label: str): + self.output_path = output_path + self.app_label = app_label - def writeObjects( + def writeDetections( self, - objects: list[SecurityContentObject_Abstract], - output_path: pathlib.Path, - app_label:str = "ESCU", - contentType: SecurityContentType = None + objects: list[Detection], ) -> None: - """#Serialize all objects - try: - for obj in objects: - - serialized_objects.append(obj.model_dump()) - except Exception as e: - raise Exception(f"Error serializing object with name '{obj.name}' and type '{type(obj).__name__}': '{str(e)}'") - """ - - if contentType == SecurityContentType.detections: - detections = [ - detection.model_dump( - include=set( - [ - "name", - "author", - "date", - "version", - "id", - "description", - "tags", - "search", - "how_to_implement", - "known_false_positives", - "references", - "datamodel", - "macros", - "lookups", - "source", - "nes_fields", - ] - ) + detections = [ + detection.model_dump( + include=set( + [ + "name", + "author", + "date", + "version", + "id", + "description", + "tags", + "search", + "how_to_implement", + "known_false_positives", + "references", + "datamodel", + "macros", + "lookups", + "source", + "nes_fields", + ] ) - for detection in objects - ] - #Only a subset of macro fields are required: - # for detection in detections: - # new_macros = [] - # for macro in detection.get("macros",[]): - # 
new_macro_fields = {} - # new_macro_fields["name"] = macro.get("name") - # new_macro_fields["definition"] = macro.get("definition") - # new_macro_fields["description"] = macro.get("description") - # if len(macro.get("arguments", [])) > 0: - # new_macro_fields["arguments"] = macro.get("arguments") - # new_macros.append(new_macro_fields) - # detection["macros"] = new_macros - # del() - - - JsonWriter.writeJsonObject( - os.path.join(output_path, "detections.json"), "detections", detections - ) - - elif contentType == SecurityContentType.macros: - macros = [ - macro.model_dump(include=set(["definition", "description", "name"])) - for macro in objects - ] - for macro in macros: - for k in ["author", "date","version","id","references"]: - if k in macro: - del(macro[k]) - JsonWriter.writeJsonObject( - os.path.join(output_path, "macros.json"), "macros", macros ) - - elif contentType == SecurityContentType.stories: - stories = [ - story.model_dump( - include=set( - [ - "name", - "author", - "date", - "version", - "id", - "description", - "narrative", - "references", - "tags", - "detections_names", - "investigation_names", - "baseline_names", - "detections", - ] - ) - ) - for story in objects - ] - # Only get certain fields from detections - for story in stories: - # Only use a small subset of fields from the detection - story["detections"] = [ - { - "name": detection["name"], - "source": detection["source"], - "type": detection["type"], - "tags": detection["tags"].get("mitre_attack_enrichments", []), - } - for detection in story["detections"] - ] - story["detection_names"] = [f"{app_label} - {name} - Rule" for name in story["detection_names"]] + for detection in objects + ] + #Only a subset of macro fields are required: + # for detection in detections: + # new_macros = [] + # for macro in detection.get("macros",[]): + # new_macro_fields = {} + # new_macro_fields["name"] = macro.get("name") + # new_macro_fields["definition"] = macro.get("definition") + # new_macro_fields["description"] = macro.get("description") + # if len(macro.get("arguments", [])) > 0: + # new_macro_fields["arguments"] = macro.get("arguments") + # new_macros.append(new_macro_fields) + # detection["macros"] = new_macros + # del() - - JsonWriter.writeJsonObject( - os.path.join(output_path, "stories.json"), "stories", stories + + JsonWriter.writeJsonObject( + os.path.join(self.output_path, "detections.json"), "detections", detections + ) + + def writeMacros( + self, + objects: list[Macro], + ) -> None: + macros = [ + macro.model_dump(include=set(["definition", "description", "name"])) + for macro in objects + ] + for macro in macros: + for k in ["author", "date","version","id","references"]: + if k in macro: + del(macro[k]) + JsonWriter.writeJsonObject( + os.path.join(self.output_path, "macros.json"), "macros", macros + ) + + def writeStories( + self, + objects: list[Story], + ) -> None: + stories = [ + story.model_dump( + include=set( + [ + "name", + "author", + "date", + "version", + "id", + "description", + "narrative", + "references", + "tags", + "detections_names", + "investigation_names", + "baseline_names", + "detections", + ] + ) ) + for story in objects + ] + # Only get certain fields from detections + for story in stories: + # Only use a small subset of fields from the detection + story["detections"] = [ + { + "name": detection["name"], + "source": detection["source"], + "type": detection["type"], + "tags": detection["tags"].get("mitre_attack_enrichments", []), + } + for detection in story["detections"] + ] + 
story["detection_names"] = [f"{self.app_label} - {name} - Rule" for name in story["detection_names"]] + - elif contentType == SecurityContentType.baselines: - try: - baselines = [ - baseline.model_dump( - include=set( - [ - "name", - "author", - "date", - "version", - "id", - "description", - "type", - "datamodel", - "search", - "how_to_implement", - "known_false_positives", - "references", - "tags", - ] - ) - ) - for baseline in objects - ] - except Exception as e: - print(e) - print('wait') + JsonWriter.writeJsonObject( + os.path.join(self.output_path, "stories.json"), "stories", stories + ) - JsonWriter.writeJsonObject( - os.path.join(output_path, "baselines.json"), "baselines", baselines + def writeBaselines( + self, + objects: list[Baseline], + ) -> None: + baselines = [ + baseline.model_dump( + include=set( + [ + "name", + "author", + "date", + "version", + "id", + "description", + "type", + "datamodel", + "search", + "how_to_implement", + "known_false_positives", + "references", + "tags", + ] ) + ) + for baseline in objects + ] + + JsonWriter.writeJsonObject( + os.path.join(self.output_path, "baselines.json"), "baselines", baselines + ) - elif contentType == SecurityContentType.investigations: - investigations = [ - investigation.model_dump( - include=set( - [ - "name", - "author", - "date", - "version", - "id", - "description", - "type", - "datamodel", - "search", - "how_to_implemnet", - "known_false_positives", - "references", - "inputs", - "tags", - "lowercase_name", - ] - ) + def writeInvestigations( + self, + objects: list[Investigation], + ) -> None: + investigations = [ + investigation.model_dump( + include=set( + [ + "name", + "author", + "date", + "version", + "id", + "description", + "type", + "datamodel", + "search", + "how_to_implemnet", + "known_false_positives", + "references", + "inputs", + "tags", + "lowercase_name", + ] ) - for investigation in objects - ] - JsonWriter.writeJsonObject( - os.path.join(output_path, "response_tasks.json"), - "response_tasks", - investigations, ) + for investigation in objects + ] + JsonWriter.writeJsonObject( + os.path.join(self.output_path, "response_tasks.json"), + "response_tasks", + investigations, + ) - elif contentType == SecurityContentType.lookups: - lookups = [ - lookup.model_dump( - include=set( - [ - "name", - "description", - "collection", - "fields_list", - "filename", - "default_match", - "match_type", - "min_matches", - "case_sensitive_match", - ] - ) + def writeLookups( + self, + objects: list[Lookup], + ) -> None: + lookups = [ + lookup.model_dump( + include=set( + [ + "name", + "description", + "collection", + "fields_list", + "filename", + "default_match", + "match_type", + "min_matches", + "case_sensitive_match", + ] ) - for lookup in objects - ] - for lookup in lookups: - for k in ["author","date","version","id","references"]: - if k in lookup: - del(lookup[k]) - JsonWriter.writeJsonObject( - os.path.join(output_path, "lookups.json"), "lookups", lookups ) + for lookup in objects + ] + for lookup in lookups: + for k in ["author","date","version","id","references"]: + if k in lookup: + del(lookup[k]) + JsonWriter.writeJsonObject( + os.path.join(self.output_path, "lookups.json"), "lookups", lookups + ) - elif contentType == SecurityContentType.deployments: - deployments = [ - deployment.model_dump( - include=set( - [ - "name", - "author", - "date", - "version", - "id", - "description", - "scheduling", - "rba", - "tags" - ] - ) + def writeDeployments( + self, + objects: list[Deployment], + ) -> None: + deployments = [ 
+ deployment.model_dump( + include=set( + [ + "name", + "author", + "date", + "version", + "id", + "description", + "scheduling", + "rba", + "tags" + ] ) - for deployment in objects - ] - #references are not to be included, but have been deleted in the - #model_serialization logic - JsonWriter.writeJsonObject( - os.path.join(output_path, "deployments.json"), - "deployments", - deployments, - ) \ No newline at end of file + ) + for deployment in objects + ] + #references are not to be included, but have been deleted in the + #model_serialization logic + JsonWriter.writeJsonObject( + os.path.join(self.output_path, "deployments.json"), + "deployments", + deployments, + ) \ No newline at end of file diff --git a/contentctl/output/conf_output.py b/contentctl/output/conf_output.py index e53aeba0..c5a67673 100644 --- a/contentctl/output/conf_output.py +++ b/contentctl/output/conf_output.py @@ -1,22 +1,22 @@ -from dataclasses import dataclass -import os -import glob +from __future__ import annotations +from typing import TYPE_CHECKING, Callable +if TYPE_CHECKING: + from contentctl.objects.detection import Detection + from contentctl.objects.lookup import Lookup + from contentctl.objects.macro import Macro + from contentctl.objects.dashboard import Dashboard + from contentctl.objects.story import Story + from contentctl.objects.baseline import Baseline + from contentctl.objects.investigation import Investigation + +from contentctl.objects.lookup import FileBackedLookup import shutil -import sys import tarfile -from typing import Union -from pathlib import Path import pathlib -import time import timeit import datetime -import shutil -import json from contentctl.output.conf_writer import ConfWriter -from contentctl.objects.enums import SecurityContentType from contentctl.objects.config import build -from requests import Session, post, get -from requests.auth import HTTPBasicAuth class ConfOutput: config: build @@ -80,25 +80,33 @@ def writeMiscellaneousAppFiles(self)->set[pathlib.Path]: return written_files - def writeObjects(self, objects: list, type: SecurityContentType = None) -> set[pathlib.Path]: + def writeDetections(self, objects:list[Detection]) -> set[pathlib.Path]: written_files:set[pathlib.Path] = set() - if type == SecurityContentType.detections: - for output_app_path, template_name in [ ('default/savedsearches.conf', 'savedsearches_detections.j2'), - ('default/analyticstories.conf', 'analyticstories_detections.j2')]: - written_files.add(ConfWriter.writeConfFile(pathlib.Path(output_app_path), - template_name, self.config, objects)) - - elif type == SecurityContentType.stories: + for output_app_path, template_name in [ ('default/savedsearches.conf', 'savedsearches_detections.j2'), + ('default/analyticstories.conf', 'analyticstories_detections.j2')]: + written_files.add(ConfWriter.writeConfFile(pathlib.Path(output_app_path), + template_name, self.config, objects)) + return written_files + + + def writeStories(self, objects:list[Story]) -> set[pathlib.Path]: + written_files:set[pathlib.Path] = set() written_files.add(ConfWriter.writeConfFile(pathlib.Path('default/analyticstories.conf'), 'analyticstories_stories.j2', self.config, objects)) + return written_files + - elif type == SecurityContentType.baselines: + def writeBaselines(self, objects:list[Baseline]) -> set[pathlib.Path]: + written_files:set[pathlib.Path] = set() written_files.add(ConfWriter.writeConfFile(pathlib.Path('default/savedsearches.conf'), 'savedsearches_baselines.j2', self.config, objects)) + return written_files + - elif 
type == SecurityContentType.investigations: + def writeInvestigations(self, objects:list[Investigation]) -> set[pathlib.Path]: + written_files:set[pathlib.Path] = set() for output_app_path, template_name in [ ('default/savedsearches.conf', 'savedsearches_investigations.j2'), ('default/analyticstories.conf', 'analyticstories_investigations.j2')]: ConfWriter.writeConfFile(pathlib.Path(output_app_path), @@ -106,7 +114,7 @@ def writeObjects(self, objects: list, type: SecurityContentType = None) -> set[p self.config, objects) - workbench_panels = [] + workbench_panels:list[Investigation] = [] for investigation in objects: if investigation.inputs: response_file_name_xml = investigation.lowercase_name + "___response_task.xml" @@ -128,8 +136,11 @@ def writeObjects(self, objects: list, type: SecurityContentType = None) -> set[p template_name, self.config, workbench_panels)) + return written_files + - elif type == SecurityContentType.lookups: + def writeLookups(self, objects:list[Lookup]) -> set[pathlib.Path]: + written_files:set[pathlib.Path] = set() for output_app_path, template_name in [ ('default/collections.conf', 'collections.j2'), ('default/transforms.conf', 'transforms.j2')]: written_files.add(ConfWriter.writeConfFile(pathlib.Path(output_app_path), @@ -137,9 +148,7 @@ def writeObjects(self, objects: list, type: SecurityContentType = None) -> set[p self.config, objects)) - - #we want to copy all *.mlmodel files as well, not just csvs - files = list(glob.iglob(str(self.config.path/ 'lookups/*.csv'))) + list(glob.iglob(str(self.config.path / 'lookups/*.mlmodel'))) + #Get the path to the lookups folder lookup_folder = self.config.getPackageDirectoryPath()/"lookups" # Make the new folder for the lookups @@ -147,26 +156,24 @@ def writeObjects(self, objects: list, type: SecurityContentType = None) -> set[p lookup_folder.mkdir(exist_ok=True) #Copy each lookup into the folder - for lookup_name in files: - lookup_path = pathlib.Path(lookup_name) - if lookup_path.is_file(): - shutil.copy(lookup_path, lookup_folder/lookup_path.name) - else: - raise(Exception(f"Error copying lookup/mlmodel file. 
Path {lookup_path} does not exist or is not a file.")) - - elif type == SecurityContentType.macros: + for lookup in objects: + if isinstance(lookup, FileBackedLookup): + shutil.copy(lookup.filename, lookup_folder/lookup.app_filename.name) + return written_files + + + def writeMacros(self, objects:list[Macro]) -> set[pathlib.Path]: + written_files:set[pathlib.Path] = set() written_files.add(ConfWriter.writeConfFile(pathlib.Path('default/macros.conf'), 'macros.j2', self.config, objects)) - - elif type == SecurityContentType.dashboards: - written_files.update(ConfWriter.writeDashboardFiles(self.config, objects)) - - - return written_files - - + return written_files + + def writeDashboards(self, objects:list[Dashboard]) -> set[pathlib.Path]: + written_files:set[pathlib.Path] = set() + written_files.update(ConfWriter.writeDashboardFiles(self.config, objects)) + return written_files def packageAppTar(self) -> None: @@ -202,7 +209,7 @@ def packageAppSlim(self) -> None: - def packageApp(self, method=packageAppTar)->None: + def packageApp(self, method: Callable[[ConfOutput],None]=packageAppTar)->None: return method(self) diff --git a/contentctl/output/conf_writer.py b/contentctl/output/conf_writer.py index 410ce4f6..bcfb6d19 100644 --- a/contentctl/output/conf_writer.py +++ b/contentctl/output/conf_writer.py @@ -1,16 +1,17 @@ -from typing import Any +import configparser import datetime -import re -import os import json -import configparser -from xmlrpc.client import APPLICATION_ERROR -from jinja2 import Environment, FileSystemLoader, StrictUndefined +import os import pathlib -from contentctl.objects.security_content_object import SecurityContentObject -from contentctl.objects.dashboard import Dashboard -from contentctl.objects.config import build +import re import xml.etree.ElementTree as ET +from typing import Any, Sequence + +from jinja2 import Environment, FileSystemLoader, StrictUndefined + +from contentctl.objects.config import CustomApp, build +from contentctl.objects.dashboard import Dashboard +from contentctl.objects.security_content_object import SecurityContentObject # This list is not exhaustive of all default conf files, but should be # sufficient for our purposes. @@ -82,59 +83,68 @@ "workload_rules.conf", ] -class ConfWriter(): +class ConfWriter: @staticmethod - def custom_jinja2_enrichment_filter(string:str, object:SecurityContentObject): + def custom_jinja2_enrichment_filter(string: str, object: SecurityContentObject): substitutions = re.findall(r"%[^%]*%", string) updated_string = string for sub in substitutions: - sub_without_percents = sub.replace("%","") + sub_without_percents = sub.replace("%", "") if hasattr(object, sub_without_percents): - updated_string = updated_string.replace(sub, str(getattr(object, sub_without_percents))) - elif hasattr(object,'tags') and hasattr(object.tags, sub_without_percents): - updated_string = updated_string.replace(sub, str(getattr(object.tags, sub_without_percents))) + updated_string = updated_string.replace( + sub, str(getattr(object, sub_without_percents)) + ) + elif hasattr(object, "tags") and hasattr(object.tags, sub_without_percents): + updated_string = updated_string.replace( + sub, str(getattr(object.tags, sub_without_percents)) + ) else: raise Exception(f"Unable to find field {sub} in object {object.name}") - + return updated_string - + @staticmethod - def escapeNewlines(obj:Any): + def escapeNewlines(obj: Any): # Ensure that any newlines that occur in a string are escaped with a \. 
# Failing to do so will result in an improperly formatted conf files that # cannot be parsed - if isinstance(obj,str): - # Remove leading and trailing characters. Conf parsers may erroneously - # Parse fields if they have leading or trailing newlines/whitespace and we + if isinstance(obj, str): + # Remove leading and trailing characters. Conf parsers may erroneously + # Parse fields if they have leading or trailing newlines/whitespace and we # probably don't want that anyway as it doesn't look good in output - return obj.strip().replace(f"\n"," \\\n") + return obj.strip().replace("\n", " \\\n") else: return obj - @staticmethod - def writeConfFileHeader(app_output_path:pathlib.Path, config: build) -> pathlib.Path: - output = ConfWriter.writeFileHeader(app_output_path, config) - - output_path = config.getPackageDirectoryPath()/app_output_path + def writeConfFileHeader( + app_output_path: pathlib.Path, config: build + ) -> pathlib.Path: + output = ConfWriter.writeFileHeader(app_output_path, config) + + output_path = config.getPackageDirectoryPath() / app_output_path output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'w') as f: - output = output.encode('utf-8', 'ignore').decode('utf-8') + with open(output_path, "w") as f: + output = output.encode("utf-8", "ignore").decode("utf-8") f.write(output) - #Ensure that the conf file we just generated/update is syntactically valid - ConfWriter.validateConfFile(output_path) + # Ensure that the conf file we just generated/update is syntactically valid + ConfWriter.validateConfFile(output_path) return output_path @staticmethod - def getCustomConfFileStems(config:build)->list[str]: + def getCustomConfFileStems(config: build) -> list[str]: # Get all the conf files in the default directory. 
We must make a reload.conf_file = simple key/value for them if # they are custom conf files - default_path = config.getPackageDirectoryPath()/"default" + default_path = config.getPackageDirectoryPath() / "default" conf_files = default_path.glob("*.conf") - - custom_conf_file_stems = [conf_file.stem for conf_file in conf_files if conf_file.name not in DEFAULT_CONF_FILES] + + custom_conf_file_stems = [ + conf_file.stem + for conf_file in conf_files + if conf_file.name not in DEFAULT_CONF_FILES + ] return sorted(custom_conf_file_stems) @staticmethod @@ -145,16 +155,17 @@ def writeServerConf(config: build) -> pathlib.Path: j2_env = ConfWriter.getJ2Environment() template = j2_env.get_template(template_name) - output = template.render(custom_conf_files=ConfWriter.getCustomConfFileStems(config)) - - output_path = config.getPackageDirectoryPath()/app_output_path + output = template.render( + custom_conf_files=ConfWriter.getCustomConfFileStems(config) + ) + + output_path = config.getPackageDirectoryPath() / app_output_path output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'a') as f: - output = output.encode('utf-8', 'ignore').decode('utf-8') + with open(output_path, "a") as f: + output = output.encode("utf-8", "ignore").decode("utf-8") f.write(output) return output_path - @staticmethod def writeAppConf(config: build) -> pathlib.Path: app_output_path = pathlib.Path("default/app.conf") @@ -163,135 +174,195 @@ def writeAppConf(config: build) -> pathlib.Path: j2_env = ConfWriter.getJ2Environment() template = j2_env.get_template(template_name) - output = template.render(custom_conf_files=ConfWriter.getCustomConfFileStems(config), - app=config.app) - - output_path = config.getPackageDirectoryPath()/app_output_path + output = template.render( + custom_conf_files=ConfWriter.getCustomConfFileStems(config), app=config.app + ) + + output_path = config.getPackageDirectoryPath() / app_output_path output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'a') as f: - output = output.encode('utf-8', 'ignore').decode('utf-8') + with open(output_path, "a") as f: + output = output.encode("utf-8", "ignore").decode("utf-8") f.write(output) return output_path @staticmethod - def writeManifestFile(app_output_path:pathlib.Path, template_name : str, config: build, objects : list) -> pathlib.Path: + def writeManifestFile( + app_output_path: pathlib.Path, + template_name: str, + config: build, + objects: list[CustomApp], + ) -> pathlib.Path: j2_env = ConfWriter.getJ2Environment() template = j2_env.get_template(template_name) - - output = template.render(objects=objects, app=config.app, currentDate=datetime.datetime.now(datetime.UTC).date().isoformat()) - - output_path = config.getPackageDirectoryPath()/app_output_path + + output = template.render( + objects=objects, + app=config.app, + currentDate=datetime.datetime.now(datetime.UTC).date().isoformat(), + ) + + output_path = config.getPackageDirectoryPath() / app_output_path output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'w') as f: - output = output.encode('utf-8', 'ignore').decode('utf-8') + with open(output_path, "w") as f: + output = output.encode("utf-8", "ignore").decode("utf-8") f.write(output) return output_path - - @staticmethod - def writeFileHeader(app_output_path:pathlib.Path, config: build) -> str: - #Do not output microseconds or +00:000 at the end of the datetime string - utc_time = datetime.datetime.now(datetime.UTC).replace(microsecond=0,tzinfo=None).isoformat() - - j2_env = 
Environment( - loader=FileSystemLoader(os.path.join(os.path.dirname(__file__), 'templates')), - trim_blocks=True) + def writeFileHeader(app_output_path: pathlib.Path, config: build) -> str: + # Do not output microseconds or +00:000 at the end of the datetime string + utc_time = ( + datetime.datetime.now(datetime.UTC) + .replace(microsecond=0, tzinfo=None) + .isoformat() + ) - template = j2_env.get_template('header.j2') - output = template.render(time=utc_time, author=' - '.join([config.app.author_name,config.app.author_company]), author_email=config.app.author_email) - - return output + j2_env = Environment( + loader=FileSystemLoader( + os.path.join(os.path.dirname(__file__), "templates") + ), + trim_blocks=True, + ) + template = j2_env.get_template("header.j2") + output = template.render( + time=utc_time, + author=" - ".join([config.app.author_name, config.app.author_company]), + author_email=config.app.author_email, + ) + return output @staticmethod - def writeXmlFile(app_output_path:pathlib.Path, template_name : str, config: build, objects : list) -> None: - - + def writeXmlFile( + app_output_path: pathlib.Path, + template_name: str, + config: build, + objects: list[str], + ) -> None: j2_env = ConfWriter.getJ2Environment() template = j2_env.get_template(template_name) - + output = template.render(objects=objects, app=config.app) - - output_path = config.getPackageDirectoryPath()/app_output_path + + output_path = config.getPackageDirectoryPath() / app_output_path output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'a') as f: - output = output.encode('utf-8', 'ignore').decode('utf-8') + with open(output_path, "a") as f: + output = output.encode("utf-8", "ignore").decode("utf-8") f.write(output) - - #Ensure that the conf file we just generated/update is syntactically valid - ConfWriter.validateXmlFile(output_path) - + # Ensure that the conf file we just generated/update is syntactically valid + ConfWriter.validateXmlFile(output_path) @staticmethod - def writeDashboardFiles(config:build, dashboards:list[Dashboard])->set[pathlib.Path]: - written_files:set[pathlib.Path] = set() + def writeDashboardFiles( + config: build, dashboards: list[Dashboard] + ) -> set[pathlib.Path]: + written_files: set[pathlib.Path] = set() for dashboard in dashboards: output_file_path = dashboard.getOutputFilepathRelativeToAppRoot(config) # Check that the full output path does not exist so that we are not having an # name collision with a file in app_template - if (config.getPackageDirectoryPath()/output_file_path).exists(): - raise FileExistsError(f"ERROR: Overwriting Dashboard File {output_file_path}. Does this file exist in {config.getAppTemplatePath()} AND {config.path/'dashboards'}?") - + if (config.getPackageDirectoryPath() / output_file_path).exists(): + raise FileExistsError( + f"ERROR: Overwriting Dashboard File {output_file_path}. Does this file exist in {config.getAppTemplatePath()} AND {config.path / 'dashboards'}?" 
+ ) + ConfWriter.writeXmlFileHeader(output_file_path, config) dashboard.writeDashboardFile(ConfWriter.getJ2Environment(), config) - ConfWriter.validateXmlFile(config.getPackageDirectoryPath()/output_file_path) + ConfWriter.validateXmlFile( + config.getPackageDirectoryPath() / output_file_path + ) written_files.add(output_file_path) return written_files - @staticmethod - def writeXmlFileHeader(app_output_path:pathlib.Path, config: build) -> None: - output = ConfWriter.writeFileHeader(app_output_path, config) + def writeXmlFileHeader(app_output_path: pathlib.Path, config: build) -> None: + output = ConfWriter.writeFileHeader(app_output_path, config) output_with_xml_comment = f"<!--\n{output}\n-->\n" - output_path = config.getPackageDirectoryPath()/app_output_path + output_path = config.getPackageDirectoryPath() / app_output_path output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'w') as f: - output_with_xml_comment = output_with_xml_comment.encode('utf-8', 'ignore').decode('utf-8') + with open(output_path, "w") as f: + output_with_xml_comment = output_with_xml_comment.encode( + "utf-8", "ignore" + ).decode("utf-8") f.write(output_with_xml_comment) - - # We INTENTIONALLY do not validate the comment we wrote to the header. This is because right now, - # the file is an empty XML document (besides the commented header). This means that it will FAIL validation + # We INTENTIONALLY do not validate the comment we wrote to the header. This is because right now, + # the file is an empty XML document (besides the commented header). This means that it will FAIL validation @staticmethod - def getJ2Environment()->Environment: + def getJ2Environment() -> Environment: j2_env = Environment( - loader=FileSystemLoader(os.path.join(os.path.dirname(__file__), 'templates')), + loader=FileSystemLoader( + os.path.join(os.path.dirname(__file__), "templates") + ), trim_blocks=True, - undefined=StrictUndefined) - j2_env.globals.update(objectListToNameList=SecurityContentObject.objectListToNameList) - - - j2_env.filters['custom_jinja2_enrichment_filter'] = ConfWriter.custom_jinja2_enrichment_filter - j2_env.filters['escapeNewlines'] = ConfWriter.escapeNewlines + undefined=StrictUndefined, + ) + j2_env.globals.update( + objectListToNameList=SecurityContentObject.objectListToNameList + ) + + j2_env.filters["custom_jinja2_enrichment_filter"] = ( + ConfWriter.custom_jinja2_enrichment_filter + ) + j2_env.filters["escapeNewlines"] = ConfWriter.escapeNewlines return j2_env @staticmethod - def writeConfFile(app_output_path:pathlib.Path, template_name : str, config: build, objects : list) -> pathlib.Path: - output_path = config.getPackageDirectoryPath()/app_output_path + def writeConfFile( + app_output_path: pathlib.Path, + template_name: str, + config: build, + objects: Sequence[SecurityContentObject] | list[CustomApp], + ) -> pathlib.Path: + output_path = config.getPackageDirectoryPath() / app_output_path j2_env = ConfWriter.getJ2Environment() - + template = j2_env.get_template(template_name) - output = template.render(objects=objects, app=config.app) - - output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'a') as f: - output = output.encode('utf-8', 'ignore').decode('utf-8') - f.write(output) + + # The following code, which is disabled by default, serializes one object at a time. + # This is extremely useful from a debugging perspective, because sometimes when + # serializing a large number of objects, exceptions thrown in Jinja2 templates can + be quite hard to diagnose.
We leave this code in for use in debugging workflows: + SERIALIZE_ONE_AT_A_TIME = False + if SERIALIZE_ONE_AT_A_TIME: + outputs: list[str] = [] + for obj in objects: + try: + outputs.append(template.render(objects=[obj], app=config.app)) + except Exception as e: + raise Exception( + f"Failed writing the following object to file:\n" + f"Name:{obj.name if not isinstance(obj, CustomApp) else obj.title}\n" + f"Type {type(obj)}: \n" + f"Output File: {app_output_path}\n" + f"Error: {str(e)}\n" + ) + + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "a") as f: + output = "".join(outputs).encode("utf-8", "ignore").decode("utf-8") + f.write(output) + else: + output = template.render(objects=objects, app=config.app) + + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "a") as f: + output = output.encode("utf-8", "ignore").decode("utf-8") + f.write(output) + return output_path - - + @staticmethod - def validateConfFile(path:pathlib.Path): + def validateConfFile(path: pathlib.Path): """Ensure that the conf file is valid. We will do this by reading back the conf using RawConfigParser to ensure that it does not throw any parsing errors. This is particularly relevant because newlines contained in string fields may break the formatting of the conf file if they have been incorrectly escaped with - the 'ConfWriter.escapeNewlines()' function. + the 'ConfWriter.escapeNewlines()' function. If a conf file failes validation, we will throw an exception @@ -300,7 +371,7 @@ def validateConfFile(path:pathlib.Path): """ return if path.suffix != ".conf": - #there may be some other files built, so just ignore them + # there may be some other files built, so just ignore them return try: _ = configparser.RawConfigParser().read(path) @@ -308,30 +379,35 @@ def validateConfFile(path:pathlib.Path): raise Exception(f"Failed to validate .conf file {str(path)}: {str(e)}") @staticmethod - def validateXmlFile(path:pathlib.Path): + def validateXmlFile(path: pathlib.Path): """Ensure that the XML file is valid XML. Args: path (pathlib.Path): path to the xml file to validate - """ - + """ + try: - with open(path, 'r') as xmlFile: + with open(path, "r") as xmlFile: _ = ET.fromstring(xmlFile.read()) except Exception as e: raise Exception(f"Failed to validate .xml file {str(path)}: {str(e)}") - @staticmethod - def validateManifestFile(path:pathlib.Path): + def validateManifestFile(path: pathlib.Path): """Ensure that the Manifest file is valid JSON. 
Args: path (pathlib.Path): path to the manifest JSON file to validate - """ + """ return try: - with open(path, 'r') as manifestFile: + with open(path, "r") as manifestFile: _ = json.load(manifestFile) except Exception as e: - raise Exception(f"Failed to validate .manifest file {str(path)} (Note that .manifest files should contain only valid JSON-formatted data): {str(e)}") + raise Exception( + f"Failed to validate .manifest file {str(path)} (Note that .manifest files should contain only valid JSON-formatted data): {str(e)}" + ) diff --git a/contentctl/output/data_source_writer.py b/contentctl/output/data_source_writer.py index 97967a72..1a6e4f95 100644 --- a/contentctl/output/data_source_writer.py +++ b/contentctl/output/data_source_writer.py @@ -1,6 +1,5 @@ import csv from contentctl.objects.data_source import DataSource -from contentctl.objects.event_source import EventSource from typing import List import pathlib diff --git a/contentctl/output/detection_writer.py b/contentctl/output/detection_writer.py deleted file mode 100644 index 2f439ca9..00000000 --- a/contentctl/output/detection_writer.py +++ /dev/null @@ -1,28 +0,0 @@ - -import yaml - - -class DetectionWriter: - - @staticmethod - def writeYmlFile(file_path : str, obj : dict) -> None: - - new_obj = dict() - new_obj["name"] = obj["name"] - new_obj["id"] = obj["id"] - new_obj["version"] = obj["version"] - new_obj["date"] = obj["date"] - new_obj["author"] = obj["author"] - new_obj["type"] = obj["type"] - new_obj["status"] = obj["status"] - new_obj["description"] = obj["description"] - new_obj["data_source"] = obj["data_source"] - new_obj["search"] = obj["search"] - new_obj["how_to_implement"] = obj["how_to_implement"] - new_obj["known_false_positives"] = obj["known_false_positives"] - new_obj["references"] = obj["references"] - new_obj["tags"] = obj["tags"] - new_obj["tests"] = obj["tests"] - - with open(file_path, 'w') as outfile: - yaml.safe_dump(new_obj, outfile, default_flow_style=False, sort_keys=False) \ No newline at end of file diff --git a/contentctl/output/json_writer.py b/contentctl/output/json_writer.py index fe3696d9..ee272255 100644 --- a/contentctl/output/json_writer.py +++ b/contentctl/output/json_writer.py @@ -1,11 +1,9 @@ import json -from contentctl.objects.abstract_security_content_objects.security_content_object_abstract import SecurityContentObject_Abstract -from typing import List -from io import TextIOWrapper +from typing import Any class JsonWriter(): @staticmethod - def writeJsonObject(file_path : str, object_name: str, objs: List[dict],readable_output=False) -> None: + def writeJsonObject(file_path : str, object_name: str, objs: list[dict[str,Any]],readable_output:bool=True) -> None: try: with open(file_path, 'w') as outfile: if readable_output: diff --git a/contentctl/output/new_content_yml_output.py b/contentctl/output/new_content_yml_output.py deleted file mode 100644 index 38730b37..00000000 --- a/contentctl/output/new_content_yml_output.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -import pathlib -from contentctl.objects.enums import SecurityContentType -from contentctl.output.yml_writer import YmlWriter -import pathlib -from contentctl.objects.config import NewContentType -class NewContentYmlOutput(): - output_path: pathlib.Path - - def __init__(self, output_path:pathlib.Path): - self.output_path = output_path - - - def
writeObjectNewContent(self, object: dict, subdirectory_name: str, type: NewContentType) -> None: - if type == NewContentType.detection: - - file_path = os.path.join(self.output_path, 'detections', subdirectory_name, self.convertNameToFileName(object['name'], object['tags']['product'])) - output_folder = pathlib.Path(self.output_path)/'detections'/subdirectory_name - #make sure the output folder exists for this detection - output_folder.mkdir(exist_ok=True) - - YmlWriter.writeYmlFile(file_path, object) - print("Successfully created detection " + file_path) - - elif type == NewContentType.story: - file_path = os.path.join(self.output_path, 'stories', self.convertNameToFileName(object['name'], object['tags']['product'])) - YmlWriter.writeYmlFile(file_path, object) - print("Successfully created story " + file_path) - - else: - raise(Exception(f"Object Must be Story or Detection, but is not: {object}")) - - - - def convertNameToFileName(self, name: str, product: list): - file_name = name \ - .replace(' ', '_') \ - .replace('-','_') \ - .replace('.','_') \ - .replace('/','_') \ - .lower() - - file_name = file_name + '.yml' - return file_name - - - def convertNameToTestFileName(self, name: str, product: list): - file_name = name \ - .replace(' ', '_') \ - .replace('-','_') \ - .replace('.','_') \ - .replace('/','_') \ - .lower() - - file_name = file_name + '.test.yml' - return file_name \ No newline at end of file diff --git a/contentctl/output/svg_output.py b/contentctl/output/svg_output.py index d454ccb2..2d0c9d56 100644 --- a/contentctl/output/svg_output.py +++ b/contentctl/output/svg_output.py @@ -35,7 +35,7 @@ def writeObjects(self, detections: List[Detection], output_path: pathlib.Path, t total_dict:dict[str,Any] = self.get_badge_dict("Detections", detections, detections) - production_dict:dict[str,Any] = self.get_badge_dict("% Production", detections, [detection for detection in detections if detection.status == DetectionStatus.production.value]) + production_dict:dict[str,Any] = self.get_badge_dict("% Production", detections, [detection for detection in detections if detection.status == DetectionStatus.production]) #deprecated_dict = self.get_badge_dict("Deprecated", detections, [detection for detection in detections if detection.status == DetectionStatus.deprecated]) #experimental_dict = self.get_badge_dict("Experimental", detections, [detection for detection in detections if detection.status == DetectionStatus.experimental]) diff --git a/contentctl/output/templates/analyticstories_detections.j2 b/contentctl/output/templates/analyticstories_detections.j2 index e97f82a8..d24a1217 100644 --- a/contentctl/output/templates/analyticstories_detections.j2 +++ b/contentctl/output/templates/analyticstories_detections.j2 @@ -5,7 +5,7 @@ {% if (detection.type == 'TTP' or detection.type == 'Anomaly' or detection.type == 'Hunting' or detection.type == 'Correlation') %} [savedsearch://{{ detection.get_conf_stanza_name(app) }}] type = detection -asset_type = {{ detection.tags.asset_type.value }} +asset_type = {{ detection.tags.asset_type }} confidence = medium explanation = {{ (detection.explanation if detection.explanation else detection.description) | escapeNewlines() }} {% if detection.how_to_implement is defined %} diff --git a/contentctl/output/templates/collections.j2 b/contentctl/output/templates/collections.j2 index 06e49140..cdf0eda7 100644 --- a/contentctl/output/templates/collections.j2 +++ b/contentctl/output/templates/collections.j2 @@ -1,6 +1,6 @@ {% for lookup in objects %} -{% if 
lookup.collection is defined and lookup.collection != None %} +{% if lookup.collection is defined %} [{{ lookup.name }}] enforceTypes = false replicate = false diff --git a/contentctl/output/templates/doc_detections.j2 b/contentctl/output/templates/doc_detections.j2 index 5430b0ed..60f0282f 100644 --- a/contentctl/output/templates/doc_detections.j2 +++ b/contentctl/output/templates/doc_detections.j2 @@ -162,11 +162,6 @@ The SPL above uses the following Lookups: {% endfor %} {% endif -%} -#### Required field -{% for field in object.tags.required_fields -%} -* {{ field }} -{% endfor %} - #### How To Implement {{ object.how_to_implement}} diff --git a/contentctl/output/templates/savedsearches_detections.j2 b/contentctl/output/templates/savedsearches_detections.j2 index 396bb2c6..0a5c634f 100644 --- a/contentctl/output/templates/savedsearches_detections.j2 +++ b/contentctl/output/templates/savedsearches_detections.j2 @@ -44,7 +44,7 @@ action.escu.providing_technologies = null action.escu.analytic_story = {{ objectListToNameList(detection.tags.analytic_story) | tojson }} {% if detection.deployment.alert_action.rba.enabled%} action.risk = 1 -action.risk.param._risk_message = {{ detection.tags.message | escapeNewlines() }} +action.risk.param._risk_message = {{ detection.rba.message | escapeNewlines() }} action.risk.param._risk = {{ detection.risk | tojson }} action.risk.param._risk_score = 0 action.risk.param.verbose = 0 @@ -70,8 +70,13 @@ action.notable.param.nes_fields = {{ detection.nes_fields }} {% endif %} action.notable.param.rule_description = {{ detection.deployment.alert_action.notable.rule_description | custom_jinja2_enrichment_filter(detection) | escapeNewlines()}} action.notable.param.rule_title = {% if detection.type | lower == "correlation" %}RBA: {{ detection.deployment.alert_action.notable.rule_title | custom_jinja2_enrichment_filter(detection) }}{% else %}{{ detection.deployment.alert_action.notable.rule_title | custom_jinja2_enrichment_filter(detection) }}{% endif +%} -action.notable.param.security_domain = {{ detection.tags.security_domain.value }} -action.notable.param.severity = {{ detection.tags.severity.value }} +action.notable.param.security_domain = {{ detection.tags.security_domain }} +{% if detection.rba %} +action.notable.param.severity = {{ detection.rba.severity }} +{% else %} +{# Correlations do not have detection.rba defined, but should get a default severity #} +action.notable.param.severity = high +{% endif %} {% endif %} {% if detection.deployment.alert_action.email %} action.email.subject.alert = {{ detection.deployment.alert_action.email.subject | custom_jinja2_enrichment_filter(detection) | escapeNewlines() }} diff --git a/contentctl/output/templates/transforms.j2 b/contentctl/output/templates/transforms.j2 index 2fd029ec..e8ec8c4b 100644 --- a/contentctl/output/templates/transforms.j2 +++ b/contentctl/output/templates/transforms.j2 @@ -1,8 +1,8 @@ {% for lookup in objects %} [{{ lookup.name }}] -{% if lookup.filename is defined and lookup.filename != None %} -filename = {{ lookup.filename.name }} +{% if lookup.app_filename is defined and lookup.app_filename != None %} +filename = {{ lookup.app_filename.name }} {% else %} collection = {{ lookup.collection }} external_type = kvstore @@ -25,8 +25,8 @@ max_matches = {{ lookup.max_matches }} {% if lookup.min_matches is defined and lookup.min_matches != None %} min_matches = {{ lookup.min_matches }} {% endif %} -{% if lookup.fields_list is defined and lookup.fields_list != None %} -fields_list = {{ 
lookup.fields_list }} +{% if lookup.fields_to_fields_list_conf_format is defined %} +fields_list = {{ lookup.fields_to_fields_list_conf_format }} {% endif %} {% if lookup.filter is defined and lookup.filter != None %} filter = {{ lookup.filter }} diff --git a/contentctl/output/yml_output.py b/contentctl/output/yml_output.py deleted file mode 100644 index 93eae5dc..00000000 --- a/contentctl/output/yml_output.py +++ /dev/null @@ -1,66 +0,0 @@ -import os - -from contentctl.output.detection_writer import DetectionWriter -from contentctl.objects.detection import Detection - - -class YmlOutput(): - - - def writeDetections(self, objects: list, output_path : str) -> None: - for obj in objects: - file_path = obj.file_path - obj.id = str(obj.id) - - DetectionWriter.writeYmlFile(os.path.join(output_path, file_path), obj.dict( - exclude_none=True, - include = - { - "name": True, - "id": True, - "version": True, - "date": True, - "author": True, - "type": True, - "status": True, - "description": True, - "data_source": True, - "search": True, - "how_to_implement": True, - "known_false_positives": True, - "references": True, - "tags": - { - "analytic_story": True, - "asset_type": True, - "atomic_guid": True, - "confidence": True, - "impact": True, - "drilldown_search": True, - "mappings": True, - "message": True, - "mitre_attack_id": True, - "kill_chain_phases:": True, - "observable": True, - "product": True, - "required_fields": True, - "risk_score": True, - "security_domain": True - }, - "tests": - { - '__all__': - { - "name": True, - "attack_data": { - '__all__': - { - "data": True, - "source": True, - "sourcetype": True - } - } - } - } - } - )) \ No newline at end of file diff --git a/contentctl/output/yml_writer.py b/contentctl/output/yml_writer.py index 7d71762b..2e408c83 100644 --- a/contentctl/output/yml_writer.py +++ b/contentctl/output/yml_writer.py @@ -1,6 +1,21 @@ import yaml from typing import Any +from enum import StrEnum, IntEnum + +# Set the following so that we can write StrEnum and IntEnum +# to files. Otherwise, we will get the following errors when trying +# to write to files: +# yaml.representer.RepresenterError: ('cannot represent an object',..... +yaml.SafeDumper.add_multi_representer( + StrEnum, + yaml.representer.SafeRepresenter.represent_str +) + +yaml.SafeDumper.add_multi_representer( + IntEnum, + yaml.representer.SafeRepresenter.represent_int +) class YmlWriter: diff --git a/contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml b/contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml index a101fd7d..3eea8300 100644 --- a/contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml +++ b/contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml @@ -38,51 +38,33 @@ drilldown_searches: search: '| from datamodel Risk.All_Risk | search normalized_risk_object IN ($user$, $dest$) starthoursago=168 endhoursago=1 | stats count min(_time) as firstTime max(_time) as lastTime values(search_name) as "Search Name" values(risk_message) as "Risk Message" values(analyticstories) as "Analytic Stories" values(annotations._all) as "Annotations" values(annotations.mitre_attack.mitre_tactic) as "ATT&CK Tactics" by normalized_risk_object | `security_content_ctime(firstTime)` | `security_content_ctime(lastTime)`' earliest_offset: $info_min_time$ latest_offset: $info_max_time$ +rba: + message: An instance of $parent_process_name$ spawning $process_name$ was identified + on endpoint $dest$ by user $user$. 
This behavior is indicative of suspicious loading + of 7zip. + risk_objects: + - field: user + type: user + score: 56 + - field: dest + type: system + score: 60 + threat_objects: + - field: parent_process_name + type: parent_process_name + - field: process_name + type: process_name tags: analytic_story: - Cobalt Strike asset_type: Endpoint - confidence: 80 - impact: 80 - message: An instance of $parent_process_name$ spawning $process_name$ was identified - on endpoint $dest$ by user $user$. This behavior is indicative of suspicious loading - of 7zip. mitre_attack_id: - T1560.001 - T1560 - observable: - - name: user - type: User - role: - - Victim - - name: dest - type: Hostname - role: - - Victim - - name: parent_process_name - type: Process - role: - - Attacker - - name: process_name - type: Process - role: - - Attacker product: - Splunk Enterprise - Splunk Enterprise Security - Splunk Cloud - required_fields: - - _time - - Processes.process_name - - Processes.process - - Processes.dest - - Processes.user - - Processes.parent_process_name - - Processes.process_name - - Processes.parent_process - - Processes.process_id - - Processes.parent_process_id - risk_score: 64 security_domain: endpoint tests: - name: True Positive Test diff --git a/pyproject.toml b/pyproject.toml index 7b59084b..7d15db05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "contentctl" -version = "4.4.7" +version = "5.0.0-alpha" description = "Splunk Content Control Tool" authors = ["STRT "] @@ -11,7 +11,8 @@ readme = "README.md" contentctl = 'contentctl.contentctl:main' [tool.poetry.dependencies] -python = "^3.11,<3.13" + +python = "^3.11,<3.14" pydantic = "~2.9.2" PyYAML = "^6.0.2" requests = "~2.32.3" @@ -26,11 +27,92 @@ semantic-version = "^2.10.0" bottle = ">=0.12.25,<0.14.0" tqdm = "^4.66.5" pygit2 = "^1.15.1" -tyro = ">=0.8.3,<0.10.0" +tyro = "^0.9.2" gitpython = "^3.1.43" setuptools = ">=69.5.1,<76.0.0" [tool.poetry.dev-dependencies] +[tool.poetry.group.dev.dependencies] +ruff = "^0.9.2" + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" + +[tool.ruff] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +target-version = "py311" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + +# Enable auto-formatting of code examples in docstrings. 
Markdown, +# reStructuredText code/literal blocks and doctests are all supported. +# +# This is currently disabled by default, but it is planned for this +# to be opt-out in the future. +docstring-code-format = false + +# Set the line length limit used when formatting code snippets in +# docstrings. +# +# This only has an effect when the `docstring-code-format` setting is +# enabled. +docstring-code-line-length = "dynamic"
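For context on the yml_writer.py hunk above: registering YAML multi-representers is what lets yaml.safe_dump emit StrEnum/IntEnum members instead of raising yaml.representer.RepresenterError. Below is a minimal, standalone sketch of the failure mode and the fix; the AssetType and Severity enums are hypothetical stand-ins for illustration only, not contentctl classes.

```python
from enum import IntEnum, StrEnum  # StrEnum requires Python 3.11+, matching pyproject.toml

import yaml


class AssetType(StrEnum):  # hypothetical stand-in for a string-valued enum field
    ENDPOINT = "Endpoint"


class Severity(IntEnum):  # hypothetical stand-in for an integer-valued enum field
    HIGH = 80


data = {"asset_type": AssetType.ENDPOINT, "severity": Severity.HIGH}

# Without the registrations below, yaml.safe_dump(data) raises:
#   yaml.representer.RepresenterError: ('cannot represent an object', ...)
yaml.SafeDumper.add_multi_representer(
    StrEnum, yaml.representer.SafeRepresenter.represent_str
)
yaml.SafeDumper.add_multi_representer(
    IntEnum, yaml.representer.SafeRepresenter.represent_int
)

# Enum members now serialize as their plain str/int values.
print(yaml.safe_dump(data, default_flow_style=False, sort_keys=False))
# asset_type: Endpoint
# severity: 80
```

Using add_multi_representer (rather than a per-class add_representer) matters here because it also covers every subclass of StrEnum/IntEnum, so additional enums can be serialized without further registration.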