Skip to content

Commit

Permalink
Merge pull request #440 from target/yara-and-ocr-update
Browse files Browse the repository at this point in the history
Enhancements to ScanYara Scanner for Improved Rule Organization and Metadata Extraction
  • Loading branch information
phutelmyer authored Mar 4, 2024
2 parents 5b6c153 + d21501b commit 48bbc24
Show file tree
Hide file tree
Showing 8 changed files with 576 additions and 384 deletions.
22 changes: 18 additions & 4 deletions configs/python/backend/backend.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ scanners:
extract_text: False
tmp_directory: "/dev/shm/"
pdf_to_png: True
remove_formatting: True
remove_formatting: False
split_words: True
create_thumbnail: True
thumbnail_size: [250, 250]
Expand Down Expand Up @@ -674,13 +674,27 @@ scanners:
'ScanYara':
- positive:
flavors:
- '*'
- "*"
priority: 5
options:
location: '/etc/strelka/yara/'
location: "/etc/strelka/yara/"
compiled:
enabled: False
enabled: True
filename: "rules.compiled"
category_key: "scope"
categories:
collection:
show_meta: False
detection:
show_meta: True
information:
show_meta: False
meta_fields:
- "author"
- "description"
- "hash"
- "intel"
show_all_meta: False
store_offset: True
offset_meta_key: "StrelkaHexDump"
offset_padding: 32
Expand Down
13 changes: 7 additions & 6 deletions configs/python/backend/taste/taste.yara
Original file line number Diff line number Diff line change
Expand Up @@ -982,7 +982,8 @@ rule wmv_file {

// PII

rule credit_cards {
rule credit_cards
{
meta:
// https://github.com/sbousseaden/YaraHunts/blob/master/hunt_creditcard_memscrap.yara
// https://stackoverflow.com/questions/9315647/regex-credit-card-number-tests
Expand All @@ -991,12 +992,12 @@ rule credit_cards {
// https://baymard.com/checkout-usability/credit-card-patterns
description = "Identify popular credit card numbers"
author = "ryan.ohoro"
date = "12/29/2022"
date = "01/26/2023"
strings:
// $amex = /[^0-9]3[47][0-9]{13}[^0-9]/ // Amex Card
// $disc = /[^0-9]6[0-9]{15}[^0-9]/ // Discover Card
// $mast = /[^0-9]5[1-5]{1}[0-9]{14}[^0-9]/ // Mastercard
$visa = /[^0-9]4[0-9]{15}[^0-9]/ // Visa Card
// $amex = /[^0-9]3[0-9]{14}[^0-9]/
$visa = /[^0-9]4[0-9]{15}[^0-9]/
// $mast = /[^0-9]5[0-9]{15}[^0-9]/
// $disc = /[^0-9]6[0-9]{15}[^0-9]/
condition:
any of them
}
Expand Down
140 changes: 70 additions & 70 deletions docs/README.md

Large diffs are not rendered by default.

622 changes: 345 additions & 277 deletions poetry.lock

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "strelka-worker"
version = "0.24.01.19"
version = "0.24.03.04"
description = "Strelka's backend Python worker"
authors = [
"Paul Hutelmyer <[email protected]>",
Expand All @@ -15,7 +15,7 @@ beautifulsoup4 = "4.12.2"
boltons = "23.0.0"
boto3 = "1.28.60"
construct = "2.10.68"
cryptography = "42.0.2"
cryptography = "42.0.5"
dncil = "1.0.2"
dnfile = "0.14.1"
docker = "6.1.3"
Expand Down Expand Up @@ -89,4 +89,3 @@ speakeasy-emulator = { git = "https://github.com/mandiant/speakeasy.git", rev =
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

38 changes: 33 additions & 5 deletions src/python/strelka/scanners/scan_yara.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ def scan(self, data, file, options, expire_at):
# Load YARA configuration options only once.
# This prevents loading the configs on every execution.
if not self.loaded_configs:
self.categories = options.get("categories", {})
self.category_key = options.get("category_key", "")
self.meta_fields = options.get("meta_fields", [])
self.show_all_meta = options.get("show_all_meta", False)
self.store_offset = options.get("store_offset", False)
self.offset_meta_key = options.get("offset_meta_key", "")
self.offset_padding = options.get("offset_padding", 32)
Expand All @@ -86,6 +90,29 @@ def scan(self, data, file, options, expire_at):
if self.compiled_yara:
yara_matches = self.compiled_yara.match(data=data)
for match in yara_matches:
# add the rule and ruleset name to the category meta
rule = {
"name": match.rule,
"ruleset": match.namespace,
}
# include meta if it's in the meta_fields list
for k, v in match.meta.items():
if k.lower() in self.meta_fields:
rule.update({k.lower(): v})
for category, params in self.categories.items():
if not self.event.get(category):
self.event[category] = []
# check if the category matches the category_key
if category in match.meta.get(self.category_key, "").lower():
# show meta for specific category if enabled
if params.get("show_meta", False):
self.event[category].append(rule)
else:
self.event[category].append(match.rule)
# show meta for specific tag if present
# if category in list(map(str.lower, match.tags)):
# self.event[category].append(rule)

# Append rule matches and update tags.
self.event["matches"].append(match.rule)
self.event["tags"].extend(match.tags)
Expand All @@ -105,11 +132,12 @@ def scan(self, data, file, options, expire_at):
self.offset_padding,
)

# Append meta information if configured to do so.
for k, v in match.meta.items():
self.event["meta"].append(
{"rule": match.rule, "identifier": k, "value": v}
)
# Append meta information if configured to do so
if self.show_all_meta:
for k, v in match.meta.items():
self.event["meta"].append(
{"rule": match.rule, "identifier": k, "value": v}
)

# De-duplicate tags.
self.event["tags"] = list(set(self.event["tags"]))
Expand Down
15 changes: 15 additions & 0 deletions src/python/strelka/tests/fixtures/test.yara
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@



rule test
// This rule verifies the ScanYara scanner works. Add rules here for quickstart scanning.
{
Expand All @@ -13,7 +16,19 @@ rule hex_extraction_test

strings:
$match_str = "Venenatis tellus in metus vulputate."
condition:
$match_str
}

rule meta_test
// This rule verifies the ScanYara scanner works for meta categories
{
meta:
// Free-form metadata field; the accompanying meta test expects this value
// to be reported alongside the rule name and ruleset.
author = "John Doe"
// The accompanying meta test sets category_key to "scope", so this value
// is what places the rule in the "detection" category.
scope = "detection"

strings:
// Literal text the rule searches for in the scanned data.
$match_str = "Lorem ipsum dolor sit amet"
condition:
// The rule matches whenever the string above is found.
$match_str
}
105 changes: 86 additions & 19 deletions src/python/strelka/tests/test_scan_yara.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@ def test_scan_yara(mocker):
test_scan_event = {
"elapsed": mock.ANY,
"flags": [],
"matches": ["test", "hex_extraction_test"],
"rules_loaded": 2,
"hex": [],
"matches": ["test", "hex_extraction_test", "meta_test"],
"meta": mock.ANY,
"rules_loaded": 3,
"tags": [],
"hex": [],
}

scanner_event = run_test_scan(
mocker=mocker,
scan_class=ScanUnderTest,
fixture_path=Path(__file__).parent / "fixtures/test.txt",
fixture_path=str(Path(__file__).parent / "fixtures/test.txt"),
options={
"location": str(Path(Path(__file__).parent / "fixtures/test.yara")),
"location": str(Path(__file__).parent / "fixtures/test.yara"),
"compiled": {
"enabled": False,
"filename": "rules.compiled",
Expand All @@ -53,19 +53,19 @@ def test_scan_bad_yara(mocker):
'compiling_error_syntax_/strelka/strelka/tests/fixtures/test_elk_linux_torte.yara(31): undefined identifier "is__elf"',
"no_rules_loaded",
],
"hex": [],
"matches": [],
"rules_loaded": 0,
"meta": mock.ANY,
"rules_loaded": 0,
"tags": [],
"hex": [],
}

scanner_event = run_test_scan(
mocker=mocker,
scan_class=ScanUnderTest,
fixture_path=Path(__file__).parent / "fixtures/test.txt",
fixture_path=str(Path(__file__).parent / "fixtures/test.txt"),
options={
"location": str(Path(Path(__file__).parent / "fixtures/")),
"location": str(Path(__file__).parent / "fixtures/"),
"compiled": {
"enabled": False,
"filename": "rules.compiled",
Expand All @@ -83,41 +83,108 @@ def test_scan_yara_hex_extraction(mocker):
Failure: Unable to load file or sample event fails to match.
"""
matched_hex = {
"rule": "hex_extraction_test",
"dump": [
"000002ff 74 20 6d 69 20 70 72 6f 69 6e 20 73 65 64 2e 20 t mi proin sed. ",
"0000030f 56 65 6e 65 6e 61 74 69 73 20 74 65 6c 6c 75 73 Venenatis tellus",
"0000031f 20 69 6e 20 6d 65 74 75 73 20 76 75 6c 70 75 74 in metus vulput",
"0000032f 61 74 65 2e 20 44 69 63 74 75 6d 73 74 20 76 65 ate. Dictumst ve",
"0000033f 73 74 69 62 75 6c 75 6d 20 72 68 6f 6e 63 75 73 stibulum rhoncus",
],
"rule": "hex_extraction_test",
}

test_scan_event = {
"elapsed": mock.ANY,
"flags": [],
"matches": ["test", "hex_extraction_test"],
"rules_loaded": 2,
"hex": [matched_hex],
"matches": ["test", "hex_extraction_test", "meta_test"],
"meta": [],
"rules_loaded": 3,
"tags": [],
}

scanner_event = run_test_scan(
mocker=mocker,
scan_class=ScanUnderTest,
fixture_path=str(Path(__file__).parent / "fixtures/test.txt"),
options={
"location": str(Path(__file__).parent / "fixtures/test.yara"),
"show_all_meta": False,
"store_offset": True,
"offset_meta_key": "StrelkaHexDump",
"offset_padding": 32,
"compiled": {
"enabled": False,
"filename": "rules.compiled",
},
},
)

TestCase.maxDiff = None
TestCase().assertDictEqual(test_scan_event, scanner_event)


def test_scan_yara_meta(mocker):
"""
Pass: Sample event matches output of scanner.
Failure: Unable to load file or sample event fails to match.
"""

test_scan_event = {
"collection": [],
"detection": [
{
"name": "meta_test",
"ruleset": "default",
"author": "John Doe",
},
],
"elapsed": mock.ANY,
"flags": [],
"hex": [],
"information": [],
"matches": ["test", "hex_extraction_test", "meta_test"],
"meta": [
{
"identifier": "StrelkaHexDump",
"rule": "hex_extraction_test",
"value": True,
}
},
{
"identifier": "author",
"rule": "meta_test",
"value": "John Doe",
},
{
"identifier": "scope",
"rule": "meta_test",
"value": "detection",
},
],
"rules_loaded": 3,
"tags": [],
"hex": [matched_hex],
}

scanner_event = run_test_scan(
mocker=mocker,
scan_class=ScanUnderTest,
fixture_path=Path(__file__).parent / "fixtures/test.txt",
fixture_path=str(Path(__file__).parent / "fixtures/test.txt"),
options={
"location": str(Path(Path(__file__).parent / "fixtures/test.yara")),
"store_offset": True,
"offset_meta_key": "StrelkaHexDump",
"offset_padding": 32,
"location": str(Path(__file__).parent / "fixtures/test.yara"),
"category_key": "scope",
"categories": {
"collection": {},
"detection": {
"show_meta": True,
},
"information": {},
},
"meta_fields": {
"name": "",
"ruleset": "",
"author": "",
},
"show_all_meta": True,
"compiled": {
"enabled": False,
"filename": "rules.compiled",
Expand Down

0 comments on commit 48bbc24

Please sign in to comment.