Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ScanOcr Gif Support and ScanBase64Pe Rename #449

Merged
merged 2 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions configs/python/backend/backend.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,8 @@ scanners:
- 'image/webp'
- 'application/pdf'
- 'pdf_file'
- "image/gif"
- "gif_file"
priority: 5
options:
extract_text: False
Expand Down
2 changes: 1 addition & 1 deletion src/python/strelka/scanners/scan_base64_pe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from strelka import strelka


class ScanBase64PE(strelka.Scanner):
class ScanBase64Pe(strelka.Scanner):
"""Decodes base64-encoded file."""

def scan(self, data, file, options, expire_at):
Expand Down
Binary file added src/python/strelka/tests/fixtures/test_text.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion src/python/strelka/tests/test_scan_base64_pe.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path
from unittest import TestCase, mock

from strelka.scanners.scan_base64_pe import ScanBase64PE as ScanUnderTest
from strelka.scanners.scan_base64_pe import ScanBase64Pe as ScanUnderTest
from strelka.tests import run_test_scan


Expand Down
153 changes: 153 additions & 0 deletions src/python/strelka/tests/test_scan_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,159 @@ def test_scan_ocr_webp(mocker):
TestCase().assertDictEqual(test_scan_event, scanner_event)


def test_scan_ocr_gif(mocker):
    """
    Pass: The event produced for the GIF fixture equals the sample event.
    Failure: The fixture cannot be loaded or the produced event differs.
    """

    # Full extracted text, kept in the fixed-width chunks used by the sample
    # event. Several words are fused (e.g. "faucibusconsectetur") even though
    # they appear as separate entries in the token list below.
    expected_string_text = (
        b"Lorem Ipsum Lorem ipsum dolor sit amet, consectetur adipisci"
        b"ng elit. Cras lobortis sem dui. Morbi at magna quis ligula f"
        b"aucibusconsectetur feugiat at purus. Sed nec lorem nibh. Nam"
        b" vel libero odio. Vivamus tempus non enim egestas pretium.Ve"
        b"stibulum turpis arcu, maximus nec libero quis, imperdiet sus"
        b"cipit purus. Vestibulum blandit quis lacus nonsollicitudin. "
        b"Nullam non convallis dui, et aliquet risus. Sed accumsan ull"
        b"amcorper vehicula. Proin non urna facilisis,condimentum eros"
        b" quis, suscipit purus. Morbi euismod imperdiet neque ferment"
        b"um dictum. Integer aliquam, erat sitamet fringilla tempus, m"
        b"auris ligula blandit sapien, et varius sem mauris eu diam. S"
        b"ed fringilla neque est, in laoreetfelis tristique in. Donec "
        b"luctus velit a posuere posuere. Suspendisse sodales pellente"
        b"sque quam."
    )

    # Individual tokens in reading order, one bytes object per word.
    expected_words = [
        b"Lorem", b"Ipsum", b"Lorem", b"ipsum", b"dolor", b"sit",
        b"amet,", b"consectetur", b"adipiscing", b"elit.", b"Cras", b"lobortis",
        b"sem", b"dui.", b"Morbi", b"at", b"magna", b"quis",
        b"ligula", b"faucibus", b"consectetur", b"feugiat", b"at", b"purus.",
        b"Sed", b"nec", b"lorem", b"nibh.", b"Nam", b"vel",
        b"libero", b"odio.", b"Vivamus", b"tempus", b"non", b"enim",
        b"egestas", b"pretium.", b"Vestibulum", b"turpis", b"arcu,", b"maximus",
        b"nec", b"libero", b"quis,", b"imperdiet", b"suscipit", b"purus.",
        b"Vestibulum", b"blandit", b"quis", b"lacus", b"non", b"sollicitudin.",
        b"Nullam", b"non", b"convallis", b"dui,", b"et", b"aliquet",
        b"risus.", b"Sed", b"accumsan", b"ullamcorper", b"vehicula.", b"Proin",
        b"non", b"urna", b"facilisis,", b"condimentum", b"eros", b"quis,",
        b"suscipit", b"purus.", b"Morbi", b"euismod", b"imperdiet", b"neque",
        b"fermentum", b"dictum.", b"Integer", b"aliquam,", b"erat", b"sit",
        b"amet", b"fringilla", b"tempus,", b"mauris", b"ligula", b"blandit",
        b"sapien,", b"et", b"varius", b"sem", b"mauris", b"eu",
        b"diam.", b"Sed", b"fringilla", b"neque", b"est,", b"in",
        b"laoreet", b"felis", b"tristique", b"in.", b"Donec", b"luctus",
        b"velit", b"a", b"posuere", b"posuere.", b"Suspendisse", b"sodales",
        b"pellentesque", b"quam.",
    ]

    expected_event = {
        "elapsed": mock.ANY,  # timing varies per run, so accept any value
        "flags": [],
        "string_text": expected_string_text,
        "text": expected_words,
    }

    gif_fixture = Path(__file__).parent / "fixtures/test_text.gif"
    actual_event = run_test_scan(
        mocker=mocker,
        scan_class=ScanUnderTest,
        fixture_path=gif_fixture,
    )

    # Lift the diff truncation limit so a mismatch prints the full comparison.
    TestCase.maxDiff = None
    checker = TestCase()
    checker.assertDictEqual(expected_event, actual_event)


def test_scan_ocr_keep_formatting(mocker):
"""
Pass: Sample event matches output of scanner.
Expand Down
Loading