Skip to content

Commit

Permalink
test: add integration tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Elijas committed Dec 30, 2023
1 parent 1f40110 commit 86d39d8
Show file tree
Hide file tree
Showing 11 changed files with 183 additions and 5 deletions.
8 changes: 6 additions & 2 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ tasks:
cmds:
# Recommended coverage viewer in VSCode: https://marketplace.visualstudio.com/items?itemName=ryanluker.vscode-coverage-gutters
# Note: also update .codecov.yml when changing the target coverage.
- poetry run ptw -- -- {{.CLI_ARGS}} -rx --cov --cov-report=lcov:lcov.info --cov-report=term:skip-covered --cov-fail-under=90 tests/unit/
- poetry run ptw -- -- {{.CLI_ARGS}} -rx --cov --cov-report=lcov:lcov.info --cov-report=term:skip-covered --cov-fail-under=90 tests/unit/ tests/integration/

monitor-accuracy-tests:
desc: "Run unit tests and rerun them immediately upon file modification."
Expand Down Expand Up @@ -108,7 +108,11 @@ tasks:
cmds:
# Recommended coverage viewer in VSCode: https://marketplace.visualstudio.com/items?itemName=ryanluker.vscode-coverage-gutters
# Note: also update .codecov.yml when changing the target coverage.
- poetry run pytest -rx --cov --cov-report=lcov:lcov.info --cov-report=term:skip-covered --cov-fail-under=90 {{.CLI_ARGS}} tests/unit/
- poetry run pytest -rx --cov --cov-report=lcov:lcov.info --cov-report=term:skip-covered --cov-fail-under=90 {{.CLI_ARGS}} tests/unit/ tests/integration/

integration-tests: # Execute integration tests.
cmds:
- poetry run pytest -rx {{.CLI_ARGS}} tests/integration/

lint: # Perform linting on the code and automatically fix issues.
cmds:
Expand Down
4 changes: 4 additions & 0 deletions tests/accuracy/structure_and_text/test_structure_and_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ def test_structure_and_text(
if actual_element not in expected_elements_json
]

# STEP: Sanity check
if not elements_not_found_in_actual and not elements_not_expected_but_present:
assert actual_json == expected_elements_json

# STEP: Report and save the results
total_expected = len(expected_elements_json)
total_missing = len(elements_not_found_in_actual)
Expand Down
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def pytest_addoption(parser):
default=False,
help="Create missing files. Overwrite files that were previously generated automatically.",
)
parser.addoption(
"--create-missing",
action="store_true",
default=False,
help="Create missing files.",
)


@pytest.fixture(scope="session")
Expand Down
File renamed without changes.
123 changes: 123 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
from __future__ import annotations

import json
from typing import TYPE_CHECKING, Callable

import pytest

if TYPE_CHECKING:
from pathlib import Path

from sec_parser.processing_engine.core import AbstractSemanticElementParser
from sec_parser.semantic_elements.abstract_semantic_element import (
AbstractSemanticElement,
)


@pytest.fixture(scope="session")
def check() -> (
    Callable[[AbstractSemanticElementParser, Path, pytest.FixtureRequest], None]
):
    """Session fixture returning a snapshot-comparison helper.

    The returned callable parses the HTML file at ``html_path`` with
    ``parser`` and compares the serialized elements against the sibling
    ``.json`` snapshot (same stem, ``.json`` suffix).

    Snapshot handling is controlled by CLI options read from ``request``:
    - ``--create-missing``: write the snapshot if absent, then skip the test.
    - ``--update``: overwrite the snapshot with actual output, then skip.

    Fails the test (``pytest.fail``) when the snapshot is missing without
    ``--create-missing``, or when actual and expected elements differ.
    """

    def _pretty(obj: list[dict]) -> str:
        # Stable, human-readable JSON used for both snapshot files and
        # failure messages (sorted keys keep diffs deterministic).
        return json.dumps(obj, indent=4, ensure_ascii=False, sort_keys=True)

    def _check(
        parser: AbstractSemanticElementParser,
        html_path: Path,
        request: pytest.FixtureRequest,
    ) -> None:
        # Arrange
        assert html_path.exists(), f"{html_path} does not exist"
        with html_path.open("r") as file:
            html_content = file.read()

        # Act
        elements = parser.parse(html_content)
        actual_elements_dicts = _elements_to_dicts(elements)

        # Pre-Assert: Load expected results or save actual results as expected
        json_file = html_path.with_suffix(".json")
        if (
            not json_file.exists() and request.config.getoption("--create-missing")
        ) or request.config.getoption("--update"):
            with json_file.open("w") as f:
                f.write(_pretty(actual_elements_dicts))
            pytest.skip(f"{json_file} was missing and has been created.")
        elif not json_file.exists():
            pytest.fail(f"{json_file} is missing. Use --create-missing to create it.")

        # Assert
        with json_file.open("r") as f:
            expected_elements_dicts = json.load(f)
        missing, unexpected = _compare_elements(
            expected_elements_dicts,
            actual_elements_dicts,
        )
        error_messages = []
        if unexpected:
            # BUGFIX: this branch previously serialized ``missing``, so the
            # "Unexpected elements" report showed the wrong elements.
            error_messages.append(
                f"Unexpected elements in {html_path}:\n{_pretty(unexpected)}",
            )
        if missing:
            error_messages.append(
                f"Missing elements in {html_path}:\n{_pretty(missing)}",
            )
        if error_messages:
            pytest.fail("\n\n".join(error_messages))

    return _check


def _elements_to_dicts(elements: list[AbstractSemanticElement]) -> list[dict]:
return [
e.to_dict(
include_previews=False,
include_contents=True,
)
for e in elements
]


def _compare_elements(
expected_elements_dicts: list[dict],
actual_elements_dicts: list[dict],
) -> tuple[list[dict], list[dict]]:
# STEP: Compare the actual elements to the expected elements
index_of_last_matched_element = 0
elements_not_found_in_actual = []
for expected_element in expected_elements_dicts:
for index_in_actual in range(
index_of_last_matched_element,
len(actual_elements_dicts),
):
if actual_elements_dicts[index_in_actual] == expected_element:
index_of_last_matched_element = index_in_actual + 1
break
else:
elements_not_found_in_actual.append(expected_element)
elements_not_expected_but_present = [
actual_element
for actual_element in actual_elements_dicts
if actual_element not in expected_elements_dicts
]

# STEP: Sanity check
if not elements_not_found_in_actual and not elements_not_expected_but_present:
assert actual_elements_dicts == expected_elements_dicts

return elements_not_found_in_actual, elements_not_expected_but_present
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<P STYLE="margin-top:0pt; margin-bottom:0pt; font-size:10pt; font-family:Times New Roman">
<B>D. Restructuring and Other Charges</B> &#150; In the second quarter and
<FONT STYLE="white-space:nowrap">six-month</FONT> period of 2018, Alcoa Corporation
recorded Restructuring and other charges of $231 and $212, respectively, which were
comprised of the following components: $167 and $144 (net), respectively, related
to settlements and/or curtailments of certain pension and other postretirement employee
benefits (see Note K); $80 and $84, respectively, for additional costs related to the
curtailed Wenatchee (Washington) smelter, including $73 in both periods
associated with recent management decisions (see below); a $15 net benefit in both
periods related to the Portovesme (Italy) smelter (see &#147;Italy 148&#148; in the
Litigation section of Note N); and a $1 net benefit in both periods for
miscellaneous items.
</P>
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[
{
"cls_name": "TextElement",
"text_content": "D. Restructuring and Other Charges – In the second quarter and\n six-month period of 2018, Alcoa Corporation\n recorded Restructuring and other charges of $231 and $212, respectively, which were\n comprised of the following components: $167 and $144 (net), respectively, related\n to settlements and/or curtailments of certain pension and other postretirement employee\n benefits (see Note K); $80 and $84, respectively, for additional costs related to the\n curtailed Wenatchee (Washington) smelter, including $73 in both periods\n associated with recent management decisions (see below); a $15 net benefit in both\n periods related to the Portovesme (Italy) smelter (see “Italy 148” in the\n Litigation section of Note N); and a $1 net benefit in both periods for\n miscellaneous items."
}
]
25 changes: 25 additions & 0 deletions tests/integration/section_title/test_section_title.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import json
from pathlib import Path
from typing import Callable

import pytest

from sec_parser.processing_engine.core import (
AbstractSemanticElementParser,
Edgar10QParser,
)

CURRENT_DIR = Path(__file__).resolve().parent


@pytest.mark.parametrize("html_path", list((CURRENT_DIR / "data").glob("*.html")))
def test_bold_titles(
    html_path: Path,
    check: Callable[[AbstractSemanticElementParser, Path, pytest.FixtureRequest], None],
    request: pytest.FixtureRequest,
):
    """Parse each HTML fixture under ``data/`` with the default Edgar10QParser
    pipeline and compare the result against its ``.json`` snapshot via the
    session-scoped ``check`` fixture.
    """

    def get_steps():
        # Use the full default pipeline. (The previous
        # ``[k for k in ... if True]`` filter was a no-op; a plain copy of
        # the step list is equivalent and clearer.)
        return list(Edgar10QParser().get_default_steps())

    parser = Edgar10QParser(get_steps)
    check(parser, html_path, request)
Empty file.
3 changes: 0 additions & 3 deletions tests/unit/integration/section_title/test_bold_titles.py

This file was deleted.

0 comments on commit 86d39d8

Please sign in to comment.