-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
183 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
from __future__ import annotations | ||
|
||
import json | ||
from typing import TYPE_CHECKING, Callable | ||
|
||
import pytest | ||
|
||
if TYPE_CHECKING: | ||
from pathlib import Path | ||
|
||
from sec_parser.processing_engine.core import AbstractSemanticElementParser | ||
from sec_parser.semantic_elements.abstract_semantic_element import ( | ||
AbstractSemanticElement, | ||
) | ||
|
||
|
||
@pytest.fixture(scope="session")
def check() -> (
    Callable[[AbstractSemanticElementParser, Path, pytest.FixtureRequest], None]
):
    """Provide a snapshot-style checker for parser output.

    The returned callable parses the HTML file at ``html_path`` with
    ``parser``, serializes the resulting elements to dictionaries, and
    compares them with the JSON file that sits next to the HTML file
    (same path, ``.json`` suffix).

    Two pytest command-line options are read from ``request.config``
    (they are assumed to be registered elsewhere, e.g. in a
    ``pytest_addoption`` hook -- not visible from this block):

    * ``--create-missing``: write the JSON file when it does not exist yet,
      then skip the test.
    * ``--update``: always (re)write the JSON file, then skip the test.

    Without either option, a missing JSON file fails the test, and any
    difference between expected and actual elements fails the test with a
    JSON dump of the missing and/or unexpected elements.
    """

    def _check(
        parser: AbstractSemanticElementParser,
        html_path: Path,
        request: pytest.FixtureRequest,
    ) -> None:
        # Arrange
        assert html_path.exists(), f"{html_path} does not exist"
        # Explicit UTF-8 keeps the fixtures readable regardless of the
        # platform's default locale encoding.
        with html_path.open("r", encoding="utf-8") as file:
            html_content = file.read()

        # Act
        elements = parser.parse(html_content)
        actual_elements_dicts = _elements_to_dicts(elements)

        # Pre-Assert: Load expected results or save actual results as expected
        json_file = html_path.with_suffix(".json")
        snapshot_exists = json_file.exists()
        if (
            not snapshot_exists and request.config.getoption("--create-missing")
        ) or request.config.getoption("--update"):
            # ensure_ascii=False emits raw non-ASCII characters, so the file
            # must be written with an encoding that can represent them.
            with json_file.open("w", encoding="utf-8") as f:
                json.dump(
                    actual_elements_dicts,
                    f,
                    indent=4,
                    ensure_ascii=False,
                    sort_keys=True,
                )
            # pytest.skip raises, so nothing below runs after a write.
            if snapshot_exists:
                pytest.skip(f"{json_file} has been updated.")
            pytest.skip(f"{json_file} was missing and has been created.")
        elif not snapshot_exists:
            pytest.fail(f"{json_file} is missing. Use --create-missing to create it.")

        # Assert
        with json_file.open("r", encoding="utf-8") as f:
            expected_elements_dicts = json.load(f)
        missing, unexpected = _compare_elements(
            expected_elements_dicts,
            actual_elements_dicts,
        )
        error_messages = []
        # Each report dumps its OWN list of elements.
        for label, offending_elements in (
            ("Unexpected", unexpected),
            ("Missing", missing),
        ):
            if not offending_elements:
                continue
            dumped = json.dumps(
                offending_elements,
                indent=4,
                ensure_ascii=False,
                sort_keys=True,
            )
            error_messages.append(f"{label} elements in {html_path}:\n{dumped}")
        if error_messages:
            pytest.fail("\n\n".join(error_messages))

    return _check
|
||
|
||
def _elements_to_dicts(elements: list[AbstractSemanticElement]) -> list[dict]: | ||
return [ | ||
e.to_dict( | ||
include_previews=False, | ||
include_contents=True, | ||
) | ||
for e in elements | ||
] | ||
|
||
|
||
def _compare_elements( | ||
expected_elements_dicts: list[dict], | ||
actual_elements_dicts: list[dict], | ||
) -> tuple[list[dict], list[dict]]: | ||
# STEP: Compare the actual elements to the expected elements | ||
index_of_last_matched_element = 0 | ||
elements_not_found_in_actual = [] | ||
for expected_element in expected_elements_dicts: | ||
for index_in_actual in range( | ||
index_of_last_matched_element, | ||
len(actual_elements_dicts), | ||
): | ||
if actual_elements_dicts[index_in_actual] == expected_element: | ||
index_of_last_matched_element = index_in_actual + 1 | ||
break | ||
else: | ||
elements_not_found_in_actual.append(expected_element) | ||
elements_not_expected_but_present = [ | ||
actual_element | ||
for actual_element in actual_elements_dicts | ||
if actual_element not in expected_elements_dicts | ||
] | ||
|
||
# STEP: Sanity check | ||
if not elements_not_found_in_actual and not elements_not_expected_but_present: | ||
assert actual_elements_dicts == expected_elements_dicts | ||
|
||
return elements_not_found_in_actual, elements_not_expected_but_present |
File renamed without changes.
13 changes: 13 additions & 0 deletions
13
tests/integration/section_title/data/AA_0001193125-18-236766_01.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
<P STYLE="margin-top:0pt; margin-bottom:0pt; font-size:10pt; font-family:Times New Roman"> | ||
<B>D. Restructuring and Other Charges</B> – In the second quarter and | ||
<FONT STYLE="white-space:nowrap">six-month</FONT> period of 2018, Alcoa Corporation | ||
recorded Restructuring and other charges of $231 and $212, respectively, which were | ||
comprised of the following components: $167 and $144 (net), respectively, related | ||
to settlements and/or curtailments of certain pension and other postretirement employee | ||
benefits (see Note K); $80 and $84, respectively, for additional costs related to the | ||
curtailed Wenatchee (Washington) smelter, including $73 in both periods | ||
associated with recent management decisions (see below); a $15 net benefit in both | ||
periods related to the Portovesme (Italy) smelter (see “Italy 148” in the | ||
Litigation section of Note N); and a $1 net benefit in both periods for | ||
miscellaneous items. | ||
</P> |
6 changes: 6 additions & 0 deletions
6
tests/integration/section_title/data/AA_0001193125-18-236766_01.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
[ | ||
{ | ||
"cls_name": "TextElement", | ||
"text_content": "D. Restructuring and Other Charges In the second quarter and\n six-month period of 2018, Alcoa Corporation\n recorded Restructuring and other charges of $231 and $212, respectively, which were\n comprised of the following components: $167 and $144 (net), respectively, related\n to settlements and/or curtailments of certain pension and other postretirement employee\n benefits (see Note K); $80 and $84, respectively, for additional costs related to the\n curtailed Wenatchee (Washington) smelter, including $73 in both periods\n associated with recent management decisions (see below); a $15 net benefit in both\n periods related to the Portovesme (Italy) smelter (see Italy 148 in the\n Litigation section of Note N); and a $1 net benefit in both periods for\n miscellaneous items." | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import json | ||
from pathlib import Path | ||
from typing import Callable | ||
|
||
import pytest | ||
|
||
from sec_parser.processing_engine.core import ( | ||
AbstractSemanticElementParser, | ||
Edgar10QParser, | ||
) | ||
|
||
CURRENT_DIR = Path(__file__).resolve().parent | ||
|
||
|
||
@pytest.mark.parametrize("html_path", list((CURRENT_DIR / "data").glob("*.html")))
def test_bold_titles(
    html_path: Path,
    check: Callable[[AbstractSemanticElementParser, Path, pytest.FixtureRequest], None],
    request: pytest.FixtureRequest,
):
    """Check the parser output for one HTML fixture from the ``data`` folder.

    The test is parametrized over every ``*.html`` file found in the
    ``data`` directory next to this module. Each file is parsed by an
    ``Edgar10QParser`` built from a step factory that returns the parser's
    default steps, and the comparison itself is delegated to ``check``.
    """

    def default_steps():
        # A fresh list holding every default step, in the original order.
        return list(Edgar10QParser().get_default_steps())

    check(Edgar10QParser(default_steps), html_path, request)
Empty file.
This file was deleted.
Oops, something went wrong.