From 55653f67a575d2f69720ed6ea4ce504831a1a1d3 Mon Sep 17 00:00:00 2001 From: Vitor Bellini Date: Wed, 27 Mar 2024 11:20:48 -0300 Subject: [PATCH] update tests --- src/searchers.py | 4 ++- tests/inlabs_hook_test.py | 63 +++++++++++++++++++++++++++++------ tests/inlabs_searcher_test.py | 58 +++++++++++++++++++++++++++----- 3 files changed, 104 insertions(+), 21 deletions(-) diff --git a/src/searchers.py b/src/searchers.py index f142bf7..bd0f525 100644 --- a/src/searchers.py +++ b/src/searchers.py @@ -416,7 +416,7 @@ def exec_search( inlabs_hook = INLABSHook() search_terms = self._prepare_search_terms(terms) - self._apply_filters( + search_terms = self._apply_filters( search_terms, dou_sections, department, @@ -466,6 +466,8 @@ def _apply_filters( publish_to = reference_date.strftime("%Y-%m-%d") search_terms["pub_date"] = [publish_from, publish_to] + return search_terms + @staticmethod def _split_sql_terms(terms: Dict) -> List: """Split SQL terms into a list, removing duplicates. diff --git a/tests/inlabs_hook_test.py b/tests/inlabs_hook_test.py index 9f7b9d4..9238aed 100644 --- a/tests/inlabs_hook_test.py +++ b/tests/inlabs_hook_test.py @@ -1,6 +1,40 @@ import pytest +@pytest.mark.parametrize( + "text, keys, matches", + [ + ( + "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + ["lorem", "sit", "not_find"], + ["lorem", "sit"] + ), + ], +) +def test_find_matches(inlabs_hook, text, keys, matches): + assert inlabs_hook.TextDictHandler()._find_matches(text, keys) == matches + + +@pytest.mark.parametrize( + "text_in, text_out", + [ + ("çãAî é", "caai e"), + ], +) +def test_normalize(inlabs_hook, text_in, text_out): + assert inlabs_hook.TextDictHandler()._normalize(text_in) == text_out + + +@pytest.mark.parametrize( + "date_in, date_out", + [ + ("2024-03-07", "07/03/2024"), + ], +) +def test_format_date(inlabs_hook, date_in, date_out): + assert inlabs_hook.TextDictHandler()._format_date(date_in) == date_out + + @pytest.mark.parametrize( "pub_name_in, pub_name_out", [ @@ -29,28 +63,34 @@ def test_rename_section(inlabs_hook, pub_name_in, pub_name_out):

Analista

""", # texto_out - ("Título da Publicação Título da Publicação 2 Lorem ipsum dolor sit amet, " - "consectetur adipiscing elit. Phasellus venenatis auctor mauris. " - "Brasília/DF, 15 de março de 2024. Pessoa 1 Analista") + ("Título da Publicação Título da Publicação 2 Lorem ipsum dolor sit amet, " + "consectetur adipiscing elit. Phasellus venenatis auctor mauris. " + "Brasília/DF, 15 de março de 2024. Pessoa 1 Analista") ) ], ) -def test_parse_html_text(inlabs_hook, texto_in, texto_out): - assert inlabs_hook.TextDictHandler()._parse_html_text(texto_in) == texto_out +def test_remove_html_tags(inlabs_hook, texto_in, texto_out): + print(inlabs_hook.TextDictHandler()._remove_html_tags(texto_in)) + assert inlabs_hook.TextDictHandler()._remove_html_tags(texto_in) == texto_out @pytest.mark.parametrize( "term, texto_in, texto_out", [ ( - "elementum", + ["elementum"], "Pellentesque vel elementum mauris, id semper tellus.", "Pellentesque vel <%%>elementum mauris, id semper tellus.", ), + ( + ["elementum", "tellus"], + "Pellentesque vel elementum mauris, id semper tellus.", + "Pellentesque vel <%%>elementum mauris, id semper <%%>tellus.", + ), ], ) -def test_highlight_term(inlabs_hook, term, texto_in, texto_out): - assert inlabs_hook.TextDictHandler()._highlight_term(term, texto_in) == texto_out +def test_highlight_terms(inlabs_hook, term, texto_in, texto_out): + assert inlabs_hook.TextDictHandler()._highlight_terms(term, texto_in) == texto_out @pytest.mark.parametrize( @@ -76,11 +116,12 @@ def test_highlight_term(inlabs_hook, term, texto_in, texto_out): viverra finibus a et magna. <%%>Pellentesque vel elementum mauris, id semper tellus. Vivamus convallis lacinia ex sed fermentum. Nulla mollis cursus ipsum vel interdum. Mauris - facilisis posuere elit. Proin co (...)""") + facilisis posue (...)""") ), ], ) def test_trim_text(inlabs_hook, texto_in, texto_out): + print(inlabs_hook.TextDictHandler()._trim_text(texto_in)) assert inlabs_hook.TextDictHandler()._trim_text(texto_in) == texto_out @@ -228,7 +269,7 @@ def test_update_nested_dict(inlabs_hook, terms, list_in, dict_out): ], ) def test_transform_search_results(inlabs_hook, terms, list_in, dict_out): - r = inlabs_hook.TextDictHandler()._transform_search_results( + r = inlabs_hook.TextDictHandler().transform_search_results( response=list_in, text_terms=terms, ignore_signature_match=False ) assert r == dict_out @@ -293,7 +334,7 @@ def test_transform_search_results(inlabs_hook, terms, list_in, dict_out): ], ) def test_ignore_signature(inlabs_hook, terms, list_in, dict_out): - r = inlabs_hook.TextDictHandler()._transform_search_results( + r = inlabs_hook.TextDictHandler().transform_search_results( response=list_in, text_terms=terms, ignore_signature_match=True ) assert r == dict_out diff --git a/tests/inlabs_searcher_test.py b/tests/inlabs_searcher_test.py index ba34e61..d4f024b 100644 --- a/tests/inlabs_searcher_test.py +++ b/tests/inlabs_searcher_test.py @@ -1,30 +1,68 @@ """INLABS Seracher unit tests """ +from datetime import datetime import pytest +@pytest.mark.parametrize( + "search_terms, sections, department, reference_date, search_date, filters_applyed", + [ + ({"texto": ["a", "b"]}, ["SECAO_2"], ["Ministério"], datetime.now(), "DIA", + { + "texto": ["a", "b"], + "pub_name": ["DO2"], + "art_category": ["Ministério"], + "pub_date": [datetime.now().strftime("%Y-%m-%d"), datetime.now().strftime("%Y-%m-%d")], + } + ), + ], +) +def test_apply_filters( + inlabs_searcher, search_terms, sections, department, reference_date, search_date, filters_applyed +): + assert inlabs_searcher._apply_filters( + search_terms, sections, department, reference_date, search_date + ) == filters_applyed + + +@pytest.mark.parametrize( + "terms, search_terms", + [ + (["a", "b", "c"], {"texto": ["a", "b", "c"]}), + ( + '{"termo": {"0": "Pessoa 0","1": "Pessoa 1"}, "termo_group": {"0": "Grupo 1","1": "Grupo 2"}}', + {"texto": ["Pessoa 0", "Pessoa 1"]}, + ), + ], +) +def test_prepare_search_terms(inlabs_searcher, terms, search_terms): + assert inlabs_searcher._prepare_search_terms(terms) == search_terms + + @pytest.mark.parametrize( "raw_sections, parsed_sections", [ - (["SECAO_1"], ["1"]), - (["SECAO_2"], ["2"]), - (["SECAO_3"], ["3"]), - (["SECAO_1", "EDICAO_EXTRA"], ["1", "E"]), + (["SECAO_1"], ["DO1"]), + (["SECAO_2"], ["DO2"]), + (["SECAO_3"], ["DO3"]), + (["SECAO_1", "EDICAO_EXTRA"], ["DO1", "DO1E"]), ( ["SECAO_2", "EDICAO_EXTRA_1A", "EDICAO_EXTRA_2B", "EDICAO_EXTRA_3D"], - ["2", "1E", "2E", "3E"], + ["DO2", "DO1E", "DO2E", "DO3E"], ), ], ) def test_parse_sections(inlabs_searcher, raw_sections, parsed_sections): - assert inlabs_searcher._parse_sections(raw_sections) == parsed_sections + assert sorted(inlabs_searcher._parse_sections(raw_sections)) == sorted( + parsed_sections + ) @pytest.mark.parametrize( "sql_terms, sql_splitted_terms", [ - ( # sql_terms + ( # sql_terms { "termo": { "0": "Pessoa 0", @@ -66,9 +104,11 @@ def test_parse_sections(inlabs_searcher, raw_sections, parsed_sections): "Pessoa 8", "Pessoa 9", "Pessoa 10", - ] + ], ), ], ) def test_split_sql_terms(inlabs_searcher, sql_terms, sql_splitted_terms): - assert inlabs_searcher._split_sql_terms(sql_terms) == sql_splitted_terms + assert sorted(inlabs_searcher._split_sql_terms(sql_terms)) == sorted( + sql_splitted_terms + )