Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create filter for department #77

Merged
merged 14 commits into from
Feb 21, 2024
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,26 @@ dag:
webhook: https://hooks.slack.com/services/XXXXXXXX/XXXXNFDXXX/n6QXXXXrPwxQ71ZXXXXXT9
```

### Exemplo 10
Esta configuração filtra os resultados por órgão/unidade selecionados.
Por enquanto disponível apenas para as pesquisas no DOU.

```yaml {11}
dag:
id: department_example
description: DAG de teste (filtro por departamento)
search:
terms:
- dados abertos
department:
- Ministério da Gestão e da Inovação em Serviços Públicos
- Ministério da Defesa
report:
emails:
- [email protected]
subject: "Teste do Ro-dou"
```

## Compreendendo um pouco mais a pesquisa no DOU

Todos os parâmetros disponíveis para pesquisa foram criados a partir da API da
Expand Down
13 changes: 13 additions & 0 deletions dag_confs/examples_and_tests/department_example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
dag:
id: department_example
description: DAG de teste (filtro por departamento)
search:
terms:
- dados abertos
department:
- Ministério da Gestão e da Inovação em Serviços Públicos
- Ministério da Defesa
report:
emails:
- [email protected]
subject: "Teste do Ro-dou"
8 changes: 8 additions & 0 deletions schemas/ro-dou.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,14 @@
}
]
},
"department": {
"type": "array",
"description": "departamento para filtro na busca",
"items": {
"type": "string",
"description": "nome do departamento"
}
},
"field": {
"type": "string",
"description": "description",
Expand Down
3 changes: 3 additions & 0 deletions src/dou_dag_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def create_dag(self, specs: DAGConfig, config_file: str) -> DAG:
'is_exact_search': specs.is_exact_search,
'ignore_signature_match': specs.ignore_signature_match,
'force_rematch': specs.force_rematch,
'department': specs.department,
'result_as_email': result_as_html(specs),
},
)
Expand Down Expand Up @@ -216,6 +217,7 @@ def perform_searches(
ignore_signature_match: bool,
force_rematch: bool,
result_as_email: bool,
department: List[str],
**context) -> dict:
"""Performs the search in each source and merge the results
"""
Expand All @@ -232,6 +234,7 @@ def perform_searches(
is_exact_search,
ignore_signature_match,
force_rematch,
department,
get_trigger_date(context, local_time = True))

if 'QD' in sources:
Expand Down
1 change: 1 addition & 0 deletions src/dou_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def search_text(
item["date"] = content["pubDate"]
item["id"] = content["classPK"]
item["display_date_sortable"] = content["displayDateSortable"]
item["hierarchyList"] = content["hierarchyList"]

all_results.append(item)

Expand Down
7 changes: 7 additions & 0 deletions src/notification/email_sender.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ def generate_email_content(self) -> str:
with open(file_path, 'r') as f:
blocks = [f'<style>\n{f.read()}</style>']

if self.specs.department:
blocks.append("""<p class="secao-marker">Filtrando resultados somente para:</p>""")
blocks.append("<ul>")
for dpt in self.specs.department:
blocks.append(f"<li>{dpt}</li>")
blocks.append("</ul>")

for group, results in self.search_report.items():
if group != 'single_group':
blocks.append('\n')
Expand Down
4 changes: 4 additions & 0 deletions src/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class DAGConfig:
terms: List[str]
sql: str
conn_id: str
department: List[str]
emails: List[str]
subject: str
attach_csv: bool
Expand All @@ -36,6 +37,7 @@ class DAGConfig:
dag_tags: Set[str]
owner: str


class FileParser(ABC):
"""Abstract class to build file parsers with DAG configuration.
"""
Expand Down Expand Up @@ -111,6 +113,7 @@ def _parse_yaml(self) -> DAGConfig:
is_exact_search = search.get('is_exact_search', True)
ignore_signature_match = search.get('ignore_signature_match', False)
force_rematch = search.get('force_rematch', None)
department = search.get('department', None)
schedule = self._get_safe_schedule(dag, self.DEFAULT_SCHEDULE)
doc_md = dag.get('doc_md', None)
if doc_md:
Expand All @@ -137,6 +140,7 @@ def _parse_yaml(self) -> DAGConfig:
terms=terms,
sql=sql,
conn_id=conn_id,
department=department,
emails=emails,
subject=subject,
attach_csv=attach_csv,
Expand Down
23 changes: 21 additions & 2 deletions src/searchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def exec_search(self,
is_exact_search: bool,
ignore_signature_match: bool,
force_rematch: bool,
department: List[str],
reference_date: datetime):
search_results = self._search_all_terms(
self._cast_term_list(term_list),
Expand All @@ -125,7 +126,8 @@ def exec_search(self,
field,
is_exact_search,
ignore_signature_match,
force_rematch)
force_rematch,
department)

return self._group_results(search_results, term_list)

Expand All @@ -137,7 +139,8 @@ def _search_all_terms(self,
field,
is_exact_search,
ignore_signature_match,
force_rematch) -> dict:
force_rematch,
department) -> dict:
search_results = {}
for search_term in term_list:
logging.info('Starting search for term: %s', search_term)
Expand All @@ -158,6 +161,10 @@ def _search_all_terms(self,
if self._really_matched(search_term,
r.get('abstract'))]

if department:
self._match_department(results, department)
# results = [r for r in results if any(item in r.get('hierarchyList') for item in department)]

self._render_section_descriptions(results)

self._add_standard_highlight_formatting(results)
Expand Down Expand Up @@ -240,6 +247,18 @@ def _is_signature(self, search_term: str, abstract: str) -> bool:
# ' JOSÉ `ANTONIO DE OLIVEIRA` MATOS'
norm_abstract_without_start_name.startswith(norm_term))
)

def _match_department(self, results: list, department: list) -> list:
    """Filter ``results`` in place by the configured list of units.

    Keeps only the results whose ``hierarchyList`` contains at least one
    of the names given in the ``department`` parameter of the YAML.

    Args:
        results: search result dicts. The list is mutated in place, so
            callers that ignore the return value still see the filter
            applied.
        department: names of the units (departments) to keep.

    Returns:
        The same ``results`` list object, after filtering.
    """
    logging.info("Applying filter for department list: %s", department)

    def _belongs_to_department(result) -> bool:
        # A result without a hierarchy cannot belong to any requested
        # unit, so a missing or None "hierarchyList" counts as empty
        # instead of raising.
        hierarchy = result.get("hierarchyList") or ()
        return any(dpt in hierarchy for dpt in department)

    # Slice assignment rewrites the caller's list in a single pass,
    # avoiding one O(n) ``list.remove`` per discarded result.
    results[:] = [
        result for result in results if _belongs_to_department(result)
    ]
    logging.info("%d result(s) left after department filter", len(results))
    return results

def _get_prior_and_matched_name(self, raw_html: str) -> Tuple[str, str]:
groups = self.SPLIT_MATCH_RE.match(raw_html).groups()
Expand Down
34 changes: 34 additions & 0 deletions tests/parsers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def test_hash_dag_id(yaml_parser, dag_id, size, hashed):
"lei de acesso à informação"],
"sql": None,
"conn_id": None,
"department": None,
"emails": ["[email protected]"],
"subject": "Teste do Ro-dou",
"attach_csv": False,
Expand Down Expand Up @@ -74,6 +75,7 @@ def test_hash_dag_id(yaml_parser, dag_id, size, hashed):
"lei de acesso à informação"],
"sql": None,
"conn_id": None,
"department": None,
"emails": ["[email protected]", "[email protected]"],
"subject": "Assunto do Email",
"attach_csv": True,
Expand Down Expand Up @@ -106,6 +108,7 @@ def test_hash_dag_id(yaml_parser, dag_id, size, hashed):
"UNION SELECT 'uso de máscara' as TERMO, 'Ações efetivas' as GRUPO "
"UNION SELECT 'distanciamento social' as TERMO, 'Ações efetivas' as GRUPO\n"),
"conn_id": "example_database_conn",
"department": None,
"emails": ["[email protected]"],
"subject": "[String] com caracteres especiais deve estar entre aspas",
"attach_csv": True,
Expand Down Expand Up @@ -133,6 +136,7 @@ def test_hash_dag_id(yaml_parser, dag_id, size, hashed):
"terms": ["cimentodaaroeira"],
"sql": None,
"conn_id": None,
"department": None,
"emails": ["[email protected]"],
"subject": 'Teste do Ro-dou',
"attach_csv": False,
Expand Down Expand Up @@ -162,6 +166,7 @@ def test_hash_dag_id(yaml_parser, dag_id, size, hashed):
"lei de acesso à informação"],
"sql": None,
"conn_id": None,
"department": None,
"emails": ["[email protected]"],
"subject": "Teste do Ro-dou",
"attach_csv": False,
Expand All @@ -181,6 +186,35 @@ def test_hash_dag_id(yaml_parser, dag_id, size, hashed):
"owner": "",
}
),
("department_example.yaml",
{
"dag_id": "department_example",
"sources": ["DOU"],
"territory_id": None,
"dou_sections": ["TODOS"],
"search_date": "DIA",
"field": "TUDO",
"is_exact_search": True,
"ignore_signature_match": False,
"force_rematch": None,
"terms": ["dados abertos"],
"sql": None,
"conn_id": None,
"department": ["Ministério da Gestão e da Inovação em Serviços Públicos",
"Ministério da Defesa"],
"emails": ["[email protected]"],
"subject": "Teste do Ro-dou",
"attach_csv": False,
"discord_webhook": None,
"slack_webhook": None,
"schedule": "59 5 * * *",
"description": "DAG de teste (filtro por departamento)",
"skip_null": True,
"doc_md": None,
"dag_tags": {"dou", "generated_dag"},
"owner": "",
}
),
])
def test_parse(filepath, result_tuple):
filepath = os.path.join(DouDigestDagGenerator().YAMLS_DIR,
Expand Down
29 changes: 29 additions & 0 deletions tests/searchers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,35 @@ def test_is_signature(dou_searcher, search_term, abstract):
def test_really_matched(dou_searcher, search_term, abstract):
assert dou_searcher._really_matched(search_term, abstract)

def test_match_department(dou_searcher):
    """Only results published under a requested department are kept."""
    department = ['Ministério da Defesa']
    kept = {
        "section": "Seção 3",
        "title": "EXTRATO DE COMPROMISSO",
        "href": "https://www.in.gov.br/web/dou/-/extrato-de-compromisso-342504508",
        "abstract": "ALESSANDRO GLAUCO DOS ANJOS DE VASCONCELOS - Secretário-Executivo Adjunto...",
        "date": "02/09/2021",
        "hierarchyList": ["Ministério da Defesa",
                          "Comando do Exército",
                          "Comando Militar do Nordeste",
                          "6ª Região Militar",
                          "28º Batalhão de Caçadores"],
    }
    dropped = {
        "section": "Seção 3",
        "title": "EXTRATO DE COMPROMISSO",
        "href": "https://www.in.gov.br/web/dou/-/extrato-de-compromisso-342504508",
        "abstract": "ALESSANDRO GLAUCO DOS ANJOS DE VASCONCELOS - Secretário-Executivo Adjunto...",
        "date": "02/09/2021",
        "hierarchyList": ["Ministério dos Povos Indígenas"],
    }
    results = [kept, dropped]
    dou_searcher._match_department(results, department)
    assert len(results) == 1
    # Counting alone would also pass if the filter were inverted (one
    # record in, one record out), so check which record survived.
    assert results == [kept]



@pytest.mark.parametrize(
'pre_term_list, casted_term_list',
[
Expand Down
Loading