Skip to content

Commit

Permalink
adding filter by department on DOU Search
Browse files Browse the repository at this point in the history
  • Loading branch information
luisglabarreto committed Feb 16, 2024
1 parent 70a45b3 commit 9436b07
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 4 deletions.
3 changes: 3 additions & 0 deletions src/dou_dag_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def create_dag(self, specs: DAGConfig, config_file: str) -> DAG:
'is_exact_search': specs.is_exact_search,
'ignore_signature_match': specs.ignore_signature_match,
'force_rematch': specs.force_rematch,
'department': specs.department,
'result_as_email': result_as_html(specs),
},
)
Expand Down Expand Up @@ -216,6 +217,7 @@ def perform_searches(
ignore_signature_match: bool,
force_rematch: bool,
result_as_email: bool,
department: str,
**context) -> dict:
"""Performs the search in each source and merge the results
"""
Expand All @@ -232,6 +234,7 @@ def perform_searches(
is_exact_search,
ignore_signature_match,
force_rematch,
department,
get_trigger_date(context, local_time = True))

if 'QD' in sources:
Expand Down
2 changes: 2 additions & 0 deletions src/dou_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def search_text(

if search_results:
for content in search_results:
#if "Ministério da Gestão e da Inovação em Serviços Públicos" in content["hierarchyList"]:
item = {}
item["section"] = content["pubName"].lower()
item["title"] = content["title"]
Expand All @@ -205,6 +206,7 @@ def search_text(
item["date"] = content["pubDate"]
item["id"] = content["classPK"]
item["display_date_sortable"] = content["displayDateSortable"]
item["hierarchyList"] = content["hierarchyList"]

all_results.append(item)

Expand Down
3 changes: 3 additions & 0 deletions src/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class DAGConfig:
doc_md: str
dag_tags: Set[str]
owner: str
department: str

class FileParser(ABC):
"""Abstract class to build file parsers with DAG configuration.
Expand Down Expand Up @@ -111,6 +112,7 @@ def _parse_yaml(self) -> DAGConfig:
is_exact_search = search.get('is_exact_search', True)
ignore_signature_match = search.get('ignore_signature_match', False)
force_rematch = search.get('force_rematch', None)
department = search.get('department', None)
schedule = self._get_safe_schedule(dag, self.DEFAULT_SCHEDULE)
doc_md = dag.get('doc_md', None)
if doc_md:
Expand Down Expand Up @@ -148,6 +150,7 @@ def _parse_yaml(self) -> DAGConfig:
doc_md=doc_md,
dag_tags=set(dag_tags),
owner=owner,
department=department,
)

def _get_terms_params(self, search) -> Tuple[List[str], str, str]:
Expand Down
22 changes: 18 additions & 4 deletions src/searchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ def exec_search(self,
is_exact_search: bool,
ignore_signature_match: bool,
force_rematch: bool,
reference_date: datetime):
reference_date: datetime,
department: str):
search_results = self._search_all_terms(
self._cast_term_list(term_list),
dou_sections,
Expand All @@ -125,7 +126,8 @@ def exec_search(self,
field,
is_exact_search,
ignore_signature_match,
force_rematch)
force_rematch,
department)

return self._group_results(search_results, term_list)

Expand All @@ -137,7 +139,8 @@ def _search_all_terms(self,
field,
is_exact_search,
ignore_signature_match,
force_rematch) -> dict:
force_rematch,
department) -> dict:
search_results = {}
for search_term in term_list:
logging.info('Starting search for term: %s', search_term)
Expand All @@ -157,7 +160,8 @@ def _search_all_terms(self,
results = [r for r in results
if self._really_matched(search_term,
r.get('abstract'))]

self._department_matched(results, department)

self._render_section_descriptions(results)

self._add_standard_highlight_formatting(results)
Expand Down Expand Up @@ -240,6 +244,16 @@ def _is_signature(self, search_term: str, abstract: str) -> bool:
# ' JOSÉ `ANTONIO DE OLIVEIRA` MATOS'
norm_abstract_without_start_name.startswith(norm_term))
)

def _department_matched(self, results: list, department: str) -> list:
"""Verifica se o termo encontrado pela API realmente é igual ao
órgão de busca. Esta função é útil para filtrar resultados
retornardos pela API, mas que são específicas do órgão.
"""

for result in results[:]:
if not department in result["hierarchyList"]:
results.remove(result)

def _get_prior_and_matched_name(self, raw_html: str) -> Tuple[str, str]:
groups = self.SPLIT_MATCH_RE.match(raw_html).groups()
Expand Down

0 comments on commit 9436b07

Please sign in to comment.