From be4c0b405db62a18807c64f25122848c9d7ce4d2 Mon Sep 17 00:00:00 2001 From: vitor Date: Sun, 21 Jan 2024 16:41:31 -0300 Subject: [PATCH 1/2] add tests to validate DAG yaml --- .github/workflows/ci-tests.yml | 2 +- .vscode_edit/settings.json | 12 ++ Dockerfile | 2 +- README.md | 2 +- .../all_parameters_example.yaml | 6 +- .../basic_example.yaml | 0 .../basic_example_skip_null.yaml | 0 .../discord_example.yaml | 0 .../markdown_docs_example.yaml | 0 .../{ => examples_and_tests}/qd_example.yaml | 0 .../slack_example.yaml | 0 .../terms_from_db_example.yaml | 2 +- .../terms_from_variable.yaml | 2 +- docker-compose.yml | 3 +- schemas/ro-dou.json | 202 ++++++++++++++++++ tests-requirements.txt | 5 +- tests/conftest.py | 7 +- tests/dag_generator_test.py | 4 +- tests/discord_sender_test.py | 16 +- tests/parsers_test.py | 6 +- tests/qd_searcher_test.py | 2 +- tests/test_validate_yaml_schemas.py | 47 ++++ 22 files changed, 293 insertions(+), 27 deletions(-) create mode 100644 .vscode_edit/settings.json rename dag_confs/{ => examples_and_tests}/all_parameters_example.yaml (86%) rename dag_confs/{ => examples_and_tests}/basic_example.yaml (100%) rename dag_confs/{ => examples_and_tests}/basic_example_skip_null.yaml (100%) rename dag_confs/{ => examples_and_tests}/discord_example.yaml (100%) rename dag_confs/{ => examples_and_tests}/markdown_docs_example.yaml (100%) rename dag_confs/{ => examples_and_tests}/qd_example.yaml (100%) rename dag_confs/{ => examples_and_tests}/slack_example.yaml (100%) rename dag_confs/{ => examples_and_tests}/terms_from_db_example.yaml (97%) rename dag_confs/{ => examples_and_tests}/terms_from_variable.yaml (93%) create mode 100644 schemas/ro-dou.json create mode 100644 tests/test_validate_yaml_schemas.py diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 1bb93fd..f8c2b39 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -2,7 +2,7 @@ name: CI Tests on: push: - branches: [ main ] + branches: [ "*" ] pull_request: branches: [ main ] diff --git a/.vscode_edit/settings.json b/.vscode_edit/settings.json new file mode 100644 index 0000000..81bd57a --- /dev/null +++ b/.vscode_edit/settings.json @@ -0,0 +1,12 @@ +{ + "yaml.schemas": { + // XXX para usar um schema do repo oficial https://github.com/gestaogovbr/Ro-dou: + // "https://raw.githubusercontent.com/gestaogovbr/Ro-dou/main/schemas/ro-dou.json": + // XXX para usar um schema de arquivo local: + "./schemas/ro-dou.json": [ + // XXX edit to your DAGs yaml directory + "dag_confs/**/*.yml", + "dag_confs/**/*.yaml" + ], + }, +} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 215c6f4..fcc8783 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM apache/airflow:2.7.3-python3.10 USER root # Copy Ro-dou core files from the host Docker context -COPY src /opt/airflow/dags/ro_dou +COPY src /opt/airflow/dags/ro_dou_src RUN chown -R airflow /opt/airflow diff --git a/README.md b/README.md index 57f9601..85cc95b 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ houve algum resultado encontrado na API da Imprensa Nacional para os termos e demais parâmetros deste clipping. Se a tarefa chamada **"send_report"** estiver na cor verde significa que houve resultado e que o email foi enviado. -Para visualizar o email acesse o endereço http://localhost:5000/. Este é um +Para visualizar o email acesse o endereço http://localhost:5001/. Este é um serviço que simula uma caixa de email (servidor SMTP) para fins de experimentação. **_Voilà!_**. O arquivo de configuração deste Clipping está na pasta `dag_confs/`. Confira [aqui](https://github.com/gestaogovbr/Ro-dou/blob/main/dag_confs/all_parameters_example.yaml) no Github. diff --git a/dag_confs/all_parameters_example.yaml b/dag_confs/examples_and_tests/all_parameters_example.yaml similarity index 86% rename from dag_confs/all_parameters_example.yaml rename to dag_confs/examples_and_tests/all_parameters_example.yaml index 04c11a2..2987f9d 100644 --- a/dag_confs/all_parameters_example.yaml +++ b/dag_confs/examples_and_tests/all_parameters_example.yaml @@ -14,9 +14,9 @@ dag: - governo aberto - lei de acesso à informação field: TUDO - is_exact_search: On - ignore_signature_match: On - force_rematch: On + is_exact_search: True + ignore_signature_match: True + force_rematch: True date: MES dou_sections: - SECAO_1 diff --git a/dag_confs/basic_example.yaml b/dag_confs/examples_and_tests/basic_example.yaml similarity index 100% rename from dag_confs/basic_example.yaml rename to dag_confs/examples_and_tests/basic_example.yaml diff --git a/dag_confs/basic_example_skip_null.yaml b/dag_confs/examples_and_tests/basic_example_skip_null.yaml similarity index 100% rename from dag_confs/basic_example_skip_null.yaml rename to dag_confs/examples_and_tests/basic_example_skip_null.yaml diff --git a/dag_confs/discord_example.yaml b/dag_confs/examples_and_tests/discord_example.yaml similarity index 100% rename from dag_confs/discord_example.yaml rename to dag_confs/examples_and_tests/discord_example.yaml diff --git a/dag_confs/markdown_docs_example.yaml b/dag_confs/examples_and_tests/markdown_docs_example.yaml similarity index 100% rename from dag_confs/markdown_docs_example.yaml rename to dag_confs/examples_and_tests/markdown_docs_example.yaml diff --git a/dag_confs/qd_example.yaml b/dag_confs/examples_and_tests/qd_example.yaml similarity index 100% rename from dag_confs/qd_example.yaml rename to dag_confs/examples_and_tests/qd_example.yaml diff --git a/dag_confs/slack_example.yaml b/dag_confs/examples_and_tests/slack_example.yaml similarity index 100% rename from dag_confs/slack_example.yaml rename to dag_confs/examples_and_tests/slack_example.yaml diff --git a/dag_confs/terms_from_db_example.yaml b/dag_confs/examples_and_tests/terms_from_db_example.yaml similarity index 97% rename from dag_confs/terms_from_db_example.yaml rename to dag_confs/examples_and_tests/terms_from_db_example.yaml index d85ab61..65e98ef 100644 --- a/dag_confs/terms_from_db_example.yaml +++ b/dag_confs/examples_and_tests/terms_from_db_example.yaml @@ -16,5 +16,5 @@ dag: report: emails: - destination@economia.gov.br - attach_csv: On + attach_csv: True subject: "[String] com caracteres especiais deve estar entre aspas" diff --git a/dag_confs/terms_from_variable.yaml b/dag_confs/examples_and_tests/terms_from_variable.yaml similarity index 93% rename from dag_confs/terms_from_variable.yaml rename to dag_confs/examples_and_tests/terms_from_variable.yaml index 9326643..9c18d5c 100644 --- a/dag_confs/terms_from_variable.yaml +++ b/dag_confs/examples_and_tests/terms_from_variable.yaml @@ -8,6 +8,6 @@ dag: report: emails: - destination@economia.gov.br - attach_csv: On + attach_csv: True subject: "Exemplo de termos na Variável" diff --git a/docker-compose.yml b/docker-compose.yml index ce96812..4bbadeb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -76,6 +76,7 @@ - ./src:/opt/airflow/dags/ro_dou_src # for development purpose - ./dag_confs:/opt/airflow/dags/ro_dou/dag_confs - ./tests:/opt/airflow/tests # for test purpose + - ./schemas:/opt/airflow/schemas # for test purpose depends_on: postgres: condition: service_healthy @@ -118,7 +119,7 @@ image: rnwood/smtp4dev:v3 restart: always ports: - - '5000:80' + - '5001:80' - '25:25' # Change the number before : to the port the SMTP server should be accessible on - '143:143' # Change the number before : to the port the IMAP server should be accessible on volumes: diff --git a/schemas/ro-dou.json b/schemas/ro-dou.json new file mode 100644 index 0000000..8105ac6 --- /dev/null +++ b/schemas/ro-dou.json @@ -0,0 +1,202 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "dag": { + "type": "object", + "description": "Instanciação da DAG", + "properties": { + "id": { + "type": "string", + "description": "Nome único da DAG" + }, + "description": { + "type": "string", + "description": "Descrição da DAG" + }, + "tags": { + "type": "array", + "description": "Lista de tags para filtragem da DAG no Airflow", + "items": { + "type": "string" + } + }, + "owner": { + "type": "array", + "description": "Lista de owners para filtragem da DAG no Airflow", + "items": { + "type": "string" + } + }, + "schedule": { + "type": "string", + "description": "Expressão cron válida ou uma das seguintes strings: @once, @continuous, @hourly, @daily, @weekly, @monthly, @quarterly, @yearly.", + "pattern": "^(\\S+\\s+){4}\\S+$|^@(once|continuous|hourly|daily|weekly|monthly|quarterly|yearly)$" + }, + "search": { + "type": "object", + "description": "Seção para definição da busca no Diário", + "properties": { + "sources": { + "type": "array", + "description": "description", + "items": { + "type": "string", + "enum": ["QD", "DOU"] + } + }, + "territory_id": { + "type": "integer", + "description": "Id do território no Querido Diário - QD" + }, + "terms": { + "oneOf": [ + { + "type": "array", + "description": "Lista de termos de busca no Diário", + "items": { + "type": "string" + } + }, + { + "type": "object", + "description": "description", + "properties": { + "from_airflow_variable": { + "type": "string", + "description": "description" + }, + "from_db_select": { + "type": "object", + "description": "description", + "properties": { + "sql": { + "type": "string", + "description": "description" + }, + "conn_id": { + "type": "string", + "description": "description" + } + } + } + } + } + ] + }, + "field": { + "type": "string", + "description": "description", + "enum": ["TUDO", "TITULO", "CONTEUDO"] + }, + "is_exact_search": { + "type": "boolean", + "description": "description" + }, + "ignore_signature_match": { + "type": "boolean", + "description": "description" + }, + "force_rematch": { + "type": "boolean", + "description": "description" + }, + "date": { + "type": "string", + "description": "description", + "enum": [ + "DIA", + "SEMANA", + "MES", + "ANO" + ] + }, + "dou_sections": { + "type": "array", + "description": "description", + "items": { + "type": "string", + "enum": [ + "SECAO_1", + "SECAO_2", + "SECAO_3", + "EDICAO_EXTRA", + "EDICAO_EXTRA_1A", + "EDICAO_EXTRA_1B", + "EDICAO_EXTRA_1D", + "EDICAO_EXTRA_2A", + "EDICAO_EXTRA_2B", + "EDICAO_EXTRA_2D", + "EDICAO_EXTRA_3A", + "EDICAO_EXTRA_3B", + "EDICAO_EXTRA_3D", + "EDICAO_SUPLEMENTAR", + "TODOS" + ] + } + } + }, + "required": ["terms"], + "additionalProperties": false + }, + "doc_md": { + "type": "string", + "description": "description" + }, + "report": { + "type": "object", + "description": "Aceita: `slack`, `discord`, `emails`, `attach_csv`, `subject`, `skip_null`", + "properties": { + "slack": { + "type": "object", + "description": "description", + "properties": { + "webhook": { + "type": "string", + "description": "description", + "format": "uri-reference" + } + } + }, + "discord": { + "type": "object", + "description": "description", + "properties": { + "webhook": { + "type": "string", + "description": "description", + "format": "uri-reference" + } + } + }, + "emails": { + "type": "array", + "description": "description", + "items": { + "type": "string", + "format": "email" + } + }, + "attach_csv": { + "type": "boolean", + "description": "description" + }, + "subject": { + "type": "string", + "description": "description" + }, + "skip_null": { + "type": "boolean", + "description": "description" + } + }, + "additionalProperties": false + } + }, + "required": ["id", "description", "search", "report"], + "additionalProperties": false + } + }, + "required": ["dag"], + "additionalProperties": false +} diff --git a/tests-requirements.txt b/tests-requirements.txt index ed35915..d09afca 100644 --- a/tests-requirements.txt +++ b/tests-requirements.txt @@ -6,4 +6,7 @@ pytest-mock==3.10.0 unidecode==1.2.0 xlrd==1.2.0 ijson==3.0.4 -openpyxl==3.0.7 \ No newline at end of file +openpyxl==3.0.7 +jsonschema==4.21.1 +PyYAML==6.0.1 +requests==2.31.0 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 09564a0..37ccf26 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,10 +6,10 @@ import pytest from typing import Tuple -from dags.ro_dou.dou_dag_generator import (DouDigestDagGenerator, +from dags.ro_dou_src.dou_dag_generator import (DouDigestDagGenerator, SearchResult) -from dags.ro_dou.parsers import YAMLParser -from dags.ro_dou.searchers import DOUSearcher +from dags.ro_dou_src.parsers import YAMLParser +from dags.ro_dou_src.searchers import DOUSearcher TEST_AIRFLOW_HOME = '/opt/airflow' @@ -42,6 +42,7 @@ def dag_gen() -> DouDigestDagGenerator: @pytest.fixture() def yaml_parser()-> YAMLParser: filepath = os.path.join(DouDigestDagGenerator().YAMLS_DIR, + "examples_and_tests", 'basic_example.yaml') return YAMLParser(filepath=filepath) diff --git a/tests/dag_generator_test.py b/tests/dag_generator_test.py index eec5fa8..1284111 100644 --- a/tests/dag_generator_test.py +++ b/tests/dag_generator_test.py @@ -3,8 +3,8 @@ import pandas as pd import pytest -from dags.ro_dou.dou_dag_generator import merge_results -from dags.ro_dou.notification.email_sender import EmailSender, repack_match +from dags.ro_dou_src.dou_dag_generator import merge_results +from dags.ro_dou_src.notification.email_sender import EmailSender, repack_match def test_repack_match(report_example): diff --git a/tests/discord_sender_test.py b/tests/discord_sender_test.py index 48a04ca..5a3a5af 100644 --- a/tests/discord_sender_test.py +++ b/tests/discord_sender_test.py @@ -1,7 +1,7 @@ from collections import namedtuple import pytest -from dags.ro_dou.notification.discord_sender import DiscordSender, requests +from dags.ro_dou_src.notification.discord_sender import DiscordSender, requests from pytest_mock import MockerFixture WEBHOOK = 'https://some-url.com/xxx' @@ -13,7 +13,7 @@ def mocked_specs(): def test_send_discord_data(session_mocker: MockerFixture, mocked_specs): session_mocker.patch( - 'dags.ro_dou.notification.discord_sender.requests.post') + 'dags.ro_dou_src.notification.discord_sender.requests.post') sender = DiscordSender(mocked_specs) sender.send_data( @@ -31,7 +31,7 @@ def test_send_discord_data(session_mocker: MockerFixture, mocked_specs): def test_send_text_to_discord(session_mocker: MockerFixture, mocked_specs): session_mocker.patch( - 'dags.ro_dou.notification.discord_sender.requests.post') + 'dags.ro_dou_src.notification.discord_sender.requests.post') sender = DiscordSender(mocked_specs) sender.send_text('string') @@ -46,7 +46,7 @@ def test_send_text_to_discord(session_mocker: MockerFixture, mocked_specs): def test_send_embeds_to_discord(session_mocker: MockerFixture, mocked_specs): session_mocker.patch( - 'dags.ro_dou.notification.discord_sender.requests.post') + 'dags.ro_dou_src.notification.discord_sender.requests.post') sender = DiscordSender(mocked_specs) items = [ { @@ -127,9 +127,9 @@ def _send_report(specs): def test_send_report_to_discord__texts(session_mocker: MockerFixture, mocked_specs): session_mocker.patch( - 'dags.ro_dou.notification.discord_sender.DiscordSender.send_text') + 'dags.ro_dou_src.notification.discord_sender.DiscordSender.send_text') session_mocker.patch( - 'dags.ro_dou.notification.discord_sender.DiscordSender.send_embeds') + 'dags.ro_dou_src.notification.discord_sender.DiscordSender.send_embeds') _send_report(mocked_specs) @@ -147,9 +147,9 @@ def test_send_report_to_discord__texts(session_mocker: MockerFixture, def test_send_report_to_discord__embeds(session_mocker: MockerFixture, mocked_specs): session_mocker.patch( - 'dags.ro_dou.notification.discord_sender.DiscordSender.send_text') + 'dags.ro_dou_src.notification.discord_sender.DiscordSender.send_text') session_mocker.patch( - 'dags.ro_dou.notification.discord_sender.DiscordSender.send_embeds') + 'dags.ro_dou_src.notification.discord_sender.DiscordSender.send_embeds') _send_report(mocked_specs) diff --git a/tests/parsers_test.py b/tests/parsers_test.py index 8d6365a..339f941 100644 --- a/tests/parsers_test.py +++ b/tests/parsers_test.py @@ -8,8 +8,6 @@ import pytest -import pandas as pd - currentdir = os.path.dirname( os.path.abspath(inspect.getfile(inspect.currentframe()))) parentdir = os.path.dirname(currentdir) @@ -185,7 +183,9 @@ def test_hash_dag_id(yaml_parser, dag_id, size, hashed): ), ]) def test_parse(filepath, result_tuple): - filepath = os.path.join(DouDigestDagGenerator().YAMLS_DIR, filepath) + filepath = os.path.join(DouDigestDagGenerator().YAMLS_DIR, + "examples_and_tests", + filepath) parsed = YAMLParser(filepath=filepath).parse() assert parsed == DAGConfig(**result_tuple) diff --git a/tests/qd_searcher_test.py b/tests/qd_searcher_test.py index 0a2d6f7..309582e 100644 --- a/tests/qd_searcher_test.py +++ b/tests/qd_searcher_test.py @@ -1,6 +1,6 @@ from datetime import datetime import pytest -from dags.ro_dou.searchers import QDSearcher, _build_query_payload +from dags.ro_dou_src.searchers import QDSearcher, _build_query_payload @pytest.mark.parametrize( diff --git a/tests/test_validate_yaml_schemas.py b/tests/test_validate_yaml_schemas.py new file mode 100644 index 0000000..4cd91fc --- /dev/null +++ b/tests/test_validate_yaml_schemas.py @@ -0,0 +1,47 @@ +import json +import jsonschema +import pytest +import glob +import yaml +import requests +from urllib.parse import urlparse + + +YAMLS_DIR = "../dags/ro_dou/dag_confs" +SCHEMA_FILEPATH = "../schemas/ro-dou.json" +# or +# SCHEMA_FILEPATH = "https://raw.githubusercontent.com/gestaogovbr/Ro-dou/main/schemas/ro-dou.json" + + +def get_schema(filepath): + def _is_valid_url(url): + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError: + return False + + if _is_valid_url(filepath): + response = requests.get(filepath) + response.raise_for_status() + return json.loads(response.text) + else: + with open(filepath) as f: + return json.load(f) + + +SCHEMA = get_schema(SCHEMA_FILEPATH) + +@pytest.mark.parametrize( + "data_file", + [ + data_file + for data_file in glob.glob(f"{YAMLS_DIR}/**/*.yml", recursive=True) + + glob.glob(f"{YAMLS_DIR}/**/*.yaml", recursive=True) + ], +) +def test_json_schema_validation(data_file): + with open(data_file) as data_fp: + data = yaml.safe_load(data_fp) + + jsonschema.validate(instance=data, schema=SCHEMA) From 9932d165ee7b2aa0a986c3c21db34eaac38bc884 Mon Sep 17 00:00:00 2001 From: vitor Date: Thu, 15 Feb 2024 08:27:04 -0300 Subject: [PATCH 2/2] up airflow to 2.8.1 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fcc8783..3c2632c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM apache/airflow:2.7.3-python3.10 +FROM apache/airflow:2.8.1-python3.10 USER root