From 7c14d109946c23098ed91dabec9dd43e62281500 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Thu, 7 Apr 2022 11:24:27 -0400 Subject: [PATCH 1/7] avoid writing empty files to gcs, and log bit better --- gtfs_rt_validator_api.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gtfs_rt_validator_api.py b/gtfs_rt_validator_api.py index 2bcba4d..60cfef0 100644 --- a/gtfs_rt_validator_api.py +++ b/gtfs_rt_validator_api.py @@ -6,6 +6,7 @@ import os import shutil import subprocess +import traceback from collections import defaultdict from concurrent.futures import ProcessPoolExecutor from pathlib import Path @@ -200,11 +201,13 @@ def validate_gcs_bucket( logger.info(f"Saving aggregate counts as: {results_bucket}") error_counts = rollup_error_counts(dst_path_rt) - df = pd.DataFrame(error_counts) - with NamedTemporaryFile() as tmp_file: - df.to_parquet(tmp_file.name) - fs.put(tmp_file.name, results_bucket) + if error_counts: + df = pd.DataFrame(error_counts) + + with NamedTemporaryFile() as tmp_file: + df.to_parquet(tmp_file.name) + fs.put(tmp_file.name, results_bucket) elif results_bucket and not aggregate_counts: # validator stores results as {filename}.results.json @@ -228,6 +231,7 @@ def validate_gcs_bucket( fs.put(final_files, results_bucket) except Exception as e: + typer.echo(f"got exception during validation: {traceback.format_exc()}") raise e finally: From 422cc1dfbefa408a793ad14adba7f21b85f40d3e Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Thu, 7 Apr 2022 11:24:54 -0400 Subject: [PATCH 2/7] only download schedule after rt --- gtfs_rt_validator_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gtfs_rt_validator_api.py b/gtfs_rt_validator_api.py index 60cfef0..93778c2 100644 --- a/gtfs_rt_validator_api.py +++ b/gtfs_rt_validator_api.py @@ -185,15 +185,15 @@ def validate_gcs_bucket( dst_path_gtfs = f"{tmp_dir_name}/gtfs" dst_path_rt = f"{tmp_dir_name}/rt" - # fetch and zip gtfs schedule - download_gtfs_schedule_zip(gtfs_schedule_path, dst_path_gtfs, fs) - # fetch rt data if gtfs_rt_glob_path is None: raise ValueError("One of gtfs rt glob path or date must be specified") num_files = download_rt_files(dst_path_rt, fs, glob_path=gtfs_rt_glob_path) + # fetch and zip gtfs schedule + download_gtfs_schedule_zip(gtfs_schedule_path, dst_path_gtfs, fs) + logger.info(f"validating {num_files} files") validate(f"{dst_path_gtfs}.zip", dst_path_rt, verbose=verbose) From f82c2a616bcd2232867a27c3de321408ebe9ec9f Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Thu, 7 Apr 2022 11:33:52 -0400 Subject: [PATCH 3/7] update black --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c39fa60..d17d456 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: - id: end-of-file-fixer - id: check-added-large-files - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 22.3.0 hooks: - id: black - repo: https://github.com/pycqa/isort From 2633f01d67da8b21b5e2ce25cb5817d568fa47c7 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Thu, 7 Apr 2022 11:35:33 -0400 Subject: [PATCH 4/7] fmt --- tests/test_download.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_download.py b/tests/test_download.py index cbe1700..4c4b9cd 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -52,7 +52,9 @@ def test_validation_manual(): ) download_rt_files( - dir_rt, fs, glob_path=f"{GCS_BASE_DIR}/gtfs_rt_126/*/126/0/*", + dir_rt, + fs, + glob_path=f"{GCS_BASE_DIR}/gtfs_rt_126/*/126/0/*", ) print("validating") From 0cff0c84399840b33c750981a7030498b7e432cb Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Thu, 7 Apr 2022 11:40:58 -0400 Subject: [PATCH 5/7] fix workflow --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8316cfb..f0e4fa5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: google-github-actions/setup-gcloud@master + - uses: google-github-actions/setup-gcloud@v0 with: service_account_key: ${{ secrets.GCP_SA_KEY }} export_default_credentials: true From f02bff284561065f831088adae85b932f5f5bb89 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Thu, 7 Apr 2022 11:56:24 -0400 Subject: [PATCH 6/7] update typer --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 170ae06..b2ec1aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ google-auth==1.32.1 gtfs-realtime-bindings==0.0.7 pytest==6.2.5 black==19.10b0 -typer==0.4.0 +typer==0.4.1 pendulum==2.1.2 structlog==21.5.0 calitp==0.0.8 From 37c1a101e07c8f1d4fff2c34db6f5cb2a8052d87 Mon Sep 17 00:00:00 2001 From: Andrew Vaccaro Date: Thu, 7 Apr 2022 13:32:00 -0400 Subject: [PATCH 7/7] fix version --- gtfs_rt_validator_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtfs_rt_validator_api.py b/gtfs_rt_validator_api.py index 93778c2..6889aeb 100644 --- a/gtfs_rt_validator_api.py +++ b/gtfs_rt_validator_api.py @@ -1,4 +1,4 @@ -__version__ = "0.0.3" +__version__ = "0.0.5" import concurrent import json