From ab9e1c83c6e414f06eadf9118f60a90ed802a670 Mon Sep 17 00:00:00 2001
From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com>
Date: Tue, 12 Sep 2023 18:02:50 -0400
Subject: [PATCH] test replace setoutput

---
 .../store_latest_datasets_cronjob.yml | 381 +++++++++---------
 1 file changed, 192 insertions(+), 189 deletions(-)

diff --git a/.github/workflows/store_latest_datasets_cronjob.yml b/.github/workflows/store_latest_datasets_cronjob.yml
index 0a5ca340..5bec7ef1 100644
--- a/.github/workflows/store_latest_datasets_cronjob.yml
+++ b/.github/workflows/store_latest_datasets_cronjob.yml
@@ -1,8 +1,10 @@
 name: Store latest datasets cronjob
 
 on:
-  schedule:
-    - cron: "0 0 * * *"
+  push:
+    branches: [ '**' ]
+#  schedule:
+#    - cron: "0 0 * * *"
 
 jobs:
   get-urls:
@@ -109,7 +111,8 @@ jobs:
         run: |
           DATASETS=$(jq . ./urls_matrix.json -c)
           echo $DATASETS
-          echo "::set-output name=matrix::$DATASETS"
+          echo "matrix=$DATASETS" >> $GITHUB_OUTPUT
+#          echo "::set-output name=matrix::$DATASETS"
       - name: Persist URLs matrix artifact
         if: always()
         uses: actions/upload-artifact@v2
@@ -124,189 +127,189 @@
         path: ./get_urls_report.txt
     outputs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
-  store-datasets:
-    needs: [ get-urls ]
-    runs-on: ubuntu-latest
-    strategy:
-      matrix: ${{ fromJson(needs.get-urls.outputs.matrix) }}
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up Python 3.9
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.9
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install wheel requests
-      - name: Validate and download the datasets
-        shell: python
-        env:
-          API_SOURCE_SECRETS: ${{ secrets.API_SOURCE_SECRETS }}
-        run: |
-          import os
-          import json
-          import requests
-          from zipfile import ZipFile
-
-          # OS constants
-          ROOT = os.getcwd()
-          DATASETS = "datasets"
-          FOLDER = """${{ matrix.hash }}"""
-          DOWNLOAD_REPORTS = "download_reports"
-
-          # Jobs constants
-          BASE = "base"
-          DIRECT_DOWNLOAD = "direct_download"
-          AUTHENTICATION_TYPE = "authentication_type"
-          API_KEY_PARAMETER_NAME = "api_key_parameter_name"
-
-          # Secrets constants
-          API_SOURCE_SECRETS = "API_SOURCE_SECRETS"
-
-          # Load API source secrets
-          api_source_secrets = json.loads(os.environ[API_SOURCE_SECRETS])
-
-          jobs = """${{ matrix.data }}""".split()
-          for job in jobs:
-              job_json = json.loads(job)
-              base = job_json[BASE]
-              url = job_json[DIRECT_DOWNLOAD]
-              authentication_type = job_json[AUTHENTICATION_TYPE]
-              api_key_parameter_name = job_json[API_KEY_PARAMETER_NAME]
-              api_key_parameter_value = api_source_secrets.get(base)
-
-              # Download the dataset
-              zip_path = os.path.join(ROOT, DATASETS, FOLDER, f"{base}.zip")
-              os.makedirs(os.path.dirname(zip_path), exist_ok=True)
-
-              params = {}
-              headers = {}
-              if authentication_type == 1:
-                  params[api_key_parameter_name] = api_key_parameter_value
-              elif authentication_type == 2:
-                  headers[api_key_parameter_name] = api_key_parameter_value
-
-              is_downloadable = True
-              try:
-                  zip_file_req = requests.get(url, params=params, headers=headers, allow_redirects=True)
-                  zip_file_req.raise_for_status()
-              except Exception as e:
-                  is_downloadable = False
-                  file_log = (
-                      f"{base}: FAILURE! Exception {e} occurred when downloading URL {url}.\n"
-                  )
-
-              if is_downloadable:
-                  zip_file = zip_file_req.content
-                  with open(zip_path, "wb") as f:
-                      f.write(zip_file)
-                  # Make sure that the download file is a zip file
-                  try:
-                      ZipFile(zip_path, "r")
-                      file_log = (
-                          f"{base}: SUCCESS! A GTFS dataset zip file was downloaded.\n"
-                      )
-                  except Exception as e:
-                      os.remove(zip_path)
-                      file_log = (
-                          f"{base}: FAILURE! Exception {e} occurred when loading the zip file.\n"
-                      )
-              report_path = os.path.join(ROOT, DOWNLOAD_REPORTS, f"{base}.txt")
-              os.makedirs(os.path.dirname(report_path), exist_ok=True)
-              with open(report_path, "w") as fp:
-                  fp.write(file_log)
-      - name: Set up and authorize Cloud
-        uses: google-github-actions/auth@v0
-        with:
-          credentials_json: ${{ secrets.ARCHIVE_DATASET_SA_KEY }}
-      - name: Upload datasets to Google Cloud Storage
-        id: upload-datasets
-        uses: google-github-actions/upload-cloud-storage@main
-        with:
-          path: datasets/${{ matrix.hash }}
-          destination: mdb-latest
-          parent: false
-      - name: Persist Download Reports artifact
-        if: always()
-        uses: actions/upload-artifact@v2
-        with:
-          name: download_reports
-          path: download_reports
-  validate-latest:
-    needs: [ get-urls, store-datasets ]
-    runs-on: ubuntu-latest
-    strategy:
-      matrix: ${{ fromJson(needs.get-urls.outputs.matrix) }}
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up Python 3.9
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.9
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install wheel requests
-      - name: Validate the latest datasets
-        shell: python
-        run: |
-          import os
-          import json
-          import requests
-          from zipfile import ZipFile
-
-          # OS constants
-          ROOT = os.getcwd()
-          DATASETS = "datasets"
-          FOLDER = """${{ matrix.hash }}"""
-          LATEST_REPORTS = "latest_reports"
-
-          # Jobs constants
-          BASE = "base"
-          LATEST = "latest"
-
-          jobs = """${{ matrix.data }}""".split()
-          for job in jobs:
-              job_json = json.loads(job)
-              base = job_json[BASE]
-              url = job_json[LATEST]
-
-              # Download the dataset
-              zip_path = os.path.join(ROOT, DATASETS, FOLDER, f"{base}.zip")
-              os.makedirs(os.path.dirname(zip_path), exist_ok=True)
-              is_downloadable = True
-              try:
-                  zip_file_req = requests.get(url, allow_redirects=True)
-                  zip_file_req.raise_for_status()
-              except Exception as e:
-                  is_downloadable = False
-                  file_log = (
-                      f"{base}: FAILURE! Exception {e} occurred when downloading URL {url}.\n"
-                  )
-
-              if is_downloadable:
-                  zip_file = zip_file_req.content
-                  with open(zip_path, "wb") as f:
-                      f.write(zip_file)
-                  # Make sure that the download file is a zip file
-                  try:
-                      ZipFile(zip_path, "r")
-                      file_log = (
-                          f"{base}: SUCCESS! A GTFS dataset zip file was downloaded.\n"
-                      )
-                  except Exception as e:
-                      os.remove(zip_path)
-                      file_log = (
-                          f"{base}: FAILURE! Exception {e} occurred when loading the zip file.\n"
-                      )
-              report_path = os.path.join(ROOT, LATEST_REPORTS, f"{base}.txt")
-              os.makedirs(os.path.dirname(report_path), exist_ok=True)
-              with open(report_path, "w") as fp:
-                  fp.write(file_log)
-      - name: Persist Latest Reports artifact
-        if: always()
-        uses: actions/upload-artifact@v2
-        with:
-          name: latest_reports
-          path: latest_reports
+#  store-datasets:
+#    needs: [ get-urls ]
+#    runs-on: ubuntu-latest
+#    strategy:
+#      matrix: ${{ fromJson(needs.get-urls.outputs.matrix) }}
+#    steps:
+#      - uses: actions/checkout@v2
+#      - name: Set up Python 3.9
+#        uses: actions/setup-python@v2
+#        with:
+#          python-version: 3.9
+#      - name: Install dependencies
+#        run: |
+#          python -m pip install --upgrade pip
+#          pip install wheel requests
+#      - name: Validate and download the datasets
+#        shell: python
+#        env:
+#          API_SOURCE_SECRETS: ${{ secrets.API_SOURCE_SECRETS }}
+#        run: |
+#          import os
+#          import json
+#          import requests
+#          from zipfile import ZipFile
+#
+#          # OS constants
+#          ROOT = os.getcwd()
+#          DATASETS = "datasets"
+#          FOLDER = """${{ matrix.hash }}"""
+#          DOWNLOAD_REPORTS = "download_reports"
+#
+#          # Jobs constants
+#          BASE = "base"
+#          DIRECT_DOWNLOAD = "direct_download"
+#          AUTHENTICATION_TYPE = "authentication_type"
+#          API_KEY_PARAMETER_NAME = "api_key_parameter_name"
+#
+#          # Secrets constants
+#          API_SOURCE_SECRETS = "API_SOURCE_SECRETS"
+#
+#          # Load API source secrets
+#          api_source_secrets = json.loads(os.environ[API_SOURCE_SECRETS])
+#
+#          jobs = """${{ matrix.data }}""".split()
+#          for job in jobs:
+#              job_json = json.loads(job)
+#              base = job_json[BASE]
+#              url = job_json[DIRECT_DOWNLOAD]
+#              authentication_type = job_json[AUTHENTICATION_TYPE]
+#              api_key_parameter_name = job_json[API_KEY_PARAMETER_NAME]
+#              api_key_parameter_value = api_source_secrets.get(base)
+#
+#              # Download the dataset
+#              zip_path = os.path.join(ROOT, DATASETS, FOLDER, f"{base}.zip")
+#              os.makedirs(os.path.dirname(zip_path), exist_ok=True)
+#
+#              params = {}
+#              headers = {}
+#              if authentication_type == 1:
+#                  params[api_key_parameter_name] = api_key_parameter_value
+#              elif authentication_type == 2:
+#                  headers[api_key_parameter_name] = api_key_parameter_value
+#
+#              is_downloadable = True
+#              try:
+#                  zip_file_req = requests.get(url, params=params, headers=headers, allow_redirects=True)
+#                  zip_file_req.raise_for_status()
+#              except Exception as e:
+#                  is_downloadable = False
+#                  file_log = (
+#                      f"{base}: FAILURE! Exception {e} occurred when downloading URL {url}.\n"
+#                  )
+#
+#              if is_downloadable:
+#                  zip_file = zip_file_req.content
+#                  with open(zip_path, "wb") as f:
+#                      f.write(zip_file)
+#                  # Make sure that the download file is a zip file
+#                  try:
+#                      ZipFile(zip_path, "r")
+#                      file_log = (
+#                          f"{base}: SUCCESS! A GTFS dataset zip file was downloaded.\n"
+#                      )
+#                  except Exception as e:
+#                      os.remove(zip_path)
+#                      file_log = (
+#                          f"{base}: FAILURE! Exception {e} occurred when loading the zip file.\n"
+#                      )
+#              report_path = os.path.join(ROOT, DOWNLOAD_REPORTS, f"{base}.txt")
+#              os.makedirs(os.path.dirname(report_path), exist_ok=True)
+#              with open(report_path, "w") as fp:
+#                  fp.write(file_log)
+#      - name: Set up and authorize Cloud
+#        uses: google-github-actions/auth@v0
+#        with:
+#          credentials_json: ${{ secrets.ARCHIVE_DATASET_SA_KEY }}
+#      - name: Upload datasets to Google Cloud Storage
+#        id: upload-datasets
+#        uses: google-github-actions/upload-cloud-storage@main
+#        with:
+#          path: datasets/${{ matrix.hash }}
+#          destination: mdb-latest
+#          parent: false
+#      - name: Persist Download Reports artifact
+#        if: always()
+#        uses: actions/upload-artifact@v2
+#        with:
+#          name: download_reports
+#          path: download_reports
+#  validate-latest:
+#    needs: [ get-urls, store-datasets ]
+#    runs-on: ubuntu-latest
+#    strategy:
+#      matrix: ${{ fromJson(needs.get-urls.outputs.matrix) }}
+#    steps:
+#      - uses: actions/checkout@v2
+#      - name: Set up Python 3.9
+#        uses: actions/setup-python@v2
+#        with:
+#          python-version: 3.9
+#      - name: Install dependencies
+#        run: |
+#          python -m pip install --upgrade pip
+#          pip install wheel requests
+#      - name: Validate the latest datasets
+#        shell: python
+#        run: |
+#          import os
+#          import json
+#          import requests
+#          from zipfile import ZipFile
+#
+#          # OS constants
+#          ROOT = os.getcwd()
+#          DATASETS = "datasets"
+#          FOLDER = """${{ matrix.hash }}"""
+#          LATEST_REPORTS = "latest_reports"
+#
+#          # Jobs constants
+#          BASE = "base"
+#          LATEST = "latest"
+#
+#          jobs = """${{ matrix.data }}""".split()
+#          for job in jobs:
+#              job_json = json.loads(job)
+#              base = job_json[BASE]
+#              url = job_json[LATEST]
+#
+#              # Download the dataset
+#              zip_path = os.path.join(ROOT, DATASETS, FOLDER, f"{base}.zip")
+#              os.makedirs(os.path.dirname(zip_path), exist_ok=True)
+#              is_downloadable = True
+#              try:
+#                  zip_file_req = requests.get(url, allow_redirects=True)
+#                  zip_file_req.raise_for_status()
+#              except Exception as e:
+#                  is_downloadable = False
+#                  file_log = (
+#                      f"{base}: FAILURE! Exception {e} occurred when downloading URL {url}.\n"
+#                  )
+#
+#              if is_downloadable:
+#                  zip_file = zip_file_req.content
+#                  with open(zip_path, "wb") as f:
+#                      f.write(zip_file)
+#                  # Make sure that the download file is a zip file
+#                  try:
+#                      ZipFile(zip_path, "r")
+#                      file_log = (
+#                          f"{base}: SUCCESS! A GTFS dataset zip file was downloaded.\n"
+#                      )
+#                  except Exception as e:
+#                      os.remove(zip_path)
+#                      file_log = (
+#                          f"{base}: FAILURE! Exception {e} occurred when loading the zip file.\n"
+#                      )
+#              report_path = os.path.join(ROOT, LATEST_REPORTS, f"{base}.txt")
+#              os.makedirs(os.path.dirname(report_path), exist_ok=True)
+#              with open(report_path, "w") as fp:
+#                  fp.write(file_log)
+#      - name: Persist Latest Reports artifact
+#        if: always()
+#        uses: actions/upload-artifact@v2
+#        with:
+#          name: latest_reports
+#          path: latest_reports
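
Note on the change under test: GitHub deprecated the `::set-output` workflow command in October 2022; the replacement appends `name=value` pairs to the file the runner exposes via `$GITHUB_OUTPUT`. The one-line write works here because `jq -c` emits the matrix JSON compactly on a single line; a multiline value would need the `name<<DELIMITER` heredoc form instead. A minimal sketch of the pattern, separate from this patch (the workflow, job, and step names below are hypothetical, for illustration only):

    name: set-output migration demo   # hypothetical workflow, not part of this patch
    on: workflow_dispatch
    jobs:
      demo:
        runs-on: ubuntu-latest
        steps:
          - name: Produce an output
            id: produce
            run: |
              # Deprecated form: echo "::set-output name=greeting::hello"
              # Replacement: append name=value to the step's output file
              echo "greeting=hello" >> "$GITHUB_OUTPUT"
          - name: Consume the output
            run: echo "${{ steps.produce.outputs.greeting }}"

Consumers such as `${{ needs.get-urls.outputs.matrix }}` are unaffected, since only the producing side changes; that is presumably why the `store-datasets` and `validate-latest` jobs could be commented out, and the cron trigger swapped for a push trigger, while the new output syntax is exercised.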