Skip to content

Commit

Permalink
test replace setoutput
Browse files Browse the repository at this point in the history
  • Loading branch information
davidgamez committed Sep 12, 2023
1 parent 8a6163d commit ab9e1c8
Showing 1 changed file with 192 additions and 189 deletions.
381 changes: 192 additions & 189 deletions .github/workflows/store_latest_datasets_cronjob.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
name: Store latest datasets cronjob

on:
schedule:
- cron: "0 0 * * *"
push:
branches: [ '**' ]
# schedule:
# - cron: "0 0 * * *"

jobs:
get-urls:
Expand Down Expand Up @@ -109,7 +111,8 @@ jobs:
run: |
DATASETS=$(jq . ./urls_matrix.json -c)
echo $DATASETS
echo "::set-output name=matrix::$DATASETS"
echo "matrix=$DATASETS" >> $GITHUB_OUTPUT
# echo "::set-output name=matrix::$DATASETS"
- name: Persist URLs matrix artifact
if: always()
uses: actions/upload-artifact@v2
Expand All @@ -124,189 +127,189 @@ jobs:
path: ./get_urls_report.txt
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
# Job: for every entry of the matrix produced by `get-urls`, download each
# dataset from its direct-download URL, check that the payload opens as a zip
# archive, write a one-line report per dataset, and upload the downloaded
# folder to Google Cloud Storage.
store-datasets:
  needs: [ get-urls ]
  runs-on: ubuntu-latest
  strategy:
    matrix: ${{ fromJson(needs.get-urls.outputs.matrix) }}
  steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.9
      uses: actions/setup-python@v2
      with:
        # Quoted so the version is always read as a string, never as a float.
        python-version: "3.9"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install wheel requests
    - name: Validate and download the datasets
      shell: python
      env:
        API_SOURCE_SECRETS: ${{ secrets.API_SOURCE_SECRETS }}
        # Matrix values are handed over through the environment instead of
        # being spliced into the Python source: the script text then never
        # contains workflow data, so a value holding quotes or backslashes
        # can neither break nor inject code into the script.
        MATRIX_HASH: ${{ matrix.hash }}
        MATRIX_DATA: ${{ matrix.data }}
      run: |
        import os
        import json
        import requests
        from zipfile import ZipFile

        # OS constants
        ROOT = os.getcwd()
        DATASETS = "datasets"
        FOLDER = os.environ["MATRIX_HASH"]
        DOWNLOAD_REPORTS = "download_reports"

        # Jobs constants
        BASE = "base"
        DIRECT_DOWNLOAD = "direct_download"
        AUTHENTICATION_TYPE = "authentication_type"
        API_KEY_PARAMETER_NAME = "api_key_parameter_name"

        # Secrets constants
        API_SOURCE_SECRETS = "API_SOURCE_SECRETS"

        # (connect, read) timeouts in seconds, so that a server which stops
        # responding fails this one dataset instead of hanging the whole job.
        REQUEST_TIMEOUT = (30, 300)

        # Load API source secrets
        api_source_secrets = json.loads(os.environ[API_SOURCE_SECRETS])

        # The matrix data is a whitespace-separated list of JSON documents,
        # one per dataset to download.
        jobs = os.environ["MATRIX_DATA"].split()
        for job in jobs:
            job_json = json.loads(job)
            base = job_json[BASE]
            url = job_json[DIRECT_DOWNLOAD]
            authentication_type = job_json[AUTHENTICATION_TYPE]
            api_key_parameter_name = job_json[API_KEY_PARAMETER_NAME]
            api_key_parameter_value = api_source_secrets.get(base)

            # Download the dataset
            zip_path = os.path.join(ROOT, DATASETS, FOLDER, f"{base}.zip")
            os.makedirs(os.path.dirname(zip_path), exist_ok=True)

            # Authentication type 1 sends the API key as a query parameter,
            # type 2 sends it as a request header; any other value sends none.
            params = {}
            headers = {}
            if authentication_type == 1:
                params[api_key_parameter_name] = api_key_parameter_value
            elif authentication_type == 2:
                headers[api_key_parameter_name] = api_key_parameter_value

            is_downloadable = True
            try:
                zip_file_req = requests.get(
                    url,
                    params=params,
                    headers=headers,
                    allow_redirects=True,
                    timeout=REQUEST_TIMEOUT,
                )
                zip_file_req.raise_for_status()
            except Exception as e:
                is_downloadable = False
                file_log = (
                    f"{base}: FAILURE! Exception {e} occurred when downloading URL {url}.\n"
                )

            if is_downloadable:
                zip_file = zip_file_req.content
                with open(zip_path, "wb") as f:
                    f.write(zip_file)
                # Make sure that the download file is a zip file. The archive
                # is opened in a `with` block so its file handle is closed
                # again right after the check.
                try:
                    with ZipFile(zip_path, "r"):
                        pass
                    file_log = (
                        f"{base}: SUCCESS! A GTFS dataset zip file was downloaded.\n"
                    )
                except Exception as e:
                    # Drop the invalid file so it is not uploaded later.
                    os.remove(zip_path)
                    file_log = (
                        f"{base}: FAILURE! Exception {e} occurred when loading the zip file.\n"
                    )

            # One report file per dataset, holding the outcome line above.
            report_path = os.path.join(ROOT, DOWNLOAD_REPORTS, f"{base}.txt")
            os.makedirs(os.path.dirname(report_path), exist_ok=True)
            with open(report_path, "w") as fp:
                fp.write(file_log)
    - name: Set up and authorize Cloud
      uses: google-github-actions/auth@v0
      with:
        credentials_json: ${{ secrets.ARCHIVE_DATASET_SA_KEY }}
    - name: Upload datasets to Google Cloud Storage
      id: upload-datasets
      uses: google-github-actions/upload-cloud-storage@main
      with:
        path: datasets/${{ matrix.hash }}
        destination: mdb-latest
        parent: false
    # NOTE(review): every matrix job uploads under the same artifact name;
    # confirm that is still supported before bumping upload-artifact.
    - name: Persist Download Reports artifact
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: download_reports
        path: download_reports
# Job: after `store-datasets` has finished, fetch each dataset again from its
# "latest" URL and check that the payload opens as a zip archive, writing a
# one-line report per dataset.
validate-latest:
  needs: [ get-urls, store-datasets ]
  runs-on: ubuntu-latest
  strategy:
    matrix: ${{ fromJson(needs.get-urls.outputs.matrix) }}
  steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.9
      uses: actions/setup-python@v2
      with:
        # Quoted so the version is always read as a string, never as a float.
        python-version: "3.9"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install wheel requests
    - name: Validate the latest datasets
      shell: python
      env:
        # Matrix values are handed over through the environment instead of
        # being spliced into the Python source: the script text then never
        # contains workflow data, so a value holding quotes or backslashes
        # can neither break nor inject code into the script.
        MATRIX_HASH: ${{ matrix.hash }}
        MATRIX_DATA: ${{ matrix.data }}
      run: |
        import os
        import json
        import requests
        from zipfile import ZipFile

        # OS constants
        ROOT = os.getcwd()
        DATASETS = "datasets"
        FOLDER = os.environ["MATRIX_HASH"]
        LATEST_REPORTS = "latest_reports"

        # Jobs constants
        BASE = "base"
        LATEST = "latest"

        # (connect, read) timeouts in seconds, so that a server which stops
        # responding fails this one dataset instead of hanging the whole job.
        REQUEST_TIMEOUT = (30, 300)

        # The matrix data is a whitespace-separated list of JSON documents,
        # one per dataset to validate.
        jobs = os.environ["MATRIX_DATA"].split()
        for job in jobs:
            job_json = json.loads(job)
            base = job_json[BASE]
            url = job_json[LATEST]

            # Download the dataset
            zip_path = os.path.join(ROOT, DATASETS, FOLDER, f"{base}.zip")
            os.makedirs(os.path.dirname(zip_path), exist_ok=True)

            is_downloadable = True
            try:
                zip_file_req = requests.get(
                    url,
                    allow_redirects=True,
                    timeout=REQUEST_TIMEOUT,
                )
                zip_file_req.raise_for_status()
            except Exception as e:
                is_downloadable = False
                file_log = (
                    f"{base}: FAILURE! Exception {e} occurred when downloading URL {url}.\n"
                )

            if is_downloadable:
                zip_file = zip_file_req.content
                with open(zip_path, "wb") as f:
                    f.write(zip_file)
                # Make sure that the download file is a zip file. The archive
                # is opened in a `with` block so its file handle is closed
                # again right after the check.
                try:
                    with ZipFile(zip_path, "r"):
                        pass
                    file_log = (
                        f"{base}: SUCCESS! A GTFS dataset zip file was downloaded.\n"
                    )
                except Exception as e:
                    # Remove the payload that could not be opened as a zip.
                    os.remove(zip_path)
                    file_log = (
                        f"{base}: FAILURE! Exception {e} occurred when loading the zip file.\n"
                    )

            # One report file per dataset, holding the outcome line above.
            report_path = os.path.join(ROOT, LATEST_REPORTS, f"{base}.txt")
            os.makedirs(os.path.dirname(report_path), exist_ok=True)
            with open(report_path, "w") as fp:
                fp.write(file_log)
    # NOTE(review): every matrix job uploads under the same artifact name;
    # confirm that is still supported before bumping upload-artifact.
    - name: Persist Latest Reports artifact
      if: always()
      uses: actions/upload-artifact@v2
      with:
        name: latest_reports
        path: latest_reports
# store-datasets:
# needs: [ get-urls ]
# runs-on: ubuntu-latest
# strategy:
# matrix: ${{ fromJson(needs.get-urls.outputs.matrix) }}
# steps:
# - uses: actions/checkout@v2
# - name: Set up Python 3.9
# uses: actions/setup-python@v2
# with:
# python-version: 3.9
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install wheel requests
# - name: Validate and download the datasets
# shell: python
# env:
# API_SOURCE_SECRETS: ${{ secrets.API_SOURCE_SECRETS }}
# run: |
# import os
# import json
# import requests
# from zipfile import ZipFile
#
# # OS constants
# ROOT = os.getcwd()
# DATASETS = "datasets"
# FOLDER = """${{ matrix.hash }}"""
# DOWNLOAD_REPORTS = "download_reports"
#
# # Jobs constants
# BASE = "base"
# DIRECT_DOWNLOAD = "direct_download"
# AUTHENTICATION_TYPE = "authentication_type"
# API_KEY_PARAMETER_NAME = "api_key_parameter_name"
#
# # Secrets constants
# API_SOURCE_SECRETS = "API_SOURCE_SECRETS"
#
# # Load API source secrets
# api_source_secrets = json.loads(os.environ[API_SOURCE_SECRETS])
#
# jobs = """${{ matrix.data }}""".split()
# for job in jobs:
# job_json = json.loads(job)
# base = job_json[BASE]
# url = job_json[DIRECT_DOWNLOAD]
# authentication_type = job_json[AUTHENTICATION_TYPE]
# api_key_parameter_name = job_json[API_KEY_PARAMETER_NAME]
# api_key_parameter_value = api_source_secrets.get(base)
#
# # Download the dataset
# zip_path = os.path.join(ROOT, DATASETS, FOLDER, f"{base}.zip")
# os.makedirs(os.path.dirname(zip_path), exist_ok=True)
#
# params = {}
# headers = {}
# if authentication_type == 1:
# params[api_key_parameter_name] = api_key_parameter_value
# elif authentication_type == 2:
# headers[api_key_parameter_name] = api_key_parameter_value
#
# is_downloadable = True
# try:
# zip_file_req = requests.get(url, params=params, headers=headers, allow_redirects=True)
# zip_file_req.raise_for_status()
# except Exception as e:
# is_downloadable = False
# file_log = (
# f"{base}: FAILURE! Exception {e} occurred when downloading URL {url}.\n"
# )
#
# if is_downloadable:
# zip_file = zip_file_req.content
# with open(zip_path, "wb") as f:
# f.write(zip_file)
# # Make sure that the download file is a zip file
# try:
# ZipFile(zip_path, "r")
# file_log = (
# f"{base}: SUCCESS! A GTFS dataset zip file was downloaded.\n"
# )
# except Exception as e:
# os.remove(zip_path)
# file_log = (
# f"{base}: FAILURE! Exception {e} occurred when loading the zip file.\n"
# )
# report_path = os.path.join(ROOT, DOWNLOAD_REPORTS, f"{base}.txt")
# os.makedirs(os.path.dirname(report_path), exist_ok=True)
# with open(report_path, "w") as fp:
# fp.write(file_log)
# - name: Set up and authorize Cloud
# uses: google-github-actions/auth@v0
# with:
# credentials_json: ${{ secrets.ARCHIVE_DATASET_SA_KEY }}
# - name: Upload datasets to Google Cloud Storage
# id: upload-datasets
# uses: google-github-actions/upload-cloud-storage@main
# with:
# path: datasets/${{ matrix.hash }}
# destination: mdb-latest
# parent: false
# - name: Persist Download Reports artifact
# if: always()
# uses: actions/upload-artifact@v2
# with:
# name: download_reports
# path: download_reports
# validate-latest:
# needs: [ get-urls, store-datasets ]
# runs-on: ubuntu-latest
# strategy:
# matrix: ${{ fromJson(needs.get-urls.outputs.matrix) }}
# steps:
# - uses: actions/checkout@v2
# - name: Set up Python 3.9
# uses: actions/setup-python@v2
# with:
# python-version: 3.9
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install wheel requests
# - name: Validate the latest datasets
# shell: python
# run: |
# import os
# import json
# import requests
# from zipfile import ZipFile
#
# # OS constants
# ROOT = os.getcwd()
# DATASETS = "datasets"
# FOLDER = """${{ matrix.hash }}"""
# LATEST_REPORTS = "latest_reports"
#
# # Jobs constants
# BASE = "base"
# LATEST = "latest"
#
# jobs = """${{ matrix.data }}""".split()
# for job in jobs:
# job_json = json.loads(job)
# base = job_json[BASE]
# url = job_json[LATEST]
#
# # Download the dataset
# zip_path = os.path.join(ROOT, DATASETS, FOLDER, f"{base}.zip")
# os.makedirs(os.path.dirname(zip_path), exist_ok=True)
# is_downloadable = True
# try:
# zip_file_req = requests.get(url, allow_redirects=True)
# zip_file_req.raise_for_status()
# except Exception as e:
# is_downloadable = False
# file_log = (
# f"{base}: FAILURE! Exception {e} occurred when downloading URL {url}.\n"
# )
#
# if is_downloadable:
# zip_file = zip_file_req.content
# with open(zip_path, "wb") as f:
# f.write(zip_file)
# # Make sure that the download file is a zip file
# try:
# ZipFile(zip_path, "r")
# file_log = (
# f"{base}: SUCCESS! A GTFS dataset zip file was downloaded.\n"
# )
# except Exception as e:
# os.remove(zip_path)
# file_log = (
# f"{base}: FAILURE! Exception {e} occurred when loading the zip file.\n"
# )
# report_path = os.path.join(ROOT, LATEST_REPORTS, f"{base}.txt")
# os.makedirs(os.path.dirname(report_path), exist_ok=True)
# with open(report_path, "w") as fp:
# fp.write(file_log)
# - name: Persist Latest Reports artifact
# if: always()
# uses: actions/upload-artifact@v2
# with:
# name: latest_reports
# path: latest_reports

0 comments on commit ab9e1c8

Please sign in to comment.