Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
SammyAgrawal authored May 30, 2024
0 parents commit 217fcfd
Show file tree
Hide file tree
Showing 15 changed files with 740 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Dependabot version-update configuration.
# Each entry under `updates` names a package ecosystem to watch and the
# directory that holds its manifests. All available options are described at:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file

version: 2
updates:
  # Check the actions referenced by the workflows in this repository once a week.
  - package-ecosystem: github-actions
    directory: /
    schedule:
      interval: weekly
54 changes: 54 additions & 0 deletions .github/workflows/deploy_recipe.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: Deploy Recipes to Google Dataflow

env:
  # Job name shared by the deploy step (which submits under it) and the wait
  # step (which filters on it). The run id and attempt make it unique per run.
  # TODO: the recipe id is used verbatim; consider sanitizing it to remove
  # special characters before it becomes part of the job name.
  JOB_NAME: ${{ github.event.inputs.recipe_id }}-${{ github.run_id }}-${{ github.run_attempt }}

on:
  workflow_dispatch:
    inputs:
      recipe_id:
        description: 'The id of a single recipe to submit to Dataflow'
        required: true
        default: 'all'

jobs:
  deploy-recipes:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: "Authenticate to Google Cloud"
        id: "auth"
        uses: "google-github-actions/auth@v2"
        with:
          credentials_json: "${{ secrets.LEAP_BAKERY_SERVICE_ACCOUNT }}"
      - name: "Install dependencies"
        run: |
          python -m pip install --upgrade pip
          pip install pangeo-forge-runner
      - name: "Deploy recipes"
        # User-controlled values reach the script through environment variables
        # and are quoted there. Expanding `${{ ... }}` directly inside `run:`
        # would paste the raw input into the shell source (script injection).
        run: |
          pangeo-forge-runner bake \
            --repo="${REPO_URL}" \
            --ref="${REF_SHA}" \
            --feedstock-subdir='feedstock' \
            --Bake.job_name="${JOB_NAME}" \
            --Bake.recipe_id="${RECIPE_ID}" \
            -f configs/config_dataflow.py
        env:
          GOOGLE_APPLICATION_CREDENTIALS: "${{ steps.auth.outputs.credentials_file_path }}"
          REPO_URL: "${{ github.server_url }}/${{ github.repository }}.git"
          REF_SHA: "${{ github.sha }}"
          RECIPE_ID: "${{ github.event.inputs.recipe_id }}"
      - name: Wait for Dataflow jobs to finish
        # Kept inline rather than extracted into a reusable workflow: passing
        # env.JOB_NAME as an input to one did not work.
        # JOB_NAME is read from the workflow-level `env` block.
        run: |
          while true; do
            # Run gcloud in its own assignment so that a gcloud failure aborts
            # the step. When it was piped into `wc -l`, a failed call produced
            # a count of 0 and the step reported success.
            active_ids=$(gcloud dataflow jobs list --status=active --filter="name:${JOB_NAME}" --format="value(id)")
            # Count non-empty lines; `grep -c` exits 1 on zero matches, hence `|| true`.
            count=$(printf '%s\n' "$active_ids" | grep -c . || true)
            echo "Active Dataflow jobs: $count"
            if [ "$count" -eq "0" ]; then
              echo "No active Dataflow jobs found."
              break
            fi
            echo "Waiting for Dataflow jobs to finish..."
            sleep 20
          done
27 changes: 27 additions & 0 deletions .github/workflows/validate-catalog.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Validates this feedstock's catalog entry on pull requests and pushes.
name: Catalog

on:
  pull_request:
    # NOTE(review): `branches` has no value here, so no branch patterns are
    # listed. Confirm whether a branch list was intended (or drop the key).
    branches:
  push:
    # NOTE(review): same as above - empty branch filter, confirm intent.
    branches:

# One concurrency group per workflow and ref; a newer run in the same group
# cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  validate:
    runs-on: ubuntu-latest
    defaults:
      run:
        # Every `run` step in this job uses a bash login shell.
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the string "3.10" instead of the float 3.1.
          python-version: '3.10'
      - name: validate feedstock entry
        # NOTE(review): this action is referenced by the mutable `main` branch
        # rather than a tag or commit SHA - confirm that is intended.
        uses: leap-stc/data-catalog-actions/leap-catalog@main
        with:
          single-feedstock: "./feedstock/catalog.yaml"
163 changes: 163 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


# Project-specific (not part of the standard Python template above):
# ignore any directory named proto_feedstock.
# NOTE(review): the purpose of this directory is not visible here - confirm it
# should stay untracked.
proto_feedstock/
22 changes: 22 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
# pre-commit hook configuration.
ci:
  autoupdate_schedule: quarterly
  autofix_prs: true

repos:
  # General-purpose file hygiene checks.
  - repo: 'https://github.com/pre-commit/pre-commit-hooks'
    rev: 'v4.5.0'
    hooks:
      - id: check-added-large-files
      - id: check-toml
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: debug-statements

  # Ruff: lint first (with --fix), then format.
  - repo: 'https://github.com/astral-sh/ruff-pre-commit'
    rev: 'v0.3.5'
    hooks:
      # Linter pass.
      - id: ruff
        args:
          - '--fix'
      # Formatter pass.
      - id: ruff-format
Loading

0 comments on commit 217fcfd

Please sign in to comment.