# Run data pipeline
# Run data pipeline #59

# Workflow: reproduces the DVC pipeline stages, pushes the resulting DVC
# outputs to the `origin` DVC remote, and commits any changed files back to
# the Git repository.
name: Run data pipeline

# Triggers:
#   - whenever the "Increase Data Version" workflow completes
#   - manually, via workflow_dispatch
# NOTE(review): `completed` fires regardless of the upstream run's conclusion;
# confirm whether this pipeline should also run after a failed upstream run.
on:
  workflow_run:
    workflows: ["Increase Data Version"]
    types:
      - completed
  workflow_dispatch:

jobs:
  run-dvc-pipeline:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3

      - name: Setup DVC
        uses: iterative/setup-dvc@v1
        with:
          # Quoted so the version is always read as a string.
          version: '3.56.0'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install python-box ensure pandas pathlib scikit-learn dagshub mlflow numpy

      # Stages are reproduced one at a time, in this fixed order.
      - name: Run DVC pipeline | retrain model
        env:
          DAGSHUB_TOKEN: ${{ secrets.DAGSHUB_TOKEN }}
        run: |
          dvc repro data_update
          dvc repro data_validation
          dvc repro data_transformation
          dvc repro model_train
          dvc repro model_evaluation

      - name: Commit DVC outputs
        run: dvc commit

      - name: Push DVC outputs
        env:
          # NOTE(review): DVC_REMOTE_URL, DAGSHUB_TOKEN and DAGSHUB_USERNAME
          # are exported here but not referenced by the script below; confirm
          # whether they are still needed.
          DVC_REMOTE_URL: ${{ secrets.DVC_REMOTE_URL }}
          DAGSHUB_TOKEN: ${{ secrets.DAGSHUB_TOKEN }}
          DAGSHUB_USERNAME: ${{ secrets.DAGSHUB_USERNAME }}
          DVC_ACCESS_KEY: ${{ secrets.DVC_ACCESS_KEY }}
        run: |
          # Credentials are written with --local so they stay out of the
          # tracked DVC config.
          # NOTE(review): the same secret is used for both keys; presumably
          # the remote accepts a single token for both -- confirm.
          dvc remote modify origin --local access_key_id "$DVC_ACCESS_KEY"
          dvc remote modify origin --local secret_access_key "$DVC_ACCESS_KEY"
          dvc push

      - name: Git Commit DVC changes
        run: |
          git config --global user.name 'github-actions'
          git config --global user.email 'github-actions@github.com'
          git add .
          # `git diff --cached --exit-code` exits non-zero when something is
          # staged, so the commit/push branch runs only when there are changes.
          if ! git diff --cached --exit-code; then
            git commit -m "[Github Action]: Update DVC outputs"
            git push
          else
            echo "No changes to commit"
          fi