Skip to content

ci: add full stack NLP regression test #862

ci: add full stack NLP regression test

ci: add full stack NLP regression test #862

Workflow file for this run

name: CI

# Triggers: every pull request, plus pushes to main. Pushes that only touch
# files under docs/ or root-level Markdown files are skipped (in GitHub's
# filter syntax `*` does not cross a `/`).
on:
  push:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
      - '*.md'
  pull_request:

# The goal here is to cancel older workflows when a PR is updated (because it's pointless work).
# Runs are grouped by workflow name plus the PR's source branch (or, when there
# is no PR, the name of the ref that was pushed); a newer run in the same group
# cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name }}
jobs:
  # Runs the pytest suite after building Microsoft's FHIR anonymizer
  # command-line tool from source.
  unittest:
    name: unit tests
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        # while we are still private, don't go crazy with the Python versions as they eat up CI minutes
        python-version: ["3.10"]
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        # The extras spec is quoted so the shell never treats [tests] as a glob.
        run: |
          python -m pip install --upgrade pip
          pip install pytest
          pip install ".[tests]"

      - name: Check out MS tool
        uses: actions/checkout@v4
        with:
          repository: microsoft/Tools-for-Health-Data-Anonymization
          path: mstool

      - name: Build MS tool
        # Publishes a single-file linux-x64 release build into $HOME/.local/bin.
        # --yes keeps apt-get non-interactive regardless of runner configuration.
        run: |
          sudo apt-get update
          sudo apt-get install --yes dotnet6
          dotnet publish \
            --runtime=linux-x64 \
            --configuration=Release \
            -p:PublishSingleFile=true \
            --output="$HOME/.local/bin" \
            mstool/FHIR/src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool

      - name: Test with pytest
        run: |
          python -m pytest

  # Brings up the covid-symptom docker compose profile, runs the
  # covid_symptom__nlp_results task over checked-in input, and diffs the
  # output against the checked-in expected results.
  nlp-regression:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4

      - name: Install docker
        # At time of writing, latest/stable channel holds version 20.10.24 which has this bug:
        # https://stackoverflow.com/questions/77225539/
        # So for now, install from latest/edge (with version 24.0.5 as I write this)
        run: sudo snap install docker --channel=latest/edge

      - name: Install docker images
        # The secret is handed over through the step environment rather than
        # being interpolated into the script text, so its value is never parsed
        # by the shell.
        env:
          UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}
        run: |
          docker compose --profile covid-symptom up -d

      - name: Run NLP
        run: |
          export DATADIR="$(realpath tests/data/nlp-regression)"
          # Actually do NLP
          docker compose run --rm \
            --volume "$DATADIR:/in" \
            cumulus-etl \
            /in/input \
            /in/run-output \
            /in/phi \
            --output-format=ndjson \
            --task covid_symptom__nlp_results
          # Compare results
          diff -upr "$DATADIR/expected-output/covid_symptom__nlp_results" \
            "$DATADIR/run-output/covid_symptom__nlp_results"

  # Static checks. Every linter after the first runs even when an earlier one
  # failed, so a single CI run reports all findings.
  lint:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4

      - name: Install linters
        # black is synced with the .pre-commit-hooks version
        run: |
          python -m pip install --upgrade pip
          pip install "bandit[toml]" pycodestyle pylint black==23.11.0

      - name: Run pycodestyle
        # E203: pycodestyle is a little too rigid about slices & whitespace
        #   See https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#slices
        # W503: a default ignore that we are restoring
        run: |
          pycodestyle --max-line-length=120 --ignore=E203,W503 .

      - name: Run pylint
        if: success() || failure()  # still run pylint if above checks fail
        run: |
          pylint cumulus_etl tests

      - name: Run bandit
        if: success() || failure()  # still run bandit if above checks fail
        run: |
          bandit -c pyproject.toml -r .

      - name: Run black
        if: success() || failure()  # still run black if above checks fail
        run: |
          black --check --verbose --line-length 120 .