Skip to content

Commit

Permalink
Adjust Dockerfile & deployment to use micromamba
Browse files Browse the repository at this point in the history
* Use micromamba not conda in Dockerfile CMD, also use pip install --no-deps
* Use micromamba not conda in command passed to build container
* Use default mambauser rather than catalyst in docker container
* Remove --no-capture-output which isn't supported by micromamba. Is this a problem?
* Remove uninterpolated vars in .env and more --no-capture-output
* Separate ETL and pytest commands.
* Stop trying to run tests in parallel. Sigh.
* Add google cloud sdk to conda environment.
* Install Google Cloud SDK from conda-forge.
* Add back in the making of required directories. Oops.
* Attempt to have `micromamba run` pass output through
* Use prettier to standardize formatting of lockfiles.
* Add dagster (server startup) target to Makefile
* Update conda lockfile and rerender environment files
  • Loading branch information
zaneselvans committed Oct 31, 2023
1 parent ab168a2 commit 2ae5658
Show file tree
Hide file tree
Showing 14 changed files with 4,473 additions and 20,350 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/build-deploy-pudl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,12 @@ jobs:
gcloud compute instances update-container "$GCE_INSTANCE" \
--zone "$GCE_INSTANCE_ZONE" \
--container-image "docker.io/catalystcoop/pudl-etl:${{ env.GITHUB_REF }}" \
--container-command "conda" \
--container-command "micromamba" \
--container-arg="run" \
--container-arg="--no-capture-output" \
--container-arg="-p" \
--container-arg="/home/catalyst/env" \
--container-arg="--prefix" \
--container-arg="/home/mambauser/env" \
--container-arg="--attach" \
--container-arg='' \
--container-arg="bash" \
--container-arg="./docker/gcp_pudl_etl.sh" \
--container-env-file="./docker/.env" \
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/update-lockfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
create-args: >-
python=3.11
conda-lock
prettier
- name: Run conda-lock to recreate lockfile from scratch
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/zenodo-cache-sync.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ env:
PUBLIC_ZENODO_CACHE_BUCKET: gs://zenodo-cache.catalyst.coop
GITHUB_REF: ${{ github.ref_name }} # This is changed to dev if running on a schedule
PUDL_OUTPUT: ~/pudl-work/output
PUDL_INPUT: ~/pudl-work/data/
PUDL_INPUT: ~/pudl-work/input/

jobs:
zenodo-cache-sync:
Expand Down
12 changes: 8 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ etl_fast_yml := src/pudl/package_data/settings/etl_fast.yml
etl_full_yml := src/pudl/package_data/settings/etl_full.yml
pip_install_pudl := pip install --no-deps --editable ./

########################################################################################
# Start up the Dagster UI
########################################################################################
# Runs `dagster dev`, loading the pudl.etl and pudl.ferc_to_sqlite modules.
# The target names a command, not a file, so it is declared phony: without this, a
# file or directory called "dagster" in the repo root would make the target appear
# up to date and the server would never start. (Harmless if also declared elsewhere.)
.PHONY: dagster
dagster:
	dagster dev -m pudl.etl -m pudl.ferc_to_sqlite

########################################################################################
# Conda lockfile generation
########################################################################################
Expand All @@ -20,7 +26,7 @@ endif

# Regenerate the conda lockfile and render platform specific conda environments.
conda-lock:
rm -f environments/conda-lock.yml
rm -f environments/conda-*lock.yml
conda-lock \
--${mamba} \
--file=pyproject.toml \
Expand All @@ -31,11 +37,11 @@ conda-lock:
--extras docs \
--extras datasette \
conda-lock.yml)
prettier --write environments/*.yml

########################################################################################
# Build documentation (for local use)
########################################################################################

# Delete the documentation build output directory (docs/_build).
# Declared phony because the target is a command rather than a file that gets built;
# a stray file named "docs-clean" must not be able to suppress the cleanup.
.PHONY: docs-clean
docs-clean:
	rm -rf docs/_build

Expand All @@ -46,7 +52,6 @@ docs-build: docs-clean
########################################################################################
# Generic pytest commands for local use, without test coverage
########################################################################################

# Run pytest with --doctest-modules over src/pudl, plus the tests under test/unit.
# Declared phony so the tests always run instead of being skipped when a file named
# "pytest-unit" happens to exist.
.PHONY: pytest-unit
pytest-unit:
	pytest --doctest-modules src/pudl test/unit

Expand All @@ -60,7 +65,6 @@ pytest-validate:
########################################################################################
# More complex pytest commands for local use that collect test coverage
########################################################################################

# Run unit & integration tests on 1-2 years of data and collect test coverage data.
local-pytest-ci: docs-clean
${coverage_erase}
Expand Down
16 changes: 8 additions & 8 deletions docker/.env
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
HOST_PUDL_IN=./pudl_in
HOST_PUDL_OUT=./pudl_out
CONTAINER_HOME=/home/catalyst
PUDL_INPUT=/home/catalyst/pudl_work/data
PUDL_OUTPUT=/home/catalyst/pudl_work/output
DAGSTER_HOME=/home/catalyst/pudl_work/dagster_home
CONDA_PREFIX=/home/catalyst/env
PUDL_SETTINGS_YML=/home/catalyst/src/pudl/package_data/settings/etl_full.yml
LOGFILE=/home/catalyst/pudl_work/output/pudl-etl.log
CONDA_RUN="conda run --no-capture-output --prefix /home/catalyst/env"
CONTAINER_HOME=/home/mambauser
PUDL_INPUT=/home/mambauser/pudl_work/input
PUDL_OUTPUT=/home/mambauser/pudl_work/output
DAGSTER_HOME=/home/mambauser/pudl_work/dagster_home
CONDA_PREFIX=/home/mambauser/env
PUDL_SETTINGS_YML=/home/mambauser/src/pudl/package_data/settings/etl_full.yml
LOGFILE=/home/mambauser/pudl_work/output/pudl-etl.log
CONDA_RUN="micromamba run --prefix /home/mambauser/env --attach ''"
GCS_CACHE=gs://zenodo-cache.catalyst.coop
7 changes: 5 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ ENV PUDL_INPUT=${CONTAINER_PUDL_WORKSPACE}/input
ENV PUDL_OUTPUT=${CONTAINER_PUDL_WORKSPACE}/output
ENV DAGSTER_HOME=${CONTAINER_PUDL_WORKSPACE}/dagster_home

# Create data input/output directories
RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME}

# Create a conda environment based on the specification in the repo
COPY environments/conda-lock.yml environments/conda-lock.yml
RUN micromamba create --prefix ${CONDA_PREFIX} --yes --category main dev docs test datasette --file environments/conda-lock.yml && \
Expand All @@ -46,9 +49,9 @@ ENV LD_LIBRARY_PATH=${CONDA_PREFIX}/lib
# We need information from .git to get version with setuptools_scm so we mount that
# directory without copying it into the image.
RUN --mount=type=bind,source=.git,target=${PUDL_REPO}/.git \
${CONDA_RUN} pip install --no-cache-dir --editable . && \
${CONDA_RUN} pip install --no-cache-dir --no-deps --editable . && \
# Run the PUDL setup script so we know where to read and write data
${CONDA_RUN} pudl_setup

# Run the unit tests:
CMD ["conda", "run", "--no-capture-output", "--prefix", "${CONDA_PREFIX}", "pytest", "test/unit"]
# Shell form is used on purpose: the exec (JSON array) form does no shell processing,
# so "${CONDA_PREFIX}" would be passed to micromamba as a literal string and "''" as
# two quote characters rather than an empty --attach value.
# NOTE(review): assumes CONDA_PREFIX is set with ENV (not only ARG) earlier in this
# Dockerfile so that it exists at container start -- confirm.
CMD micromamba run --prefix "${CONDA_PREFIX}" --attach '' pytest test/unit
6 changes: 2 additions & 4 deletions docker/gcp_pudl_etl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,14 @@ function run_pudl_etl() {
--max-concurrent 6 \
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \
$PUDL_SETTINGS_YML && \
# Run multiple pytest processes in the background and wait for them to exit
pytest \
--gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \
--etl-settings=$PUDL_SETTINGS_YML \
--live-dbs test/integration test/unit & \
--live-dbs test/integration test/unit && \
pytest \
--gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \
--etl-settings=$PUDL_SETTINGS_YML \
--live-dbs test/validate & \
wait
--live-dbs test/validate
}

function shutdown_vm() {
Expand Down
Loading

0 comments on commit 2ae5658

Please sign in to comment.