diff --git a/.dockerignore b/.dockerignore index 51cbe41e..46bd8ae2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,4 +2,5 @@ .direnv __pycache__ .mypy_cache -.pytest_cache \ No newline at end of file +.pytest_cache +pctasks_frontend/node_modules diff --git a/Dockerfile.task_base b/Dockerfile.task_base index 632165f1..9bce9b6c 100644 --- a/Dockerfile.task_base +++ b/Dockerfile.task_base @@ -1,69 +1,26 @@ -FROM ubuntu:20.04 +FROM python:3.10.6-buster +ARG REQUIREMENTS_BASE=requirements.base.txt -# Setup timezone info +ENV PIP_NO_CACHE_DIR=1 ENV TZ=UTC - ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -RUN apt-get update && apt-get install -y software-properties-common - -RUN add-apt-repository ppa:ubuntugis/ppa && \ - apt-get update && \ - apt-get install -y build-essential python3-dev python3-pip \ - jq unzip ca-certificates wget curl git && \ - apt-get autoremove && apt-get autoclean && apt-get clean - -RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10 - -# See https://github.com/mapbox/rasterio/issues/1289 -ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt - -# Install Python 3.8 -RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \ - && bash "Mambaforge-$(uname)-$(uname -m).sh" -b -p /opt/conda \ - && rm -rf "Mambaforge-$(uname)-$(uname -m).sh" - -ENV PATH /opt/conda/bin:$PATH -ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH - -RUN mamba install -y -c conda-forge python=3.8 gdal pip setuptools cython numpy +COPY ${REQUIREMENTS_BASE} /requirements.base.txt +RUN python3 -m pip install -U pip \ + && python3 -m pip install -r /requirements.base.txt -RUN python -m pip install --upgrade pip - -# Install common packages -COPY requirements-task-base.txt /tmp/requirements.txt -RUN python -m pip install --no-build-isolation -r /tmp/requirements.txt - -# # Copy and install packages -# COPY 
pctasks/core /opt/src/pctasks/core -RUN cd /opt/src/pctasks/core && \ - pip install . - COPY pctasks/cli /opt/src/pctasks/cli -RUN cd /opt/src/pctasks/cli && \ - pip install . - COPY pctasks/task /opt/src/pctasks/task -RUN cd /opt/src/pctasks/task && \ - pip install . - COPY pctasks/client /opt/src/pctasks/client -RUN cd /opt/src/pctasks/client && \ - pip install . - COPY pctasks/ingest /opt/src/pctasks/ingest -RUN cd /opt/src/pctasks/ingest && \ - pip install . - COPY pctasks/dataset /opt/src/pctasks/dataset -RUN cd /opt/src/pctasks/dataset && \ - pip install . + +COPY requirements.pctasks.txt /opt/src/requirements.pctasks.txt +RUN cd /opt/src && python -m pip install -r requirements.pctasks.txt # Setup Python Path to allow import of test modules ENV PYTHONPATH=/opt/src:$PYTHONPATH diff --git a/datasets/goes/goes-glm/Dockerfile b/datasets/goes/goes-glm/Dockerfile deleted file mode 100644 index 12124c87..00000000 --- a/datasets/goes/goes-glm/Dockerfile +++ /dev/null @@ -1,74 +0,0 @@ -FROM ubuntu:20.04 - -# Setup timezone info -ENV TZ=UTC - -ENV LC_ALL=C.UTF-8 -ENV LANG=C.UTF-8 - -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -RUN apt-get update && apt-get install -y software-properties-common - -RUN add-apt-repository ppa:ubuntugis/ppa && \ - apt-get update && \ - apt-get install -y build-essential python3-dev python3-pip \ - jq unzip ca-certificates wget curl git && \ - apt-get autoremove && apt-get autoclean && apt-get clean - -RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10 - -# See https://github.com/mapbox/rasterio/issues/1289 -ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt - -# Install Python 3.8 -RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \ - && bash "Mambaforge-$(uname)-$(uname -m).sh" -b -p /opt/conda \ - && rm -rf "Mambaforge-$(uname)-$(uname -m).sh" - -ENV PATH /opt/conda/bin:$PATH -ENV LD_LIBRARY_PATH 
/opt/conda/lib/:$LD_LIBRARY_PATH - -RUN mamba install -y -c conda-forge python=3.8 gdal=3.3.3 pip setuptools cython numpy==1.21.5 - -RUN python -m pip install --upgrade pip - -# Install common packages -COPY requirements-task-base.txt /tmp/requirements.txt -RUN python -m pip install --no-build-isolation -r /tmp/requirements.txt - -# -# Copy and install packages -# - -COPY pctasks/core /opt/src/pctasks/core -RUN cd /opt/src/pctasks/core && \ - pip install . - -COPY pctasks/cli /opt/src/pctasks/cli -RUN cd /opt/src/pctasks/cli && \ - pip install . - -COPY pctasks/task /opt/src/pctasks/task -RUN cd /opt/src/pctasks/task && \ - pip install . - -COPY pctasks/client /opt/src/pctasks/client -RUN cd /opt/src/pctasks/client && \ - pip install . - -COPY pctasks/ingest /opt/src/pctasks/ingest -RUN cd /opt/src/pctasks/ingest && \ - pip install . - -COPY pctasks/dataset /opt/src/pctasks/dataset -RUN cd /opt/src/pctasks/dataset && \ - pip install . - -COPY datasets/goes/goes-glm /opt/src/datasets/goes-glm -RUN python3 -m pip install -r /opt/src/datasets/goes-glm/requirements.txt - -# Setup Python Path to allow import of test modules -ENV PYTHONPATH=/opt/src:$PYTHONPATH - -WORKDIR /opt/src diff --git a/datasets/goes/goes-glm/README.md b/datasets/goes/goes-glm/README.md index d5577d55..f3894983 100644 --- a/datasets/goes/goes-glm/README.md +++ b/datasets/goes/goes-glm/README.md @@ -23,4 +23,12 @@ And registered with ```console $ pctasks workflow create datasets/goes/goes-glm/workflows/goes-glm-update.yaml +$ pctasks workflow create datasets/goes/goes-glm/workflows/goes-glm-update-blue.yaml +``` + +## Image building + +``` +./scripts/generate-requirements datasets/goes/goes-glm/requirements.txt +docker build -t /pctasks-goes-glm: -f datasets/goes/goes-glm/Dockerfile . 
``` \ No newline at end of file diff --git a/datasets/noaa-mrms-qpe/Dockerfile b/datasets/noaa-mrms-qpe/Dockerfile deleted file mode 100644 index 54ea82a3..00000000 --- a/datasets/noaa-mrms-qpe/Dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -ARG registry -FROM ${registry}/pctasks-task-base:latest - -COPY datasets/noaa-mrms-qpe /opt/src/datasets/noaa-mrms-qpe -RUN python3 -m pip install -r /opt/src/datasets/noaa-mrms-qpe/requirements.txt diff --git a/datasets/noaa-mrms-qpe/README.md b/datasets/noaa-mrms-qpe/README.md index 5132a468..6d70a16f 100644 --- a/datasets/noaa-mrms-qpe/README.md +++ b/datasets/noaa-mrms-qpe/README.md @@ -36,4 +36,12 @@ They can be registered with ```bash $ ls datasets/noaa-mrms-qpe/workflows/* | xargs -I {} pctasks workflow update {} -``` \ No newline at end of file +``` + +## Image building + +``` +./scripts/generate-requirements datasets/noaa-mrms-qpe/requirements.txt +docker build -t /pctasks-noaa-mrms-qpe: -f Dockerfile.task_base . +``` + diff --git a/docs/user_guide/runtime.md b/docs/user_guide/runtime.md index a69a0faf..16f784d9 100644 --- a/docs/user_guide/runtime.md +++ b/docs/user_guide/runtime.md @@ -3,7 +3,11 @@ ## Specifying requirements In addition to the set of packages provided by the base docker image, you can specify a list of additional packages -to install with a `requirements.txt` file. This can be done in a dataset configuration or in a task configuration. +to install with a `requirements.txt` file. + +```{note} Installing extra dependencies at runtime should only be done when developing a workflow. See [](#building-images) for transitioning to a production-ready workflow. +``` +This can be done in a dataset configuration or in a task configuration. ```yaml # file: naip/dataset.yaml @@ -56,4 +60,32 @@ Behind the scenes, when you submit a workflow generated from this `dataset.yaml` the module is uploaded to Azure Blob Storage. 
Before executing your task, the worker downloads that module and places it in a location that's importable by the Python interpreter. The uploaded module / package is prioritized over any -existing modules with the same import name. \ No newline at end of file +existing modules with the same import name. + +## Building Images + +`pctasks` lets you specify a `requirements.txt` with additional dependencies to +install at runtime. This is convenient for development, but installing +additional dependencies isn't appropriate for production environments that need +to run reliably at scale. For that, we'll build a container image from our +requirements. + +First, use `./scripts/generate-requirements` to generate the `requirements.txt` +file. Provide any additional requirements files you need to this script: + +``` +$ ./scripts/generate-requirements datasets/goes/goes-glm/requirements.txt +``` + +Next, build and upload the container image: + +``` +$ docker build -t pctasks-goes-glm:latest -f datasets/goes/goes-glm/Dockerfile . +$ docker push ... +``` + +Alternatively, build the docker container in Azure: + +``` +$ az acr build -r "registry" -g "resource-group" -t 'pctasks-:' -f Dockerfile.task_base . 
+``` \ No newline at end of file diff --git a/pctasks/cli/dev_requirements.txt b/pctasks/cli/dev_requirements.txt new file mode 100644 index 00000000..50a65c42 --- /dev/null +++ b/pctasks/cli/dev_requirements.txt @@ -0,0 +1 @@ +file:./pctasks/core#egg=pctasks.core \ No newline at end of file diff --git a/pctasks/client/dev_requirements.txt b/pctasks/client/dev_requirements.txt new file mode 100644 index 00000000..528959cc --- /dev/null +++ b/pctasks/client/dev_requirements.txt @@ -0,0 +1,2 @@ +file:./pctasks/core#egg=pctasks.core +file:./pctasks/cli#egg=pctasks.cli \ No newline at end of file diff --git a/pctasks/dataset/dev_requirements.txt b/pctasks/dataset/dev_requirements.txt new file mode 100644 index 00000000..1d27ad70 --- /dev/null +++ b/pctasks/dataset/dev_requirements.txt @@ -0,0 +1,3 @@ +file:./pctasks/task#egg=pctasks.task +file:./pctasks/client#egg=pctasks.client +file:./pctasks/ingest#egg=pctasks.ingest \ No newline at end of file diff --git a/pctasks/dev/dev_requirements.txt b/pctasks/dev/dev_requirements.txt new file mode 100644 index 00000000..6d80fcc3 --- /dev/null +++ b/pctasks/dev/dev_requirements.txt @@ -0,0 +1,5 @@ +file:./pctasks/task#egg=pctasks.task +file:./pctasks/client#egg=pctasks.client +file:./pctasks/ingest#egg=pctasks.ingest +file:./pctasks/run#egg=pctasks.run +file:./pctasks/cli#egg=pctasks.cli \ No newline at end of file diff --git a/pctasks/ingest/dev_requirements.txt b/pctasks/ingest/dev_requirements.txt new file mode 100644 index 00000000..2a859303 --- /dev/null +++ b/pctasks/ingest/dev_requirements.txt @@ -0,0 +1 @@ +file:./pctasks/client#egg=pctasks.client \ No newline at end of file diff --git a/pctasks/ingest_task/dev_requirements.txt b/pctasks/ingest_task/dev_requirements.txt new file mode 100644 index 00000000..dbf1b7f7 --- /dev/null +++ b/pctasks/ingest_task/dev_requirements.txt @@ -0,0 +1,2 @@ +file:./pctasks/task#egg=pctasks.task +file:./pctasks/ingest#egg=pctasks.ingest \ No newline at end of file diff --git 
a/pctasks/notify/dev_requirements.txt b/pctasks/notify/dev_requirements.txt new file mode 100644 index 00000000..50a65c42 --- /dev/null +++ b/pctasks/notify/dev_requirements.txt @@ -0,0 +1 @@ +file:./pctasks/core#egg=pctasks.core \ No newline at end of file diff --git a/pctasks/router/dev_requirements.txt b/pctasks/router/dev_requirements.txt new file mode 100644 index 00000000..50a65c42 --- /dev/null +++ b/pctasks/router/dev_requirements.txt @@ -0,0 +1 @@ +file:./pctasks/core#egg=pctasks.core \ No newline at end of file diff --git a/pctasks/run/dev_requirements.txt b/pctasks/run/dev_requirements.txt new file mode 100644 index 00000000..9e81042d --- /dev/null +++ b/pctasks/run/dev_requirements.txt @@ -0,0 +1,3 @@ +file:./pctasks/core#egg=pctasks.core +file:./pctasks/task#egg=pctasks.task +file:./pctasks/client#egg=pctasks.client \ No newline at end of file diff --git a/pctasks/server/dev_requirements.txt b/pctasks/server/dev_requirements.txt new file mode 100644 index 00000000..c15f913e --- /dev/null +++ b/pctasks/server/dev_requirements.txt @@ -0,0 +1,2 @@ +file:./pctasks/core#egg=pctasks.core +file:./pctasks/run#egg=pctasks.run \ No newline at end of file diff --git a/pctasks/task/dev_requirements.txt b/pctasks/task/dev_requirements.txt new file mode 100644 index 00000000..528959cc --- /dev/null +++ b/pctasks/task/dev_requirements.txt @@ -0,0 +1,2 @@ +file:./pctasks/core#egg=pctasks.core +file:./pctasks/cli#egg=pctasks.cli \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt index 79621dd2..1223a80f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,6 +11,9 @@ pystac[validation]==1.* azure-functions azure-functions-durable +# for generating requirements files for Docker +pip-tools + # Mypy stubs types-cachetools diff --git a/requirements.base.txt b/requirements.base.txt new file mode 100644 index 00000000..d357f3c4 --- /dev/null +++ b/requirements.base.txt @@ -0,0 +1,229 @@ +# +# This file is autogenerated by 
pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=requirements.txt pctasks/cli/dev_requirements.txt pctasks/cli/setup.py pctasks/client/dev_requirements.txt pctasks/client/setup.py pctasks/core/setup.py pctasks/dataset/dev_requirements.txt pctasks/dataset/setup.py pctasks/ingest/dev_requirements.txt pctasks/ingest/setup.py pctasks/task/dev_requirements.txt pctasks/task/setup.py +# +aiohttp==3.8.3 + # via pctasks.core (pctasks/core/setup.py) +aiosignal==1.3.1 + # via aiohttp +async-timeout==4.0.2 + # via aiohttp +attrs==22.2.0 + # via + # aiohttp + # jsonschema +azure-core==1.26.2 + # via + # azure-cosmos + # azure-data-tables + # azure-identity + # azure-storage-blob + # azure-storage-queue + # msrest + # opencensus-ext-azure +azure-cosmos==4.3.0 + # via pctasks.core (pctasks/core/setup.py) +azure-data-tables==12.4.1 + # via pctasks.core (pctasks/core/setup.py) +azure-identity==1.12.0 + # via + # opencensus-ext-azure + # pctasks.core (pctasks/core/setup.py) +azure-storage-blob==12.9 + # via pctasks.core (pctasks/core/setup.py) +azure-storage-queue==12.5.0 + # via pctasks.core (pctasks/core/setup.py) +cachetools==5.2.0 + # via google-auth +certifi==2022.12.7 + # via + # msrest + # requests +cffi==1.15.1 + # via cryptography +charset-normalizer==2.1.1 + # via + # aiohttp + # requests +click==8.1.3 + # via + # pctasks.cli (pctasks/cli/setup.py) + # planetary-computer + # stac-validator +colorama==0.4.6 + # via rich +commonmark==0.9.1 + # via rich +cryptography==39.0.0 + # via + # azure-identity + # azure-storage-blob + # azure-storage-queue + # msal + # pyjwt +frozenlist==1.3.3 + # via + # aiohttp + # aiosignal +google-api-core==2.11.0 + # via opencensus +google-auth==2.15.0 + # via google-api-core +googleapis-common-protos==1.57.1 + # via google-api-core +idna==3.4 + # via + # requests + # yarl +isodate==0.6.1 + # via msrest +jinja2==3.0.3 + # via pctasks.ingest (pctasks/ingest/setup.py) +jsonschema==4.17.3 + # via 
stac-validator +marko==1.0.1 + # via pctasks.ingest (pctasks/ingest/setup.py) +markupsafe==2.1.1 + # via jinja2 +msal==1.20.0 + # via + # azure-identity + # msal-extensions +msal-extensions==1.0.0 + # via azure-identity +msrest==0.7.1 + # via + # azure-data-tables + # azure-storage-blob + # azure-storage-queue +multidict==6.0.4 + # via + # aiohttp + # yarl +oauthlib==3.2.2 + # via requests-oauthlib +opencensus==0.11.0 + # via + # opencensus-ext-azure + # opencensus-ext-logging +opencensus-context==0.1.3 + # via opencensus +opencensus-ext-azure==1.1.0 + # via pctasks.core (pctasks/core/setup.py) +opencensus-ext-logging==0.1.1 + # via pctasks.core (pctasks/core/setup.py) +orjson==3.8.4 + # via pctasks.core (pctasks/core/setup.py) + # via + # -r pctasks/client/dev_requirements.txt + # -r pctasks/task/dev_requirements.txt + # pctasks.client (pctasks/client/setup.py) + # pctasks.task (pctasks/task/setup.py) + # via + # -r pctasks/dataset/dev_requirements.txt + # -r pctasks/ingest/dev_requirements.txt + # pctasks.dataset (pctasks/dataset/setup.py) + # pctasks.ingest (pctasks/ingest/setup.py) + # via + # -r pctasks/cli/dev_requirements.txt + # -r pctasks/client/dev_requirements.txt + # -r pctasks/task/dev_requirements.txt + # pctasks.cli (pctasks/cli/setup.py) + # pctasks.client (pctasks/client/setup.py) + # pctasks.task (pctasks/task/setup.py) + # via + # -r pctasks/dataset/dev_requirements.txt + # pctasks.dataset (pctasks/dataset/setup.py) + # via + # -r pctasks/dataset/dev_requirements.txt + # pctasks.dataset (pctasks/dataset/setup.py) +planetary-computer==0.4.9 + # via pctasks.core (pctasks/core/setup.py) +portalocker==2.6.0 + # via msal-extensions +protobuf==4.21.12 + # via + # google-api-core + # googleapis-common-protos +psutil==5.9.4 + # via opencensus-ext-azure +pyasn1==0.4.8 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.2.8 + # via google-auth +pycparser==2.21 + # via cffi +pydantic[dotenv]==1.10.4 + # via + # pctasks.client (pctasks/client/setup.py) + # 
pctasks.core (pctasks/core/setup.py) + # planetary-computer +pygments==2.14.0 + # via rich +pyjwt[crypto]==2.6.0 + # via msal +pyrsistent==0.19.3 + # via jsonschema +pystac==1.6.1 + # via + # planetary-computer + # pystac-client +pystac-client==0.5.1 + # via planetary-computer +python-dateutil==2.8.2 + # via + # pystac + # pystac-client + # strictyaml +python-dotenv==0.21.0 + # via pydantic +pytz==2022.7 + # via planetary-computer +pyyaml==6.0 + # via pctasks.core (pctasks/core/setup.py) +requests==2.28.1 + # via + # azure-core + # google-api-core + # msal + # msrest + # opencensus-ext-azure + # planetary-computer + # pystac-client + # requests-oauthlib + # stac-validator +requests-oauthlib==1.3.1 + # via msrest +rich==11.2.0 + # via pctasks.client (pctasks/client/setup.py) +rsa==4.9 + # via google-auth +six==1.16.0 + # via + # azure-core + # azure-identity + # google-auth + # isodate + # python-dateutil +stac-validator==3.3.1 + # via pctasks.core (pctasks/core/setup.py) +strictyaml==1.6.2 + # via pctasks.core (pctasks/core/setup.py) +types-docutils==0.19.1.1 + # via types-setuptools +types-setuptools==65.6.0.3 + # via stac-validator +typing-extensions==4.4.0 + # via + # azure-core + # pydantic +urllib3==1.26.13 + # via requests +yarl==1.8.2 + # via + # aiohttp + # azure-data-tables diff --git a/requirements.pctasks.txt b/requirements.pctasks.txt new file mode 100644 index 00000000..a90a32d9 --- /dev/null +++ b/requirements.pctasks.txt @@ -0,0 +1,5 @@ +-e file:./pctasks/cli#egg=pctasks.cli +-e file:./pctasks/client#egg=pctasks.client +-e file:./pctasks/core#egg=pctasks.core +-e file:./pctasks/ingest#egg=pctasks.ingest +-e file:./pctasks/task#egg=pctasks.task diff --git a/scripts/generate-requirements b/scripts/generate-requirements new file mode 100755 index 00000000..5969bdef --- /dev/null +++ b/scripts/generate-requirements @@ -0,0 +1,20 @@ +#!/bin/bash +# Usage: ./scripts/generate-requirements [optional paths to additional requirements.txt] +set -ex + 
+pip-compile -o requirements.txt \ + pctasks/core/setup.py \ + pctasks/cli/dev_requirements.txt \ + pctasks/cli/setup.py \ + pctasks/task/dev_requirements.txt \ + pctasks/task/setup.py \ + pctasks/client/dev_requirements.txt \ + pctasks/client/setup.py \ + pctasks/ingest/dev_requirements.txt \ + pctasks/ingest/setup.py \ + pctasks/dataset/dev_requirements.txt \ + pctasks/dataset/setup.py \ + "${@:1}" + +grep -v "^file:./" requirements.txt > requirements.base.txt +grep "^file:./" requirements.txt | sed 's/^/-e /' > requirements.pctasks.txt \ No newline at end of file