From ede169a50fed8c5c6aa3a7b5d83d3bc0ce302203 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 6 Nov 2023 15:58:50 -0800 Subject: [PATCH 1/5] Remove Google-specific sections from Playbook. The Prompt Processing code in `main` has not supported Google Cloud in over a year, and the old services running on GCP have now been shut down. --- doc/playbook.rst | 248 +---------------------------------------------- 1 file changed, 5 insertions(+), 243 deletions(-) diff --git a/doc/playbook.rst b/doc/playbook.rst index da9b5c94..1868026e 100644 --- a/doc/playbook.rst +++ b/doc/playbook.rst @@ -8,12 +8,10 @@ Table of Contents ================= * `Containers`_ -* `Pub/Sub Topics`_ * `Buckets`_ * `Prototype Service`_ * `tester`_ * `Databases`_ -* `Middleware Worker VM`_ Containers @@ -46,55 +44,9 @@ You will need to authenticate to Google Cloud first using :command:`gcloud auth The ``PYTHONUNBUFFERED`` environment variable defined in the Dockerfiles for the containers ensures that container logs are emitted in real-time. -Pub/Sub Topics -============== - -One Google Pub/Sub topic is used for ``nextVisit`` events. -Additional topics are used for images from each instrument, where the instrument is one of ``LSSTCam``, ``LSSTComCam``, ``LATISS``, ``DECam``, or ``HSC``. - -To create the topic, in the Google Cloud Console for the ``prompt-proto`` project: - -* Choose "Pub/Sub" -* Choose "Create Topic" -* Set "Topic ID" to ``nextVisit`` or ``{instrument}-image``, replacing ``{instrument}`` with the name from the list above. - -The single ``nextVisit`` topic is used for multiple messages per visit, one per detector. -It is also expected that it can be used with multiple instruments, with filtering on the subscription distinguishing between them. -Using a single topic this way could simplify the translator from SAL/DDS. -On the other hand, using multiple topics is also simple to do. - - Buckets ======= -Google Cloud ------------- - -A single bucket named ``rubin-prompt-proto-main`` has been created to hold the central repository described in `DMTN-219`_, as well as incoming raw images. - -The bucket ``rubin-prompt-proto-support-data-template`` contains a pristine copy of the calibration datasets and templates. -This bucket is not intended for direct use by the prototype, but can be used to restore the central repository to its state at the start of an observing run. - -The bucket ``rubin-prompt-proto-unobserved`` contains raw files that the upload script(s) can draw from to create incoming raws for ``rubin-prompt-proto-main``. - -The ``-main`` bucket has had notifications configured for it; these publish to a Google Pub/Sub topic as mentioned in the previous section. -To configure these notifications, in a shell: - -.. code-block:: sh - - gsutil notification create \ - -t project/prompt-proto/topics/{instrument}-image \ - -f json \ - -e OBJECT_FINALIZE \ - -p {instrument}/ \ - gs://rubin-prompt-proto-main - -This creates a notification on the given topic using JSON format when an object has been finalized (transfer of it has completed). -Notifications are only sent on this topic for objects with the instrument name as a prefix. - -USDF ----- - The bucket ``rubin:rubin-pp`` holds incoming raw images. 
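For ad-hoc checks of the incoming raws, any S3 client that accepts a custom endpoint can be pointed at this bucket.
Below is a minimal sketch using ``boto3``, assuming USDF S3 credentials are already configured and that ``S3_ENDPOINT_URL`` holds the object-store endpoint; both are assumptions for illustration, not values prescribed by this playbook.

.. code-block:: python

    import os
    import boto3
    from botocore.handlers import validate_bucket_name

    # Credentials are assumed to come from the environment or ~/.aws/credentials.
    s3 = boto3.client("s3", endpoint_url=os.environ["S3_ENDPOINT_URL"])
    # The tenant-qualified bucket name ("rubin:rubin-pp") contains a colon,
    # which botocore's default bucket-name validation rejects, so remove
    # that handler before making requests.
    s3.meta.events.unregister("before-parameter-build.s3", validate_bucket_name)
    response = s3.list_objects_v2(Bucket="rubin:rubin-pp", MaxKeys=10)
    for obj in response.get("Contents", []):
        print(obj["Key"], obj["Size"])

The ``unregister`` call is the only non-standard step; everything else is ordinary ``boto3`` usage.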
The bucket ``rubin:rubin-pp-users`` holds: @@ -142,74 +94,6 @@ For Butler not to complain about the bucket names, set the environment variable Prototype Service ================= -Google Cloud ------------- - -The service can be controlled by Google Cloud Run, which will automatically trigger instances based on ``nextVisit`` messages and can autoscale the number of them depending on load. -Each time the service container is updated, a new revision of the service should be edited and deployed. -(Continuous deployment has not yet been set up.) - -To create or edit the Cloud Run service in the Google Cloud Console: - -* Choose "Create Service" or "Edit & Deploy New Revision" -* Select the container image URL from "Artifact Registry > prompt-proto-service" -* In the Variables & Secrets tab, set the following required parameters: - - * RUBIN_INSTRUMENT: the "short" instrument name - * PUBSUB_VERIFICATION_TOKEN: choose an arbitrary string matching the Pub/Sub endpoint URL below - * IMAGE_BUCKET: bucket containing raw images (``rubin-prompt-proto-main``) - * CALIB_REPO: URI to repo containing calibrations (and templates) - * IP_APDB: IP address or hostname and port of the APDB (see `Databases`_, below) - * IP_REGISTRY: IP address or hostname and port of the registry database (see `Databases`_) - * DB_APDB: PostgreSQL database name for the APDB - * DB_REGISTRY: PostgreSQL database name for the registry database - -* There are also five optional parameters: - - * IMAGE_TIMEOUT: timeout in seconds to wait after expected script completion for raw image arrival, default 20 sec. - * LOCAL_REPOS: absolute path (in the container) where local repos are created, default ``/tmp``. - * USER_APDB: database user for the APDB, default "postgres" - * USER_REGISTRY: database user for the registry database, default "postgres" - * NAMESPACE_APDB: the database namespace for the APDB, defaults to the DB's default namespace - -* One variable is set by Cloud Run and should not be overridden: - - * PORT - -* Also in the Variables & Secrets tab, reference the following secrets: - - * ``butler-registry-db-pass``, as the environment variable ``PSQL_REGISTRY_PASS`` - * ``apdb-db-pass``, as the environment variable ``PSQL_APDB_PASS`` - -* In the Connections tab, select the ``db-connector`` VPC connector. Do *not* create anything under "Cloud SQL connections" -* Set the "Request timeout" to 600 seconds (if a worker has not responded by then, it will be killed). -* Set the "Maximum requests per container" to 1. -* Under "Autoscaling", the minimum number should be set to 0 to save money while debugging, but it would be a multiple of the number of detectors in production. - The maximum number depends on how many simultaneous visits could be in process. - -The Cloud Run service URL is given at the top of the service details page. -Copy it for use in the Pub/Sub subscription. - -One subscription needs to be created (once) for the ``nextVisit`` topic. -It accepts messages and gateways them to Cloud Run. - -* Choose "Pub/Sub" -* Choose "Subscriptions" -* Choose "Create Subscription" -* Set "Subscription ID" to "nextVisit-sub" -* Select the ``projects/prompt-proto/topics/nextVisit`` topic -* Set "Delivery type" to "Push" -* Set the "Endpoint URL" to the service URL from Cloud Run, with ``?token={PUBSUB_VERIFICATION_TOKEN}`` appended to it. - As mentioned, the string ``{PUBSUB_VERIFICATION_TOKEN}`` should be replaced by an arbitrary string matching the variable set above. 
-* Enable authentication using a service account that has Artifact Registry Reader, Cloud Run Invoker, Pub/Sub Editor, Pub/Sub Subscriber, and Storage Object Viewer roles -* Set "Message retention duration" to 10 minutes -* Do not "Retain acknowledged messages", and do not expire the subscription -* Set the acknowledgement deadline to 600 seconds -* Set the "Retry policy" to "Retry immediately" - -USDF ----- - The service can be controlled with ``kubectl`` from ``rubin-devl``. You must first `get credentials for the development cluster `_ on the web; ignore the installation instructions and copy the commands from the second box. Credentials must be renewed if you get a "cannot fetch token: 400 Bad Request" error when running ``kubectl``. @@ -251,10 +135,10 @@ A few useful commands for managing the service: ``kubectl logs`` also offers the ``-f`` flag for streaming output. Troubleshooting -^^^^^^^^^^^^^^^ +--------------- Deleting Old Services -""""""""""""""""""""" +^^^^^^^^^^^^^^^^^^^^^ Normally, old revisions of a service are automatically removed when a new revision is deployed. However, sometimes an old revision will stick around; this seems to be related to Python errors from bad code. @@ -272,7 +156,7 @@ To delete such services manually: There's no point to deleting the pod itself, because the service will just recreate it. Identifying a Pod's Codebase -"""""""""""""""""""""""""""" +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To identify which version of ``prompt-prototype`` a pod is running, run @@ -286,7 +170,7 @@ Actually mapping the hash to a branch version may require a bit of detective wor To find the version of Science Pipelines used, find the container's page in the GitHub registry, then search for ``EUPS_TAG``. Inspecting a Pod -"""""""""""""""" +^^^^^^^^^^^^^^^^ To inspect the state of a pod (e.g., the local repo): @@ -361,59 +245,6 @@ The visits are randomly selected and uploaded as one new group for each visit. Databases ========= -Google Cloud ------------- - -Two PostgreSQL databases have been created on Cloud SQL: ``butler-registry`` and ``apdb``. - -To access these for manual operations, start by creating a virtual machine in Google Compute Engine. - -* In the Cloud Console, go to "Compute Engine > VM instances". -* Select "Create Instance" at the top. -* Enter an instance name (e.g. ``ktl-db-client``). -* Under "Identity and API access / Access scopes", select "Set access for each API". -* Select "Enabled" for "Cloud SQL". - If desired, change "Storage" to "Read Write" and "Cloud Pub/Sub" to "Enabled". -* Expand "Networking, Disks, Security, Management, Sole-Tenancy". -* Under "Networking", add the tag ``ssh``. - This enables the firewall rule to allow connections from the Google Identity-Aware Proxy to the ssh port on the machine. -* You can leave all the rest at their defaults unless you think you need more CPU or memory. - If you do (e.g. if you wanted to run Pipelines code on the VM), it's probably better to switch to an N2 series machine. -* With the project owner role, you should have appropriate permissions to connect to the machine and also to ``sudo`` to ``root`` on it, allowing installation of software. -* When the green check shows up in the status column, click "SSH" under "Connect" to start an in-browser shell to the machine. -* Then execute the following to install client software, set up proxy forwarding, and connect to the database: - -.. 
code-block:: sh - - sudo apt-get update - sudo apt-get install postgresql-client-11 cloudsql-proxy - cloud_sql_proxy -instances=prompt-proto:us-central1:butler-registry=tcp:5432 & - psql -h localhost -U postgres - -A separate ``cloud_sql_proxy`` using a different port will be needed to communicate with the ``apdb`` database. - -For passwordless login, create a ``~/.pgpass`` file with contents ``localhost:5432:postgres:postgres:PASSWORD`` and execute ``chmod 0600 ~/.pgpass``. - -On a VM with the Science Pipelines installed, a new APDB schema can be created in the usual way: - -.. code-block:: sh - - make_apdb.py -c db_url="postgresql://postgres@localhost:/postgres" - -Resetting the APDB -^^^^^^^^^^^^^^^^^^ - -To restore the APDB to a clean state, run the following (replacing 5433 with the appropriate port on your machine): - -.. code-block:: sh - - psql -h localhost -U postgres -p 5433 -c 'drop table "DiaForcedSource", "DiaObject", "DiaObject_To_Object_Match", "DiaSource", "SSObject" cascade;' - make_apdb.py -c db_url="postgresql://postgres@localhost:5433/postgres" - - -USDF ----- - A database server is running at ``postgresql:://usdf-prompt-processing-dev.slac.stanford.edu``. The server runs two databases: ``ppcentralbutler`` (for the Butler registry) and ``lsst-devl`` (for the APDB). @@ -447,7 +278,7 @@ From ``rubin-devl``, a new APDB schema can be created in the usual way: -c db_url="postgresql://rubin@usdf-prompt-processing-dev.slac.stanford.edu/lsst-devl" Resetting the APDB -^^^^^^^^^^^^^^^^^^ +------------------ To restore the APDB to a clean state, run the following: @@ -456,72 +287,3 @@ To restore the APDB to a clean state, run the following: psql -h usdf-prompt-processing-dev.slac.stanford.edu lsst-devl rubin -c 'drop schema "pp_apdb" cascade;' make_apdb.py -c namespace="pp_apdb" \ -c db_url="postgresql://rubin@usdf-prompt-processing-dev.slac.stanford.edu/lsst-devl" - - -Middleware Worker VM -==================== - -The ``rubin-utility-middleware`` VM on Google Compute Engine is intended as a general-purpose environment for working with Butler repositories. -It can work with both local repositories and ones based on Google Storage. -However, it has limited computing power, and is not suited for things like pipeline runs. - -Built-in support: - -* a complete install of the Science Pipelines in ``/software/lsst_stack/`` -* a running instance of ``cloud_sql_proxy`` mapping the ``butler-registry`` database to port 5432 -* global configuration pointing Butler ``s3://`` URIs to Google Storage buckets (though ``gs://`` URIs now work as well) - -The user is responsible for: - -* running ``source /software/lsst_stack/loadLSST.sh`` on login -* database authentication (see `Databases`_, above) -* `Google Storage Authentication`_ - - -Google Storage Authentication ------------------------------ - -To access `Google Storage-Backed Repositories`_, you must first set up Boto authentication. -If you don't have one, `create an HMAC key`_ (this is *not* the same as the token for running the `tester`_); the relevant service account is ``service-620570835826@gs-project-accounts.iam.gserviceaccount.com``. -Then create a ``~/.aws/credentials`` file with the contents:: - - [default] - aws_access_key_id= - aws_secret_access_key= - -and execute ``chmod go-rwx ~/.aws/credentials``. - -.. 
_create an HMAC key: https://cloud.google.com/storage/docs/authentication/managing-hmackeys#create - -PostgreSQL-Backed Repositories ------------------------------- - -By default, ``butler create`` creates a repository whose registry is stored in SQLite. -To instead store the registry in the ``butler-registry`` database, create a seed config YAML such as: - -.. code-block:: yaml - - registry: - db: postgresql://postgres@localhost:5432/ - namespace: - -Then run ``butler create --seed-config seedconfig.yaml `` to create the repository. - -Each repository needs its own ``namespace`` value, corresponding to a PostgreSQL schema. -Schemas can be listed from within ``psql`` using the ``\dn`` command, and corrupted or outdated registries can be deleted using the ``DROP SCHEMA`` command. - -.. warning:: - - Be sure to always provide a unique namespace. - Otherwise, the registry will be created in the database's ``public`` schema, making it very difficult to clean up later. - - -Google Storage-Backed Repositories ----------------------------------- - -All Google Storage repositories must also be `PostgreSQL-Backed Repositories`_. -Otherwise, no special configuration is needed to create one. - -To create or access a Google Storage repository, give the repository location as a URI, e.g.:: - - butler query-collections gs:/// From 0933c7f69f7c1fe36d680ef38fa15139e005aebb Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 6 Nov 2023 16:34:47 -0800 Subject: [PATCH 2/5] Update container build instructions for GitHub Actions. The old instructions referred to ``gcloud builds``, which we stopped using even while we were still running on Google Cloud. --- doc/playbook.rst | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/doc/playbook.rst b/doc/playbook.rst index 1868026e..954367c1 100644 --- a/doc/playbook.rst +++ b/doc/playbook.rst @@ -18,30 +18,51 @@ Containers ========== The prototype consists of two containers. -The first is a base container with the Science Pipelines "stack" code and Google Cloud Platform utilities. +The first is a base container with the Science Pipelines "stack" code and networking utilities. The second is a service container made from the base that has the Prompt Processing prototype service code. +All containers are managed by `GitHub Container Registry `_ and are built using GitHub Actions. -To build the base container using Google Cloud Build: +To build the base container: -.. code-block:: sh +* If there are changes to the container, push them to a branch, then open a PR. + The container should be built automatically. +* If there are no changes (typically because you want to use an updated Science Pipelines container), go to the repository's `Actions tab `_ and select "Run workflow". + From the dropdown, select the branch whose container definition will be used, and the label of the Science Pipelines container. +* New containers built from ``main`` are tagged with the corresponding Science Pipelines release (plus ``w_latest`` or ``d_latest`` if the release was requested by that name). + For automatic ``main`` builds, or if the corresponding box in the manual build is checked, the new container also has the ``latest`` label. + Containers built from a branch use the same scheme, but prefixed by the ticket number or, for user branches, the branch topic. + +.. 
note:: - cd base - gcloud builds submit --tag us-central1-docker.pkg.dev/prompt-proto/prompt/prompt-proto-base + If a PR automatically builds both the base and the service container, the service build will *not* use the new base container unless you specifically override it (see below). + Even then, the service build will not wait for the base build to finish. + You may need to manually rerun the service container build to get it to use the newly built base. To build the service container: -.. code-block:: sh +* If there are changes to the service, push them to a branch, then open a PR. + The container should be built automatically using the ``latest`` base container. +* To force a rebuild manually, go to the repository's `Actions tab `_ and select "Run workflow". + From the dropdown, select the branch whose code should be built. + The container will be built using the ``latest`` base container, even if there is a branch build of the base. +* To use a base other than ``latest``, edit ``.github/workflows/build-service.yml`` on the branch and override the ``BASE_TAG_LIST`` variable. + Be careful not to merge the temporary override to ``main``! +* New service containers built from ``main`` have the tags of their base container. + Containers built from a branch are prefixed by the ticket number or, for user branches, the branch topic. - cd activator - gcloud builds submit --tag us-central1-docker.pkg.dev/prompt-proto/prompt/prompt-proto-service +.. note:: -These commands publish to Google Artifact Registry. + The ``PYTHONUNBUFFERED`` environment variable defined in the Dockerfiles for the containers ensures that container logs are emitted in real-time. -You will need to authenticate to Google Cloud first using :command:`gcloud auth login`. +Stable Base Containers +---------------------- -.. note:: +In general, the ``latest`` base container is built from a weekly or other stable Science Pipelines release. +However, it may happen that the ``latest`` base is used for development while production runs should use an older build. +If this comes up, edit ``.github/workflows/build-service.yml`` and append the desired base build to the ``BASE_TAG_LIST`` variable. +Any subsequent builds of the service container will build against both bases. - The ``PYTHONUNBUFFERED`` environment variable defined in the Dockerfiles for the containers ensures that container logs are emitted in real-time. +This is the only situation in which a change to ``BASE_TAG_LIST`` should be committed to ``main``. Buckets From 768e7231e5d301ea1aa6b6c4d798690d2f45f6aa Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 6 Nov 2023 16:45:14 -0800 Subject: [PATCH 3/5] Remove Google-specific code comments. These comments have been updated for USDF/S3, where appropriate. --- bin.src/make_remote_butler.py | 5 ----- python/tester/upload.py | 3 --- 2 files changed, 8 deletions(-) diff --git a/bin.src/make_remote_butler.py b/bin.src/make_remote_butler.py index ab524b42..45e8046e 100755 --- a/bin.src/make_remote_butler.py +++ b/bin.src/make_remote_butler.py @@ -24,11 +24,6 @@ """Simple script for creating a repository at a remote URI, given a source repository and export file. -For most values of --target-repo and --seed-config, this script is only useful -if run from the prompt-proto project on Google Cloud (because of access -restrictions to **both** the repository's storage location and its registry -database). 
- The user is responsible for clearing any old copies of the repository from both the target URI and the registry database. """ diff --git a/python/tester/upload.py b/python/tester/upload.py index 9d268f50..35218fe6 100644 --- a/python/tester/upload.py +++ b/python/tester/upload.py @@ -155,9 +155,6 @@ def get_samples_non_lsst(bucket, instrument): # TODO: set up a lookup-friendly class to represent the return value # TODO: replace this dict with something more scalable. - # One option is to attach metadata to the Google Storage objects at - # upload time, another is to download the blob and actually read - # its header. hsc_metadata = { 59126: {"ra": 149.28531249999997, "dec": 2.935002777777778, "rot": 270.0, "time": 1457332820.0}, 59134: {"ra": 149.45749166666664, "dec": 2.926961111111111, "rot": 270.0, "time": 1457333685.0}, From c5c12256f348ce07d77c45cd0ba62bd44a91acc2 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 6 Nov 2023 17:03:16 -0800 Subject: [PATCH 4/5] Stop uploading to Google Artifact Registry. --- .github/workflows/build-base.yml | 9 +-------- .github/workflows/build-service.yml | 9 +-------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build-base.yml b/.github/workflows/build-base.yml index 0a87a09a..08359c37 100644 --- a/.github/workflows/build-base.yml +++ b/.github/workflows/build-base.yml @@ -43,12 +43,6 @@ jobs: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Login to Google Artifact Registry - uses: docker/login-action@v2 - with: - registry: us-central1-docker.pkg.dev - username: _json_key_base64 - password: ${{ secrets.GAR_JSON_B64 }} - name: Determine base image eups tag run: | if [[ -n "$STACK_TAG" ]]; then @@ -81,8 +75,7 @@ jobs: BRANCH=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') [ "$BRANCH" == "merge" ] && BRANCH=$(echo "${{ github.head_ref }}" | sed -e 's,.*/\(.*\),\1,') - for IMAGE_ID in "ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME" \ - "us-central1-docker.pkg.dev/prompt-proto/prompt/$IMAGE_NAME"; do + for IMAGE_ID in "ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME"; do STACK_TAG="$(< stack.tag)" if [ "$BRANCH" == "main" ]; then diff --git a/.github/workflows/build-service.yml b/.github/workflows/build-service.yml index 7595967e..ff9d8ba3 100644 --- a/.github/workflows/build-service.yml +++ b/.github/workflows/build-service.yml @@ -92,12 +92,6 @@ jobs: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Login to Google Artifact Registry - uses: docker/login-action@v2 - with: - registry: us-central1-docker.pkg.dev - username: _json_key_base64 - password: ${{ secrets.GAR_JSON_B64 }} - name: Determine eups tag run: | docker run ghcr.io/${{ github.repository_owner }}/prompt-proto-base:"$BASE_TAG" bash -c "cat stack/miniconda*/ups_db/global.tags" > eups.tag @@ -115,8 +109,7 @@ jobs: BRANCH=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') [ "$BRANCH" == "merge" ] && BRANCH=$(echo "${{ github.head_ref }}" | sed -e 's,.*/\(.*\),\1,') - for IMAGE_ID in "ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME" \ - "us-central1-docker.pkg.dev/prompt-proto/prompt/$IMAGE_NAME"; do + for IMAGE_ID in "ghcr.io/${{ github.repository_owner }}/$IMAGE_NAME"; do if [ "$BRANCH" == "main" ]; then VERSION="$BASE_TAG" else From da747fd497d92834acb1010b7e48d2e8ae726ffe Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Mon, 6 Nov 2023 16:47:39 -0800 Subject: [PATCH 5/5] Remove Google-specific code. 
Neither the .gcloudignore file nor the Google logger are useful outside the Google Cloud environment. --- .gcloudignore | 18 ----- python/activator/logger.py | 63 +--------------- tests/test_logger.py | 144 +------------------------------------ 3 files changed, 2 insertions(+), 223 deletions(-) delete mode 100644 .gcloudignore diff --git a/.gcloudignore b/.gcloudignore deleted file mode 100644 index b40a2c52..00000000 --- a/.gcloudignore +++ /dev/null @@ -1,18 +0,0 @@ -# This file specifies files that are *not* uploaded to Google Cloud -# using gcloud. It follows the same syntax as .gitignore, with the addition of -# "#!include" directives (which insert the entries of the given .gitignore-style -# file at that point). -# -# For more information, run: -# $ gcloud topic gcloudignore -# - -# Exclude everything unless otherwise stated. -* -# Include these paths. -Dockerfile.* -!python/activator/*.py -!pipelines - -# Exclude .gitignored files, even if they match the above paths. -#!include:.gitignore diff --git a/python/activator/logger.py b/python/activator/logger.py index 921c32b1..d50edd21 100644 --- a/python/activator/logger.py +++ b/python/activator/logger.py @@ -19,8 +19,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -__all__ = ["GCloudStructuredLogFormatter", "UsdfJsonFormatter", "setup_google_logger", "setup_usdf_logger", - "RecordFactoryContextAdapter"] +__all__ = ["UsdfJsonFormatter", "setup_usdf_logger", "RecordFactoryContextAdapter"] import collections.abc from contextlib import contextmanager @@ -99,34 +98,6 @@ def _set_context_logger(): logging.setLogRecordFactory(RecordFactoryContextAdapter(logging.getLogRecordFactory())) -# TODO: replace with something more extensible, once we know what needs to -# vary besides the formatter (handler type?). -def setup_google_logger(labels=None): - """Set global logging settings for prompt_prototype. - - Calling this function makes `GCloudStructuredLogFormatter` the root - formatter and redirects all warnings to go through it. - - Parameters - ---------- - labels : `dict` [`str`, `str`] - Any metadata that should be attached to all logs. See - ``LogEntry.labels`` in Google Cloud REST API documentation. - - Returns - ------- - handler : `logging.Handler` - The handler used by the root logger. - """ - _set_context_logger() - log_handler = logging.StreamHandler() - log_handler.setFormatter(GCloudStructuredLogFormatter(labels)) - logging.basicConfig(handlers=[log_handler]) - _channel_all_to_pylog() - _set_lsst_logging_levels() - return log_handler - - def setup_usdf_logger(labels=None): """Set global logging settings for prompt_prototype. @@ -151,38 +122,6 @@ def setup_usdf_logger(labels=None): return log_handler -class GCloudStructuredLogFormatter(logging.Formatter): - """A formatter that can be parsed by the Google Cloud logging agent. - - The formatter's output is a JSON-encoded message containing keywords - recognized by the logging agent. - - Parameters - ---------- - labels : `dict` [`str`, `str`] - Any metadata that should be attached to the log. See ``LogEntry.labels`` - in Google Cloud REST API documentation. - """ - def __init__(self, labels=None): - super().__init__() - - if labels: - self._labels = labels - else: - self._labels = {} - - def format(self, record): - # format updates record.message, but the full info is *only* in the return value. 
- msg = super().format(record) - - entry = { - "severity": record.levelname, - "logging.googleapis.com/labels": self._labels | record.logging_context, - "message": msg, - } - return json.dumps(entry, default=_encode_json_extras) - - class UsdfJsonFormatter(logging.Formatter): """A formatter that can be parsed by the Loki/Grafana system at USDF. diff --git a/tests/test_logger.py b/tests/test_logger.py index 6254b84b..be2961ee 100644 --- a/tests/test_logger.py +++ b/tests/test_logger.py @@ -29,7 +29,7 @@ import pytest -from activator.logger import GCloudStructuredLogFormatter, UsdfJsonFormatter, \ +from activator.logger import UsdfJsonFormatter, \ _parse_log_levels, RecordFactoryContextAdapter @@ -59,148 +59,6 @@ def test_root_(self): ) -class GoogleFormatterTest(unittest.TestCase): - """Test GCloudStructuredLogFormatter with fake log messages. - """ - def setUp(self): - super().setUp() - - # Buffer for log output. - # Can't use assertLogs, because it inserts its own handler/formatter. - self.output = io.StringIO() - self.addCleanup(io.StringIO.close, self.output) - - # GCloudStructuredLogFormatter assumes a logging_context field is present. - old_factory = logging.getLogRecordFactory() - self.addCleanup(logging.setLogRecordFactory, old_factory) - logging.setLogRecordFactory(RecordFactoryContextAdapter(old_factory)) - - log_handler = logging.StreamHandler(self.output) - log_handler.setFormatter(GCloudStructuredLogFormatter( - labels={"instrument": "NotACam"}, - )) - # Unique logger per test - self.log = logging.getLogger(self.id()) - self.log.propagate = False - self.log.addHandler(log_handler) - self.log.setLevel(logging.DEBUG) - - def _check_log(self, outputs, level, labels, texts): - """Check that the log output is formatted correctly. - - Parameters - ---------- - outputs : `list` [`str`] - A list of the formatted log messages. - level : `str` - The emitted log level. - labels : `dict` [`str`, `str`] - The labels attached to the log message. - texts : `list` [`str`] - The expected log messages. - """ - self.assertEqual(len(outputs), len(texts)) - for output, text in zip(outputs, texts): - parsed = json.loads(output) - self.assertEqual(parsed["severity"], level) - self.assertEqual(parsed["message"], text) - self.assertEqual(parsed["logging.googleapis.com/labels"], labels) - - def test_direct(self): - """Test the translation of verbatim log messages. - """ - msg = "Consider a spherical cow..." - self.log.info(msg) - self._check_log(self.output.getvalue().splitlines(), - "INFO", {"instrument": "NotACam"}, - [msg]) - - def test_args(self): - """Test the translation of arg-based log messages. - """ - msg = "Consider a %s..." - args = "rotund bovine" - self.log.warning(msg, args) - self._check_log(self.output.getvalue().splitlines(), - "WARNING", {"instrument": "NotACam"}, - [msg % args]) - - def test_quotes(self): - """Test handling of messages containing single or double quotes. - """ - msgs = ["Consider a so-called 'spherical cow'.", - 'Consider a so-called "spherical cow".', - ] - for msg in msgs: - self.log.info(msg) - self._check_log(self.output.getvalue().splitlines(), - "INFO", {"instrument": "NotACam"}, - msgs) - - def test_multiline(self): - """Test handling of messages that split across multiple lines. 
- """ - msg = """This is a multiline - message with internal line - breaks.""" - self.log.error(msg) - self._check_log(self.output.getvalue().splitlines(), - "ERROR", {"instrument": "NotACam"}, - [msg]) - - def test_exception(self): - """Test that exception messages include the stack trace. - """ - try: - raise RuntimeError("I take exception to that!") - except RuntimeError as e: - self.log.exception(e) - shredded = self.output.getvalue().splitlines() - self.assertEqual(len(shredded), 1) - self.assertIn("Traceback (most recent call last)", shredded[0]) - - def test_context(self): - """Test handling of messages that have free-form context. - """ - msg = "Consider a spherical exposure." - exposures = {1, 2, 3} - visit = 42 - ratio = 3.5 - group = "group A" - settings = {"option": True} - with logging.getLogRecordFactory().add_context( - exposures=exposures, - visit=visit, - ratio=ratio, - group=group, - settings=settings, - ): - self.log.info(msg) - self._check_log(self.output.getvalue().splitlines(), - "INFO", - {"instrument": "NotACam", - "exposures": list(exposures), - "visit": visit, - "ratio": ratio, - "group": group, - "settings": settings, - }, - [msg]) - - def test_side_effects(self): - """Test that format still modifies exposure records in the same way - as Formatter.format. - """ - msg = "Consider a %s..." - args = "rotund bovine" - factory = logging.getLogRecordFactory() - record = factory(self.id(), logging.INFO, "file.py", 42, msg, args, None) - formatter = GCloudStructuredLogFormatter() - formatter.format(record) - # If format has no side effects, record.message does not exist. - self.assertEqual(record.message, msg % args) - - class UsdfJsonFormatterTest(unittest.TestCase): """Test UsdfJsonFormatter with fake log messages. """