From 331b576ad8dd99dc00c6a777a3b3addc6dbd3225 Mon Sep 17 00:00:00 2001
From: "Openverse (Bot)" <101814513+openverse-bot@users.noreply.github.com>
Date: Sat, 2 Mar 2024 01:36:52 +1100
Subject: [PATCH] Update workflows (#3858)

* Update workflows

* Apply ruff-format updates

---------

Co-authored-by: Krystle Salazar
---
 .github/workflows/pr_label_check.yml | 8 +++----
 .pre-commit-config.yaml | 6 ++---
 api/api/constants/media_types.py | 1 +
 api/api/utils/aiohttp.py | 6 ++---
 api/api/utils/licenses.py | 1 -
 api/conf/urls/__init__.py | 1 +
 api/test/fixtures/asynchronous.py | 3 +--
 api/test/unit/utils/test_throttle.py | 6 ++---
 automations/python/models/label.py | 1 -
 .../workflows/get_folder_differences.py | 1 +
 catalog/dags/common/cloudwatch.py | 1 +
 catalog/dags/common/licenses/licenses.py | 1 +
 .../dags/common/loader/provider_details.py | 1 +
 catalog/dags/common/storage/db_columns.py | 1 +
 catalog/dags/common/storage/util.py | 1 +
 catalog/dags/common/urls.py | 1 +
 .../data_refresh/create_filtered_index.py | 1 +
 .../data_refresh/create_filtered_index_dag.py | 1 +
 catalog/dags/data_refresh/dag_factory.py | 1 +
 .../data_refresh/data_refresh_task_factory.py | 1 +
 .../dags/data_refresh/data_refresh_types.py | 1 +
 .../batched_update/batched_update_dag.py | 1 -
 .../delete_records/delete_records_dag.py | 1 -
 .../database/report_pending_reported_media.py | 1 +
 .../create_new_es_index_dag.py | 1 +
 ...roportional_by_source_staging_index_dag.py | 1 +
 .../recreate_full_staging_index_dag.py | 1 +
 catalog/dags/flickr_thumbs_removal.py | 1 +
 catalog/dags/maintenance/add_license_url.py | 1 +
 .../airflow_log_cleanup_workflow.py | 1 +
 catalog/dags/oauth2/authorize_dag.py | 1 +
 catalog/dags/oauth2/token_refresh_dag.py | 1 +
 .../popularity_refresh_dag_factory.py | 1 +
 .../popularity/popularity_refresh_types.py | 1 +
 ...eate_popularity_calculation_dag_factory.py | 7 +++---
 .../provider_api_scripts/auckland_museum.py | 1 +
 .../provider_api_scripts/europeana.py | 1 +
 .../provider_api_scripts/finnish_museums.py | 1 +
 .../provider_api_scripts/freesound.py | 1 +
 .../providers/provider_api_scripts/jamendo.py | 1 +
 .../provider_api_scripts/justtakeitfree.py | 1 +
 .../providers/provider_api_scripts/nappy.py | 1 +
 .../provider_api_scripts/rawpixel.py | 1 +
 .../provider_api_scripts/science_museum.py | 1 +
 .../providers/provider_api_scripts/smk.py | 1 +
 .../provider_api_scripts/stocksnap.py | 1 +
 .../provider_api_scripts/wordpress.py | 1 +
 .../dags/providers/provider_dag_factory.py | 1 +
 .../tests/dags/common/storage/test_media.py | 1 +
 .../test_resources/fake_provider_module.py | 1 +
 .../maintenance/test_pr_review_reminders.py | 24 +++++++++----------
 .../inaturalist/pull_sample_records.py | 1 -
 .../test_wikimedia_commons.py | 6 ++---
 .../dag_doc_gen/dag_doc_generation.py | 1 +
 documentation/_ext/link_issues.py | 1 +
 documentation/_ext/link_usernames.py | 1 +
 ingestion_server/ingestion_server/api.py | 1 +
 ingestion_server/ingestion_server/cleanup.py | 1 +
 utilities/dead_links/dead_link_tally.py | 1 +
 .../calculate_average_weeks_of_work.py | 1 +
 .../project_planning/graph_project_voting.py | 1 +
 .../process_selection_votes.py | 1 +
 .../provider_tallies/provider_tally_stats.py | 1 +
 63 files changed, 83 insertions(+), 38 deletions(-)

diff --git a/.github/workflows/pr_label_check.yml b/.github/workflows/pr_label_check.yml
index abcee295f27..ec07a9d7302 100644
--- a/.github/workflows/pr_label_check.yml
+++ b/.github/workflows/pr_label_check.yml
@@ -42,7 +42,7 @@ jobs:
       - get_label_groups
     steps:
       - name: Check aspect label
-        uses: docker://agilepathway/pull-request-label-checker:v1.6.20
+        uses: docker://agilepathway/pull-request-label-checker:v1.6.23
         with:
           any_of: ${{ needs.get_label_groups.outputs.aspect }}
           repo_token: ${{ secrets.GITHUB_TOKEN }}
@@ -55,7 +55,7 @@ jobs:
       - get_label_groups
     steps:
       - name: Check goal label
-        uses: docker://agilepathway/pull-request-label-checker:v1.6.20
+        uses: docker://agilepathway/pull-request-label-checker:v1.6.23
         with:
           one_of: ${{ needs.get_label_groups.outputs.goal }}
           repo_token: ${{ secrets.GITHUB_TOKEN }}
@@ -68,7 +68,7 @@ jobs:
       - get_label_groups
     steps:
      - name: Check priority label
-        uses: docker://agilepathway/pull-request-label-checker:v1.6.20
+        uses: docker://agilepathway/pull-request-label-checker:v1.6.23
         with:
           one_of: ${{ needs.get_label_groups.outputs.priority }}
           repo_token: ${{ secrets.GITHUB_TOKEN }}
@@ -81,7 +81,7 @@ jobs:
       - get_label_groups
     steps:
       - name: Check stack label
-        uses: docker://agilepathway/pull-request-label-checker:v1.6.20
+        uses: docker://agilepathway/pull-request-label-checker:v1.6.23
         with:
           any_of: ${{ needs.get_label_groups.outputs.stack }}
           repo_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index dc6a456f981..81b2c97dacb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -46,7 +46,7 @@ repos:
       - id: requirements-txt-fixer
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.2.0
+    rev: v0.3.0
    hooks:
       - id: ruff # replaces Flake8, isort, pydocstyle, pyupgrade
        args:
@@ -74,7 +74,7 @@ repos:
       - id: shfmt-docker
 
   - repo: https://github.com/rhysd/actionlint
-    rev: v1.6.26
+    rev: v1.6.27
    hooks:
       - id: actionlint-docker
 
@@ -153,7 +153,7 @@ repos:
        files: (.vale/.*|.mdx?)$
 
   - repo: https://github.com/renovatebot/pre-commit-hooks
-    rev: 37.168.1
+    rev: 37.221.1
    hooks:
       - id: renovate-config-validator
        args:
diff --git a/api/api/constants/media_types.py b/api/api/constants/media_types.py
index ae97bbd3e6d..a1b0f71f42c 100644
--- a/api/api/constants/media_types.py
+++ b/api/api/constants/media_types.py
@@ -1,4 +1,5 @@
 """Also see `ingestion_server/constants/media_types.py`."""
+
 from typing import Literal
 
 
diff --git a/api/api/utils/aiohttp.py b/api/api/utils/aiohttp.py
index 2d1905a36b4..339f9db0bf6 100644
--- a/api/api/utils/aiohttp.py
+++ b/api/api/utils/aiohttp.py
@@ -14,9 +14,9 @@
     asyncio.AbstractEventLoop, aiohttp.ClientSession
 ] = weakref.WeakKeyDictionary()
 
-_LOCKS: weakref.WeakKeyDictionary[
-    asyncio.AbstractEventLoop, asyncio.Lock
-] = weakref.WeakKeyDictionary()
+_LOCKS: weakref.WeakKeyDictionary[asyncio.AbstractEventLoop, asyncio.Lock] = (
+    weakref.WeakKeyDictionary()
+)
 
 
 @asgi_shutdown.connect
diff --git a/api/api/utils/licenses.py b/api/api/utils/licenses.py
index 4a58a9e9395..01ae9337f60 100644
--- a/api/api/utils/licenses.py
+++ b/api/api/utils/licenses.py
@@ -5,7 +5,6 @@
 frontend, or open an issue to track it.
""" - from api.constants.licenses import ( ALL_CC_LICENSES, DEPRECATED_CC_LICENSES, diff --git a/api/conf/urls/__init__.py b/api/conf/urls/__init__.py index 62a980aa266..8ce45a6ad55 100644 --- a/api/conf/urls/__init__.py +++ b/api/conf/urls/__init__.py @@ -4,6 +4,7 @@ For more information on this file, see https://docs.djangoproject.com/en/4.2/topics/http/urls/ """ + from django.conf import settings from django.conf.urls.static import static from django.contrib import admin diff --git a/api/test/fixtures/asynchronous.py b/api/test/fixtures/asynchronous.py index 5277a134a29..e0041bf5c7b 100644 --- a/api/test/fixtures/asynchronous.py +++ b/api/test/fixtures/asynchronous.py @@ -43,8 +43,7 @@ def ensure_asgi_lifecycle(session_loop: asyncio.AbstractEventLoop): """ scope = {"type": "lifespan"} - async def noop(*args, **kwargs): - ... + async def noop(*args, **kwargs): ... async def shutdown(): return {"type": "lifespan.shutdown"} diff --git a/api/test/unit/utils/test_throttle.py b/api/test/unit/utils/test_throttle.py index a727ea2b836..975276c3858 100644 --- a/api/test/unit/utils/test_throttle.py +++ b/api/test/unit/utils/test_throttle.py @@ -43,9 +43,9 @@ def enable_throttles(settings): # Put settings into base Django settings from which DRF reads # settings when we call `api_settings.reload()` settings.REST_FRAMEWORK["DEFAULT_THROTTLE_RATES"] = settings.DEFAULT_THROTTLE_RATES - settings.REST_FRAMEWORK[ - "DEFAULT_THROTTLE_CLASSES" - ] = settings.DEFAULT_THROTTLE_CLASSES + settings.REST_FRAMEWORK["DEFAULT_THROTTLE_CLASSES"] = ( + settings.DEFAULT_THROTTLE_CLASSES + ) # Reload the settings and read them from base Django settings # Also handles importing classes from class strings, etc diff --git a/automations/python/models/label.py b/automations/python/models/label.py index edbed65a4db..6a91967e751 100644 --- a/automations/python/models/label.py +++ b/automations/python/models/label.py @@ -4,7 +4,6 @@ class Label: - """ Represents a single label. diff --git a/automations/python/workflows/get_folder_differences.py b/automations/python/workflows/get_folder_differences.py index a45053fa1af..cbf2572b599 100644 --- a/automations/python/workflows/get_folder_differences.py +++ b/automations/python/workflows/get_folder_differences.py @@ -7,6 +7,7 @@ files on the docs preview site. Finally, it outputs the preview site docs generated comment along with a list of new and changed files. """ + import os import shutil import subprocess diff --git a/catalog/dags/common/cloudwatch.py b/catalog/dags/common/cloudwatch.py index 0be3df340a3..3d51b3b57b6 100644 --- a/catalog/dags/common/cloudwatch.py +++ b/catalog/dags/common/cloudwatch.py @@ -9,6 +9,7 @@ in the Airflow Variables, which is particularly the desired behavior when running the Data Refresh DAGs locally or in a development environment. """ + import logging from airflow.exceptions import AirflowSkipException diff --git a/catalog/dags/common/licenses/licenses.py b/catalog/dags/common/licenses/licenses.py index a939de24d18..198b7825494 100644 --- a/catalog/dags/common/licenses/licenses.py +++ b/catalog/dags/common/licenses/licenses.py @@ -2,6 +2,7 @@ This module has a number of public methods which are useful for working with licenses. 
""" + import logging from functools import lru_cache from typing import NamedTuple diff --git a/catalog/dags/common/loader/provider_details.py b/catalog/dags/common/loader/provider_details.py index 6e892a62e2a..39c97daec3e 100644 --- a/catalog/dags/common/loader/provider_details.py +++ b/catalog/dags/common/loader/provider_details.py @@ -8,6 +8,7 @@ might be useful for retrieving sub-providers at the database level and the API level. """ + import os diff --git a/catalog/dags/common/storage/db_columns.py b/catalog/dags/common/storage/db_columns.py index 2ca7d800e90..c508603bf39 100644 --- a/catalog/dags/common/storage/db_columns.py +++ b/catalog/dags/common/storage/db_columns.py @@ -2,6 +2,7 @@ This module contains the lists of database columns in the same order as in the main media tables within the database. """ + from common.constants import AUDIO, IMAGE from common.storage import columns as col from common.utils import setup_kwargs_for_media_type diff --git a/catalog/dags/common/storage/util.py b/catalog/dags/common/storage/util.py index 3f6b75f22d9..514e4e47a95 100644 --- a/catalog/dags/common/storage/util.py +++ b/catalog/dags/common/storage/util.py @@ -1,4 +1,5 @@ """This module has public methods which are useful for storage operations.""" + import logging from common.storage.audio import AudioStore diff --git a/catalog/dags/common/urls.py b/catalog/dags/common/urls.py index d8dd08f1677..cdfc33e3eae 100644 --- a/catalog/dags/common/urls.py +++ b/catalog/dags/common/urls.py @@ -2,6 +2,7 @@ This module has a number of public methods which are useful for verifying and cleaning URLs. """ + import logging import re from functools import lru_cache diff --git a/catalog/dags/data_refresh/create_filtered_index.py b/catalog/dags/data_refresh/create_filtered_index.py index f4fd3d9270f..a77c2d53eb3 100644 --- a/catalog/dags/data_refresh/create_filtered_index.py +++ b/catalog/dags/data_refresh/create_filtered_index.py @@ -15,6 +15,7 @@ finally deleting the old, now unused filtered index. These TaskGroups are used in the data refresh DAGs to execute the filtered index steps. """ + from datetime import timedelta from airflow.operators.empty import EmptyOperator diff --git a/catalog/dags/data_refresh/create_filtered_index_dag.py b/catalog/dags/data_refresh/create_filtered_index_dag.py index 7fc0f38ea39..20f365f982a 100644 --- a/catalog/dags/data_refresh/create_filtered_index_dag.py +++ b/catalog/dags/data_refresh/create_filtered_index_dag.py @@ -50,6 +50,7 @@ This ensures that neither are depending on or modifying the origin indexes critical for the creation of the filtered indexes. 
""" + from datetime import datetime from airflow import DAG diff --git a/catalog/dags/data_refresh/dag_factory.py b/catalog/dags/data_refresh/dag_factory.py index 838ab97c890..358a0b3e212 100644 --- a/catalog/dags/data_refresh/dag_factory.py +++ b/catalog/dags/data_refresh/dag_factory.py @@ -22,6 +22,7 @@ - [[Feature] Merge popularity calculations and data refresh into a single DAG]( https://github.com/WordPress/openverse-catalog/issues/453) """ + import logging from collections.abc import Sequence diff --git a/catalog/dags/data_refresh/data_refresh_task_factory.py b/catalog/dags/data_refresh/data_refresh_task_factory.py index 8444f0641c0..3ad20a806d3 100644 --- a/catalog/dags/data_refresh/data_refresh_task_factory.py +++ b/catalog/dags/data_refresh/data_refresh_task_factory.py @@ -44,6 +44,7 @@ - [[Feature] Data refresh orchestration DAG]( https://github.com/WordPress/openverse-catalog/issues/353) """ + import logging import os from collections.abc import Sequence diff --git a/catalog/dags/data_refresh/data_refresh_types.py b/catalog/dags/data_refresh/data_refresh_types.py index f6bc6dff631..27d2cc0caaa 100644 --- a/catalog/dags/data_refresh/data_refresh_types.py +++ b/catalog/dags/data_refresh/data_refresh_types.py @@ -5,6 +5,7 @@ for each of our media types. This configuration information is used to generate the dynamic Data Refresh dags. """ + import os from dataclasses import dataclass, field from datetime import datetime, timedelta diff --git a/catalog/dags/database/batched_update/batched_update_dag.py b/catalog/dags/database/batched_update/batched_update_dag.py index a698d79eba9..c032e027e4b 100644 --- a/catalog/dags/database/batched_update/batched_update_dag.py +++ b/catalog/dags/database/batched_update/batched_update_dag.py @@ -63,7 +63,6 @@ failures during the `update_batches` step. """ - import logging from airflow.decorators import dag diff --git a/catalog/dags/database/delete_records/delete_records_dag.py b/catalog/dags/database/delete_records/delete_records_dag.py index 679de59bea5..62a9220749f 100644 --- a/catalog/dags/database/delete_records/delete_records_dag.py +++ b/catalog/dags/database/delete_records/delete_records_dag.py @@ -50,7 +50,6 @@ DAGs are not currently running. """ - import logging from airflow.decorators import dag diff --git a/catalog/dags/database/report_pending_reported_media.py b/catalog/dags/database/report_pending_reported_media.py index 61f04169176..edc1e850e07 100644 --- a/catalog/dags/database/report_pending_reported_media.py +++ b/catalog/dags/database/report_pending_reported_media.py @@ -9,6 +9,7 @@ taken. If a record has been reported multiple times, it only needs to be reviewed once and so is only counted once in the reporting by this DAG. 
""" + import logging import os from textwrap import dedent diff --git a/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index_dag.py b/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index_dag.py index 3062281ee49..407af9a5fb9 100644 --- a/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index_dag.py +++ b/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index_dag.py @@ -96,6 +96,7 @@ } ``` """ + import logging from airflow import DAG diff --git a/catalog/dags/elasticsearch_cluster/create_proportional_by_source_staging_index/create_proportional_by_source_staging_index_dag.py b/catalog/dags/elasticsearch_cluster/create_proportional_by_source_staging_index/create_proportional_by_source_staging_index_dag.py index 28ed4a02a00..1903d5f0ce8 100644 --- a/catalog/dags/elasticsearch_cluster/create_proportional_by_source_staging_index/create_proportional_by_source_staging_index_dag.py +++ b/catalog/dags/elasticsearch_cluster/create_proportional_by_source_staging_index/create_proportional_by_source_staging_index_dag.py @@ -36,6 +36,7 @@ * `recreate_full_staging_index` * `create_new_staging_es_index` """ + from datetime import datetime, timedelta from airflow.decorators import dag diff --git a/catalog/dags/elasticsearch_cluster/recreate_staging_index/recreate_full_staging_index_dag.py b/catalog/dags/elasticsearch_cluster/recreate_staging_index/recreate_full_staging_index_dag.py index b91e93edb90..0be1bc848dc 100644 --- a/catalog/dags/elasticsearch_cluster/recreate_staging_index/recreate_full_staging_index_dag.py +++ b/catalog/dags/elasticsearch_cluster/recreate_staging_index/recreate_full_staging_index_dag.py @@ -40,6 +40,7 @@ * `create_proportional_by_provider_staging_index` * `create_new_staging_es_index` """ + from datetime import datetime from airflow.decorators import dag diff --git a/catalog/dags/flickr_thumbs_removal.py b/catalog/dags/flickr_thumbs_removal.py index 5c8615c2ea5..9c2e9e0eb81 100644 --- a/catalog/dags/flickr_thumbs_removal.py +++ b/catalog/dags/flickr_thumbs_removal.py @@ -2,6 +2,7 @@ One-time run DAG to remove progressively all the old Flickr thumbnails, as they were determined to be unsuitable for the Openverse UI requirements. """ + import logging from datetime import timedelta from textwrap import dedent diff --git a/catalog/dags/maintenance/add_license_url.py b/catalog/dags/maintenance/add_license_url.py index 61a03012b3f..3eb36de3535 100644 --- a/catalog/dags/maintenance/add_license_url.py +++ b/catalog/dags/maintenance/add_license_url.py @@ -9,6 +9,7 @@ the `meta_data` column are updated, the DAG will only run the first and the last step, logging the statistics. 
""" + import csv import logging from collections import defaultdict diff --git a/catalog/dags/maintenance/airflow_log_cleanup_workflow.py b/catalog/dags/maintenance/airflow_log_cleanup_workflow.py index 23f2876d66e..e645ab6a8bc 100644 --- a/catalog/dags/maintenance/airflow_log_cleanup_workflow.py +++ b/catalog/dags/maintenance/airflow_log_cleanup_workflow.py @@ -19,6 +19,7 @@ - maxLogAgeInDays: - Optional - enableDelete: - Optional """ + from datetime import datetime, timedelta import jinja2 diff --git a/catalog/dags/oauth2/authorize_dag.py b/catalog/dags/oauth2/authorize_dag.py index a02238ed9dd..db5ad0c48e5 100644 --- a/catalog/dags/oauth2/authorize_dag.py +++ b/catalog/dags/oauth2/authorize_dag.py @@ -1,4 +1,5 @@ """# OAuth Provider Authorization""" + from datetime import datetime from airflow.models import DAG diff --git a/catalog/dags/oauth2/token_refresh_dag.py b/catalog/dags/oauth2/token_refresh_dag.py index 56c65309ac1..4c83fdef671 100644 --- a/catalog/dags/oauth2/token_refresh_dag.py +++ b/catalog/dags/oauth2/token_refresh_dag.py @@ -1,4 +1,5 @@ """# OAuth Provider Token Refresh""" + from datetime import datetime from airflow.models import DAG diff --git a/catalog/dags/popularity/popularity_refresh_dag_factory.py b/catalog/dags/popularity/popularity_refresh_dag_factory.py index d8f05b4336e..615d7fc50dd 100644 --- a/catalog/dags/popularity/popularity_refresh_dag_factory.py +++ b/catalog/dags/popularity/popularity_refresh_dag_factory.py @@ -22,6 +22,7 @@ - [[Implementation Plan] Decoupling Popularity Calculations from the Data Refresh]( https://docs.openverse.org/projects/proposals/popularity_optimizations/20230420-implementation_plan_popularity_optimizations.html) """ + import logging from datetime import datetime diff --git a/catalog/dags/popularity/popularity_refresh_types.py b/catalog/dags/popularity/popularity_refresh_types.py index 67c9cb51912..46d661ab516 100644 --- a/catalog/dags/popularity/popularity_refresh_types.py +++ b/catalog/dags/popularity/popularity_refresh_types.py @@ -5,6 +5,7 @@ `POPULARITY_REFRESH_CONFIGS` for each of our media types. This configuration info is used to generate the dynamic Popularity Refresh dags. """ + import os from dataclasses import dataclass, field from datetime import datetime, timedelta diff --git a/catalog/dags/popularity/recreate_popularity_calculation_dag_factory.py b/catalog/dags/popularity/recreate_popularity_calculation_dag_factory.py index f8662b06b02..c13ea77eb86 100644 --- a/catalog/dags/popularity/recreate_popularity_calculation_dag_factory.py +++ b/catalog/dags/popularity/recreate_popularity_calculation_dag_factory.py @@ -12,6 +12,7 @@ These DAGs are not on a schedule, and should only be run manually when new SQL code is deployed for the calculation. 
""" + from airflow import DAG from common.constants import DAG_DEFAULT_ARGS, POSTGRES_CONN_ID @@ -74,6 +75,6 @@ def create_recreate_popularity_calculation_dag(popularity_refresh: PopularityRef recreate_popularity_calculation_dag = create_recreate_popularity_calculation_dag( popularity_refresh ) - globals()[ - recreate_popularity_calculation_dag.dag_id - ] = recreate_popularity_calculation_dag + globals()[recreate_popularity_calculation_dag.dag_id] = ( + recreate_popularity_calculation_dag + ) diff --git a/catalog/dags/providers/provider_api_scripts/auckland_museum.py b/catalog/dags/providers/provider_api_scripts/auckland_museum.py index 87035537ae4..2d57da0fc3e 100644 --- a/catalog/dags/providers/provider_api_scripts/auckland_museum.py +++ b/catalog/dags/providers/provider_api_scripts/auckland_museum.py @@ -16,6 +16,7 @@ /search, /id | 10 | 1000 /id/media | 10 | 1000 """ + import logging from datetime import datetime, timedelta diff --git a/catalog/dags/providers/provider_api_scripts/europeana.py b/catalog/dags/providers/provider_api_scripts/europeana.py index 8a4ea68761f..63f44685da0 100644 --- a/catalog/dags/providers/provider_api_scripts/europeana.py +++ b/catalog/dags/providers/provider_api_scripts/europeana.py @@ -8,6 +8,7 @@ Notes: https://pro.europeana.eu/page/search """ + import argparse import functools import logging diff --git a/catalog/dags/providers/provider_api_scripts/finnish_museums.py b/catalog/dags/providers/provider_api_scripts/finnish_museums.py index de842b37e91..28ec1314783 100644 --- a/catalog/dags/providers/provider_api_scripts/finnish_museums.py +++ b/catalog/dags/providers/provider_api_scripts/finnish_museums.py @@ -14,6 +14,7 @@ reingestion DAG, as updated data will be processed during regular ingestion. """ + import logging from itertools import chain diff --git a/catalog/dags/providers/provider_api_scripts/freesound.py b/catalog/dags/providers/provider_api_scripts/freesound.py index 6cf55bc80ef..fc6f01a7ffc 100644 --- a/catalog/dags/providers/provider_api_scripts/freesound.py +++ b/catalog/dags/providers/provider_api_scripts/freesound.py @@ -11,6 +11,7 @@ This script can be run either to ingest the full dataset or as a dated DAG. """ + import functools import logging from datetime import datetime diff --git a/catalog/dags/providers/provider_api_scripts/jamendo.py b/catalog/dags/providers/provider_api_scripts/jamendo.py index 5243bf9589b..0b58c1f9418 100644 --- a/catalog/dags/providers/provider_api_scripts/jamendo.py +++ b/catalog/dags/providers/provider_api_scripts/jamendo.py @@ -15,6 +15,7 @@ sample rate: 44.1 or 48 kHz channels: 1/2 """ + import logging from datetime import timedelta from urllib.parse import parse_qs, urlencode, urlsplit diff --git a/catalog/dags/providers/provider_api_scripts/justtakeitfree.py b/catalog/dags/providers/provider_api_scripts/justtakeitfree.py index 7b0bafbea55..503b8c8ed97 100644 --- a/catalog/dags/providers/provider_api_scripts/justtakeitfree.py +++ b/catalog/dags/providers/provider_api_scripts/justtakeitfree.py @@ -9,6 +9,7 @@ Notes: https://justtakeitfree.com/api/api.php This API requires an API key. 
 For more details, see https://github.com/WordPress/openverse/pull/2793
 """
+
 import logging
 
 from airflow.models import Variable
diff --git a/catalog/dags/providers/provider_api_scripts/nappy.py b/catalog/dags/providers/provider_api_scripts/nappy.py
index f9078b8ba90..ffe9d6eb982 100644
--- a/catalog/dags/providers/provider_api_scripts/nappy.py
+++ b/catalog/dags/providers/provider_api_scripts/nappy.py
@@ -10,6 +10,7 @@
 https://nappy.co/
 """
+
 import logging
 
 from common import constants
diff --git a/catalog/dags/providers/provider_api_scripts/rawpixel.py b/catalog/dags/providers/provider_api_scripts/rawpixel.py
index bbd8d657048..dbe68df0370 100644
--- a/catalog/dags/providers/provider_api_scripts/rawpixel.py
+++ b/catalog/dags/providers/provider_api_scripts/rawpixel.py
@@ -12,6 +12,7 @@
 although the API key we've been given can circumvent this limit.
 https://www.rawpixel.com/api/v1/search?tags=$publicdomain&page=1&pagesize=100
 """
+
 import base64
 import hmac
 import html
diff --git a/catalog/dags/providers/provider_api_scripts/science_museum.py b/catalog/dags/providers/provider_api_scripts/science_museum.py
index 35a8ff513f0..7876ecf4bda 100644
--- a/catalog/dags/providers/provider_api_scripts/science_museum.py
+++ b/catalog/dags/providers/provider_api_scripts/science_museum.py
@@ -9,6 +9,7 @@
 Notes: https://github.com/TheScienceMuseum/collectionsonline/wiki/Collections-Online-API
 Rate limited, no specific rate given.
 """  # noqa: E501
+
 import logging
 import re
 from datetime import date
diff --git a/catalog/dags/providers/provider_api_scripts/smk.py b/catalog/dags/providers/provider_api_scripts/smk.py
index 63d0ff66051..b17d1142eaf 100644
--- a/catalog/dags/providers/provider_api_scripts/smk.py
+++ b/catalog/dags/providers/provider_api_scripts/smk.py
@@ -7,6 +7,7 @@
 Notes: https://www.smk.dk/en/article/smk-api/
 """
+
 import logging
 import urllib.parse
 
diff --git a/catalog/dags/providers/provider_api_scripts/stocksnap.py b/catalog/dags/providers/provider_api_scripts/stocksnap.py
index e7d2f558d20..faca731e383 100644
--- a/catalog/dags/providers/provider_api_scripts/stocksnap.py
+++ b/catalog/dags/providers/provider_api_scripts/stocksnap.py
@@ -11,6 +11,7 @@
 No rate limits or authorization required.
 API is undocumented.
 """
+
 import json
 import logging
 
diff --git a/catalog/dags/providers/provider_api_scripts/wordpress.py b/catalog/dags/providers/provider_api_scripts/wordpress.py
index 4d8fc3dfbd6..4c60d1b462a 100644
--- a/catalog/dags/providers/provider_api_scripts/wordpress.py
+++ b/catalog/dags/providers/provider_api_scripts/wordpress.py
@@ -9,6 +9,7 @@
 Provide photos, media, users and more related resources.
 No rate limit specified.
""" + import logging import lxml.html as html diff --git a/catalog/dags/providers/provider_dag_factory.py b/catalog/dags/providers/provider_dag_factory.py index dc31be4892c..a3515986638 100644 --- a/catalog/dags/providers/provider_dag_factory.py +++ b/catalog/dags/providers/provider_dag_factory.py @@ -62,6 +62,7 @@ - [DB Loader should take data from S3, rather than EC2 to load into PostgreSQL]( https://github.com/creativecommons/cccatalog/issues/334) """ + import logging import os import re diff --git a/catalog/tests/dags/common/storage/test_media.py b/catalog/tests/dags/common/storage/test_media.py index e843326166c..d1cdfd9d396 100644 --- a/catalog/tests/dags/common/storage/test_media.py +++ b/catalog/tests/dags/common/storage/test_media.py @@ -2,6 +2,7 @@ MediaStore is an abstract class, so to test it we use one of the inheriting classes, ImageStore """ + import logging from unittest.mock import patch diff --git a/catalog/tests/dags/common/test_resources/fake_provider_module.py b/catalog/tests/dags/common/test_resources/fake_provider_module.py index 811ca852e2d..196b7cd1fe1 100644 --- a/catalog/tests/dags/common/test_resources/fake_provider_module.py +++ b/catalog/tests/dags/common/test_resources/fake_provider_module.py @@ -2,6 +2,7 @@ This is a fake provider module used in test_dag_factory. It is used to check that the output path acquisition logic is correct. """ + from common.storage.audio import AudioStore from common.storage.image import ImageStore diff --git a/catalog/tests/dags/maintenance/test_pr_review_reminders.py b/catalog/tests/dags/maintenance/test_pr_review_reminders.py index 9d2ad863487..07731e5a731 100644 --- a/catalog/tests/dags/maintenance/test_pr_review_reminders.py +++ b/catalog/tests/dags/maintenance/test_pr_review_reminders.py @@ -265,9 +265,9 @@ def test_pings_past_due(github, urgency, events): github["events"][past_due_pull["number"]] = make_urgent_events(urgency, events) github["events"][not_due_pull["number"]] = make_non_urgent_events(events) - github["events"][ - old_but_not_due_pull["number"] - ] = make_non_urgent_reviewable_events(events) + github["events"][old_but_not_due_pull["number"]] = ( + make_non_urgent_reviewable_events(events) + ) post_reminders("not_set", dry_run=False) @@ -307,9 +307,9 @@ def test_does_not_reping_past_due_if_reminder_is_current(github, urgency, events github["events"][past_due_pull["number"]] = make_urgent_events(urgency, events) github["events"][not_due_pull["number"]] = make_non_urgent_events(events) - github["events"][ - old_but_not_due_pull["number"] - ] = make_non_urgent_reviewable_events(events) + github["events"][old_but_not_due_pull["number"]] = ( + make_non_urgent_reviewable_events(events) + ) post_reminders("not_set", dry_run=False) @@ -343,9 +343,9 @@ def test_does_reping_past_due_if_reminder_is_outdated(github, urgency, events): github["events"][past_due_pull["number"]] = make_urgent_events(urgency, events) github["events"][not_due_pull["number"]] = make_non_urgent_events(events) - github["events"][ - old_but_not_due_pull["number"] - ] = make_non_urgent_reviewable_events(events) + github["events"][old_but_not_due_pull["number"]] = ( + make_non_urgent_reviewable_events(events) + ) post_reminders("not_set", dry_run=False) @@ -459,9 +459,9 @@ def test_does_not_ping_if_no_reviewers(github, urgency, events): github["events"][past_due_pull["number"]] = make_urgent_events(urgency, events) github["events"][not_due_pull["number"]] = make_non_urgent_events(events) - github["events"][ - old_but_not_due_pull["number"] - ] = 
make_non_urgent_reviewable_events(events) + github["events"][old_but_not_due_pull["number"]] = ( + make_non_urgent_reviewable_events(events) + ) for pr in [past_due_pull, not_due_pull]: _setup_branch_protection(github, pr) diff --git a/catalog/tests/dags/providers/provider_api_scripts/resources/inaturalist/pull_sample_records.py b/catalog/tests/dags/providers/provider_api_scripts/resources/inaturalist/pull_sample_records.py index ac58de78167..5fffe06154c 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/resources/inaturalist/pull_sample_records.py +++ b/catalog/tests/dags/providers/provider_api_scripts/resources/inaturalist/pull_sample_records.py @@ -7,7 +7,6 @@ git and linting. """ - import csv import gzip import os diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_wikimedia_commons.py b/catalog/tests/dags/providers/provider_api_scripts/test_wikimedia_commons.py index 853e31a728d..da446916f6f 100644 --- a/catalog/tests/dags/providers/provider_api_scripts/test_wikimedia_commons.py +++ b/catalog/tests/dags/providers/provider_api_scripts/test_wikimedia_commons.py @@ -493,9 +493,9 @@ def test_get_audio_record_data_parses_wav_invalid_bit_rate(wmc): file_metadata = _get_resource_json("audio_filedata_wav.json") original_data = {"url": "myurl.com", "meta_data": {}} # Set the bit rate higher than the int max - file_metadata["metadata"][5]["value"][3]["value"][0]["value"][3][ - "value" - ] = 4294967294 + file_metadata["metadata"][5]["value"][3]["value"][0]["value"][3]["value"] = ( + 4294967294 + ) expected_parsed_data = { "url": "myurl.com", "bit_rate": None, diff --git a/catalog/utilities/dag_doc_gen/dag_doc_generation.py b/catalog/utilities/dag_doc_gen/dag_doc_generation.py index 436bb709607..aabec1dffa7 100644 --- a/catalog/utilities/dag_doc_gen/dag_doc_generation.py +++ b/catalog/utilities/dag_doc_gen/dag_doc_generation.py @@ -13,6 +13,7 @@ The individual DAG documentation section pulls the DAG's `doc_md` blurb and renders it within the document. """ + import logging import re from collections import defaultdict diff --git a/documentation/_ext/link_issues.py b/documentation/_ext/link_issues.py index cdb70382ca9..9070b6931b3 100644 --- a/documentation/_ext/link_issues.py +++ b/documentation/_ext/link_issues.py @@ -26,6 +26,7 @@ Add GitHub token to prevent rate limiting """ + import dataclasses import os import re diff --git a/documentation/_ext/link_usernames.py b/documentation/_ext/link_usernames.py index 4ed01b89c50..d9543b40c7b 100644 --- a/documentation/_ext/link_usernames.py +++ b/documentation/_ext/link_usernames.py @@ -8,6 +8,7 @@ The plugin ignores code inside of fixed text blocks, including code blocks and backticks. """ + import re from docutils import nodes diff --git a/ingestion_server/ingestion_server/api.py b/ingestion_server/ingestion_server/api.py index 7285fc695d6..9156c561305 100644 --- a/ingestion_server/ingestion_server/api.py +++ b/ingestion_server/ingestion_server/api.py @@ -1,4 +1,5 @@ """A small RPC API server for scheduling data refresh and indexing tasks.""" + import logging import os import time diff --git a/ingestion_server/ingestion_server/cleanup.py b/ingestion_server/ingestion_server/cleanup.py index 23f69f84390..6e794f290d7 100644 --- a/ingestion_server/ingestion_server/cleanup.py +++ b/ingestion_server/ingestion_server/cleanup.py @@ -3,6 +3,7 @@ This includes cleaning up malformed URLs and filtering out undesirable tags. 
""" + import csv import logging as log import multiprocessing diff --git a/utilities/dead_links/dead_link_tally.py b/utilities/dead_links/dead_link_tally.py index 1a1d9cda4a4..77562a2a599 100644 --- a/utilities/dead_links/dead_link_tally.py +++ b/utilities/dead_links/dead_link_tally.py @@ -5,6 +5,7 @@ on localhost (usually via tunneling). It will run through the link validation entries in Redis. """ + import pprint from collections import defaultdict from urllib.parse import urlparse diff --git a/utilities/project_planning/calculate_average_weeks_of_work.py b/utilities/project_planning/calculate_average_weeks_of_work.py index 0b8246de60a..94a783b01d2 100644 --- a/utilities/project_planning/calculate_average_weeks_of_work.py +++ b/utilities/project_planning/calculate_average_weeks_of_work.py @@ -4,6 +4,7 @@ See the README for more information. """ + from pathlib import Path import click diff --git a/utilities/project_planning/graph_project_voting.py b/utilities/project_planning/graph_project_voting.py index df2c79eb9af..ee63642d314 100644 --- a/utilities/project_planning/graph_project_voting.py +++ b/utilities/project_planning/graph_project_voting.py @@ -3,6 +3,7 @@ See the README for more information. """ + from datetime import datetime from pathlib import Path diff --git a/utilities/project_planning/process_selection_votes.py b/utilities/project_planning/process_selection_votes.py index b3d3e7073ed..a809ec4b0bc 100644 --- a/utilities/project_planning/process_selection_votes.py +++ b/utilities/project_planning/process_selection_votes.py @@ -3,6 +3,7 @@ See the README for more information. """ + from datetime import datetime from pathlib import Path diff --git a/utilities/provider_tallies/provider_tally_stats.py b/utilities/provider_tallies/provider_tally_stats.py index ae073cb9742..9f62eff7d4f 100644 --- a/utilities/provider_tallies/provider_tally_stats.py +++ b/utilities/provider_tallies/provider_tally_stats.py @@ -5,6 +5,7 @@ on localhost (usually via tunneling). It will run through the provider result count entries in Redis and output them to CSV. """ + import pprint from datetime import datetime from pathlib import Path