diff --git a/.github/workflows/draft_releases.yml b/.github/workflows/draft_releases.yml
new file mode 100644
index 00000000000..ed1c60707dc
--- /dev/null
+++ b/.github/workflows/draft_releases.yml
@@ -0,0 +1,57 @@
+name: Draft releases
+
+on:
+  push:
+    branches:
+      - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  # Release drafting runs can be freely interrupted: they do not depend on each other, and each run regenerates the full draft release based on the previous one
+  cancel-in-progress: true
+
+jobs:
+  draft-release:
+    name: "Draft ${{ matrix.app }} release"
+    runs-on: ubuntu-latest
+    permissions:
+      # write permission is required to create a GitHub release
+      contents: write
+
+    strategy:
+      matrix:
+        app:
+          - api
+          - frontend
+          - catalog
+          - ingestion_server
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Calculate tag name
+        id: tag
+        run: |
+          # Format example: 2023.03.22.04.56.29
+          # `-u` forces UTC
+          formatted_date="$(date -u +%Y.%m.%d.%H.%M.%S)"
+
+          {
+            echo "date=$formatted_date";
+            echo "git-tag=${{ matrix.app }}-$formatted_date";
+          } >> "$GITHUB_OUTPUT"
+
+      # Each run replaces the previous draft release matching the tag prefix set in the release drafter config,
+      # which means there is always exactly one current draft release for each app
+      # Publishing the draft release triggers the `release-app` workflow
+      - uses: release-drafter/release-drafter@v5
+        id: release-drafter
+        with:
+          config-name: release-drafter-${{ matrix.app }}.yml
+          version: ${{ steps.tag.outputs.date }}
+          tag: ${{ steps.tag.outputs.git-tag }}
+          name: ${{ steps.tag.outputs.git-tag }}
+          draft: true
+          commitish: main
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/release-app.yml b/.github/workflows/release-app.yml
index eea0a3fbd92..aa4679b4bdc 100644
--- a/.github/workflows/release-app.yml
+++ b/.github/workflows/release-app.yml
@@ -1,23 +1,19 @@
 name: Release app
 
 on:
-  workflow_dispatch:
-    inputs:
-      app:
-        type: choice
-        options:
-          - api
-          - ingestion_server
-          - frontend
-          - catalog
-        required: true
-        description: Application to release. If `api` or `frontend`, the deployment workflow will automatically be dispatched for you.
-      image-sha:
-        type: string
-        required: true
-        description: The SHA of the staging image to tag.
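About the `config-name` input in `draft_releases.yml` above: it points at a per-app release drafter config that is not included in this diff. As a rough sketch, assuming the documented release-drafter options, `release-drafter-api.yml` might look something like the following; the concrete values are illustrative, not the repository's actual configuration:

```yaml
# Hypothetical release-drafter-api.yml; every value here is illustrative.
# "tag-prefix" scopes the drafter to one app's releases, so each run finds
# and replaces only the previous "api-*" draft.
tag-prefix: api-
include-labels:
  - "🧱 stack: api"
categories:
  - title: Improvements
    labels:
      - "🌟 goal: addition"
template: |
  $CHANGES
```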
- -concurrency: ${{ github.workflow }}-${{ inputs.app }} + release: + types: [released] + +# The app-name expression is duplicated between here and `env` below because neither context is able to reference the other +# but both need it +concurrency: > + ${{ github.workflow }}- + ${{ + startsWith(github.ref_name, 'api-') && 'api' + || startsWith(github.ref_name, 'ingestion_server-') && 'ingestion_server' + || startsWith(github.ref_name, 'catalog-') && 'catalog' + || startsWith(github.ref_name, 'frontend-') && 'frontend' + }} jobs: release-app: @@ -32,55 +28,25 @@ jobs: packages: write # Needed to open the changelog PR pull-requests: write + env: + # Do not split this into multiple lines, it will not work: + # https://github.com/WordPress/openverse/pull/3789#pullrequestreview-1876525552 + APP_NAME: ${{ startsWith(github.ref_name, 'api-') && 'api' || startsWith(github.ref_name, 'ingestion_server-') && 'ingestion_server' || startsWith(github.ref_name, 'catalog-') && 'catalog' || startsWith(github.ref_name, 'frontend-') && 'frontend' }} steps: - uses: actions/checkout@v4 with: # Creating the tag requires having the whole history of `main` fetch-depth: 0 - - name: Validate `sha-tag` input - uses: actions/github-script@v6 - with: - script: | - let exists = undefined, - page = 0 - while (!exists) { - page += 1 - const { data: versions } = - await github.rest.packages.getAllPackageVersionsForPackageOwnedByOrg({ - package_type: 'container', - // We do not have to validate that auxiliary images also exist as they're built at the same time - // as the "main" image. e.g., `api_nginx` is always built when `api` is built and they'll have - // the same set of tags. - package_name: 'openverse-${{ inputs.app }}', - org: 'WordPress', - page, - // max of `per_page` - per_page: 100, - }) - if (!versions.length) { - break - } - exists = versions.some((v) => v.metadata.container.tags.includes('${{ inputs.image-sha }}')) - } - if (!exists) { - throw new Error( - `'${{ inputs.image-sha }}' does not appear to be a valid SHA tag for ${{ inputs.app }}.` - ) - } - - name: Calculate tag name id: tag run: | - # Format example: 2023.03.22.04.56.29 - # `-u` forces UTC - formatted_date="$(date -u +%Y.%m.%d.%H.%M.%S)" + release_date=$(echo '${{ github.ref_name }}' | sed 's/${{ env.APP_NAME }}-//') - # Split image and git tag to avoid app name duplicated in the fully qualified image name + # Generate `rel-` prefixed image tag to avoid duplicated app name between image and tag { - echo "date=$formatted_date"; - echo "git-tag=${{ inputs.app }}-$formatted_date"; - echo "image-tag=rel-$formatted_date"; + echo "date=$release_date"; + echo "image-tag=rel-$release_date"; } >> "$GITHUB_OUTPUT" - name: Log in to GitHub Docker Registry @@ -90,22 +56,22 @@ jobs: username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Add new tag to existing docker image + - name: Tag latest with release tag run: | - docker buildx imagetools create ghcr.io/wordpress/openverse-${{ inputs.app }}:${{ inputs.image-sha }} --tag ghcr.io/wordpress/openverse-${{ inputs.app }}:${{ steps.tag.outputs.image-tag }} + docker buildx imagetools create ghcr.io/wordpress/openverse-${{ env.APP_NAME }}:latest --tag ghcr.io/wordpress/openverse-${{ env.APP_NAME }}:${{ steps.tag.outputs.image-tag }} - if [[ "${{ inputs.app }}" == "api" ]] || [[ "${{ inputs.app }}" == "frontend" ]]; then - docker buildx imagetools create ghcr.io/wordpress/openverse-${{ inputs.app }}_nginx:${{ inputs.image-sha }} --tag ghcr.io/wordpress/openverse-${{ inputs.app 
}}_nginx:${{ steps.tag.outputs.image-tag }} + if [[ "${{ env.APP_NAME }}" == "api" ]] || [[ "${{ env.APP_NAME }}" == "frontend" ]]; then + docker buildx imagetools create ghcr.io/wordpress/openverse-${{ env.APP_NAME }}_nginx:latest --tag ghcr.io/wordpress/openverse-${{ env.APP_NAME }}_nginx:${{ steps.tag.outputs.image-tag }} fi - name: Deploy production application - if: inputs.app == 'frontend' || inputs.app == 'api' + if: env.APP_NAME == 'frontend' || env.APP_NAME == 'api' uses: felixp8/dispatch-and-wait@v0.1.0 with: owner: WordPress repo: openverse-infrastructure token: ${{ secrets.ACCESS_TOKEN }} - event_type: deploy_production_${{ inputs.app == 'frontend' && 'nuxt' || inputs.app }} + event_type: deploy_production_${{ env.APP_NAME == 'frontend' && 'nuxt' || env.APP_NAME }} client_payload: | { "actor": "${{ github.actor }}", @@ -120,24 +86,11 @@ jobs: # minutes is reached. On the other hand, we do want to wait # so that there is a record of the successful deployment. - - name: Create and publish release - uses: release-drafter/release-drafter@v5 - id: release-drafter - with: - config-name: release-drafter-${{ inputs.app }}.yml - version: ${{ steps.tag.outputs.date }} - tag: ${{ steps.tag.outputs.git-tag }} - name: ${{ steps.tag.outputs.git-tag }} - publish: true - commitish: main - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Add new changelog file to documentation env: - APP: ${{ inputs.app }} + APP: ${{ env.APP_NAME }} DATE: ${{ steps.tag.outputs.date }} - RELEASE_BODY: ${{ steps.release-drafter.outputs.body }} + RELEASE_BODY: ${{ github.event.release.body }} working-directory: automations/python/workflows run: python write_changelog.py @@ -158,24 +111,23 @@ jobs: - name: Lint the changelog file so that it passes CI run: | # Add the new changelog file to git so that pre-commit can lint it. - git add documentation/changelogs/${{ inputs.app }}/${{ steps.tag.outputs.date }}.md + git add documentation/changelogs/${{ env.APP_NAME }}/${{ steps.tag.outputs.date }}.md just precommit # Ensure this step passes even if linting has made changes so the workflow can continue just lint || true - name: Open changelog PR uses: peter-evans/create-pull-request@v5 - if: ${{ !cancelled() }} with: # Access token necessary for PRs to run with CI token: ${{ secrets.ACCESS_TOKEN }} base: main - branch: changelog/${{ steps.tag.outputs.git-tag }} - commit-message: Publish changelog for ${{ steps.tag.outputs.git-tag }} - title: Publish changelog for ${{ steps.tag.outputs.git-tag }} + branch: changelog/${{ github.ref_name }} + commit-message: Publish changelog for ${{ github.ref_name }} + title: Publish changelog for ${{ github.ref_name }} # Add labels to pass CI labels: | - 🧱 stack: ${{ inputs.app == 'ingestion_server' && 'ingestion server' || inputs.app }} + 🧱 stack: ${{ env.APP_NAME == 'ingestion_server' && 'ingestion server' || env.APP_NAME }} 🌟 goal: addition 📄 aspect: text 🟩 priority: low diff --git a/api/test/api_live_integration.py b/api/test/api_live_integration.py deleted file mode 100644 index 22412f20301..00000000000 --- a/api/test/api_live_integration.py +++ /dev/null @@ -1,473 +0,0 @@ -""" -These are the LEGACY API integration tests. - -**Do not add further tests here. New tests should be added in v1_integration_test.** - -End-to-end API tests. Can be used to verify a live deployment is functioning as -designed. Run with the `pytest -s` command from this directory. 
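Returning to `release-app.yml` for a moment: the new trigger derives the app name from the release tag (the `startsWith(...) && ... || ...` chain is the usual GitHub Actions stand-in for a ternary), strips that prefix to recover the release date, and points an additional `rel-` tag at the image currently tagged `latest`. A shell sketch of the same steps, using an invented tag:

```sh
# Illustrative walk-through; "api-2024.01.31.12.00.00" is a made-up tag.
APP_NAME=api
ref_name="api-2024.01.31.12.00.00"  # github.ref_name of the published release
release_date="$(echo "$ref_name" | sed "s/$APP_NAME-//")"  # 2024.01.31.12.00.00

# Point an additional "rel-" tag at the manifest currently tagged "latest",
# without pulling or rebuilding the image:
docker buildx imagetools create \
  "ghcr.io/wordpress/openverse-$APP_NAME:latest" \
  --tag "ghcr.io/wordpress/openverse-$APP_NAME:rel-$release_date"
```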
-""" - -import json -import os -import uuid - -import pytest -import requests - -from api.constants.licenses import LICENSE_GROUPS -from api.models import Image -from api.utils.watermark import watermark - - -API_URL = os.getenv("INTEGRATION_TEST_URL", "http://localhost:8000") -known_apis = { - "http://localhost:8000": "LOCAL", - "https://api.openverse.engineering": "PRODUCTION", - "https://api-dev.openverse.engineering": "TESTING", -} - - -def setup_module(): - if API_URL in known_apis: - print(f"\n\033[1;31;40mTesting {known_apis[API_URL]} environment") - - -@pytest.fixture -def search_fixture(): - response = requests.get(f"{API_URL}/image/search?q=honey", verify=False) - assert response.status_code == 200 - parsed = json.loads(response.text) - return parsed - - -def test_search_quotes(): - """Test that a response is given even if the user messes up quote matching.""" - - response = requests.get(f'{API_URL}/image/search?q="test"', verify=False) - assert response.status_code == 200 - - -def test_search(search_fixture): - assert search_fixture["result_count"] > 0 - - -def test_search_consistency(): - """ - Ensure that no duplicates appear in the first few pages of a search query. - - Elasticsearch sometimes reaches an inconsistent state, which causes search - results to appear differently upon page refresh. This can also introduce - image duplicates in subsequent pages. - """ - - n_pages = 5 - searches = { - requests.get(f"{API_URL}/image/search?q=honey;page={page}", verify=False) - for page in range(1, n_pages) - } - - images = set() - for response in searches: - parsed = json.loads(response.text) - for result in parsed["results"]: - image_id = result["id"] - assert image_id not in images - images.add(image_id) - - -def test_image_detail(search_fixture): - test_id = search_fixture["results"][0]["id"] - response = requests.get(f"{API_URL}/image/{test_id}", verify=False) - assert response.status_code == 200 - - -def test_image_delete_invalid_creds(search_fixture): - test_id = search_fixture["results"][0]["id"] - should_fail = requests.delete( - f"{API_URL}/image/{test_id}", auth=("invalid", "credentials"), verify=False - ) - assert should_fail.status_code == 401 - - -def test_image_delete(search_fixture): - test_id = search_fixture["results"][0]["id"] - response = requests.delete( - f"{API_URL}/image/{test_id}", - auth=("continuous_integration", "deploy"), - verify=False, - ) - assert response.status_code == 204 - deleted_response = requests.get(f"{API_URL}/image/{test_id}") - assert deleted_response.status_code == 404 - - -@pytest.fixture -def link_shortener_fixture(search_fixture): - link_to_shorten = search_fixture["results"][0]["detail"] - payload = {"full_url": link_to_shorten} - response = requests.post(f"{API_URL}/link", json=payload, verify=False) - assert response.status_code == 200 - return json.loads(response.text) - - -def test_link_shortener_create(link_shortener_fixture): - assert "shortened_url" in link_shortener_fixture - - -def test_link_shortener_resolve(link_shortener_fixture): - path = link_shortener_fixture["shortened_url"].split("/")[-1] - response = requests.get( - f"{API_URL}/link/{path}", allow_redirects=False, verify=False - ) - assert response.status_code == 301 - - -def test_stats(): - response = requests.get(f"{API_URL}/statistics/image", verify=False) - parsed_response = json.loads(response.text) - assert response.status_code == 200 - num_images = 0 - provider_count = 0 - for pair in parsed_response: - image_count = pair["image_count"] - num_images += 
int(image_count) - provider_count += 1 - assert num_images > 0 - assert provider_count > 0 - - -@pytest.mark.skip(reason="Disabled feature") -@pytest.fixture -def test_list_create(search_fixture): - payload = { - "title": "INTEGRATION TEST", - "images": [search_fixture["results"][0]["id"]], - } - response = requests.post(API_URL + "/list", json=payload, verify=False) - parsed_response = json.loads(response.text) - assert response.status_code == 201 - return parsed_response - - -@pytest.mark.skip(reason="Disabled feature") -def test_list_detail(test_list_create): - list_slug = test_list_create["url"].split("/")[-1] - response = requests.get(f"{API_URL}/list/{list_slug}", verify=False) - assert response.status_code == 200 - - -@pytest.mark.skip(reason="Disabled feature") -def test_list_delete(test_list_create): - list_slug = test_list_create["url"].split("/")[-1] - token = test_list_create["auth"] - headers = {"Authorization": f"Token {token}"} - response = requests.delete( - f"{API_URL}/list/{list_slug}", headers=headers, verify=False - ) - assert response.status_code == 204 - - -def test_license_type_filtering(): - """Ensure that multiple license type filters interact together correctly.""" - - commercial = LICENSE_GROUPS["commercial"] - modification = LICENSE_GROUPS["modification"] - commercial_and_modification = set.intersection(modification, commercial) - response = requests.get( - f"{API_URL}/image/search?q=honey<=commercial,modification", verify=False - ) - parsed = json.loads(response.text) - for result in parsed["results"]: - assert result["license"].upper() in commercial_and_modification - - -def test_single_license_type_filtering(): - commercial = LICENSE_GROUPS["commercial"] - response = requests.get( - f"{API_URL}/image/search?q=honey<=commercial", verify=False - ) - parsed = json.loads(response.text) - for result in parsed["results"]: - assert result["license"].upper() in commercial - - -def test_specific_license_filter(): - response = requests.get(f"{API_URL}/image/search?q=honey&li=by", verify=False) - parsed = json.loads(response.text) - for result in parsed["results"]: - assert result["license"] == "by" - - -def test_creator_quotation_grouping(): - """Test that quotation marks can be used to narrow down search results.""" - - no_quotes = json.loads( - requests.get( - f"{API_URL}/image/search?creator=claude%20monet", verify=False - ).text - ) - quotes = json.loads( - requests.get( - f'{API_URL}/image/search?creator="claude%20monet"', verify=False - ).text - ) - # Did quotation marks actually narrow down the search? - assert len(no_quotes["results"]) > len(quotes["results"]) - # Did we find only Claude Monet works, or did his lesser known brother Jim - # Monet sneak into the results? 
- for result in quotes["results"]: - assert "Claude Monet" in result["creator"] - - -@pytest.fixture -def test_oauth2_registration(): - payload = { - "name": f"INTEGRATION TEST APPLICATION {uuid.uuid4()}", - "description": "A key for testing the OAuth2 registration process.", - "email": "example@example.org", - } - response = requests.post(f"{API_URL}/oauth2/register/", json=payload, verify=False) - parsed_response = json.loads(response.text) - assert response.status_code == 201 - return parsed_response - - -def test_oauth2_token_exchange(test_oauth2_registration): - client_id = test_oauth2_registration["client_id"] - client_secret = test_oauth2_registration["client_secret"] - token_exchange_request = ( - f"client_id={client_id}" - f"&client_secret={client_secret}" - f"&grant_type=client_credentials" - ) - headers = { - "content-type": "application/x-www-form-urlencoded", - "cache-control": "no-cache", - } - response = json.loads( - requests.post( - f"{API_URL}/oauth2/token/", - data=token_exchange_request, - headers=headers, - verify=False, - ).text - ) - assert "access_token" in response - - -def test_watermark_preserves_exif(): - img_with_exif = ( - "https://raw.githubusercontent.com/ianare/exif-samples/" - "master/jpg/Canon_PowerShot_S40.jpg" - ) - info = { - "title": "test", - "creator": "test", - "license": "test", - "license_version": "test", - } - _, exif = watermark(image_url=img_with_exif, info=info) - assert exif is not None - - img_no_exif = ( - "https://creativecommons.org/wp-content/uploads/" - "2019/03/9467312978_64cd5d2f3b_z.jpg" - ) - _, no_exif = watermark(image_url=img_no_exif, info=info) - assert no_exif is None - - -def test_attribution(): - """ - Check that the API includes an attribution string. - - Since there are some works where the title or creator is not known, the format of - the attribution string can need to be tweaked slightly. 
- """ - - title_and_creator_missing = Image( - identifier="ab80dbe1-414c-4ee8-9543-f9599312aeb8", - title=None, - creator=None, - license="by", - license_version="3.0", - ) - print("\nAttribution examples:\n") - print(title_and_creator_missing.attribution) - assert "This work" in title_and_creator_missing.attribution - - title = "A foo walks into a bar" - creator_missing = Image( - identifier="ab80dbe1-414c-4ee8-9543-f9599312aeb8", - title=title, - creator=None, - license="by", - license_version="3.0", - ) - print(creator_missing.attribution) - assert title in creator_missing.attribution - assert "by " not in creator_missing.attribution - - creator = "John Doe" - title_missing = Image( - identifier="ab80dbe1-414c-4ee8-9543-f9599312aeb8", - title=None, - creator=creator, - license="by", - license_version="3.0", - ) - print(title_missing.attribution) - assert creator in title_missing.attribution - assert "This work" in title_missing.attribution - - all_data_present = Image( - identifier="ab80dbe1-414c-4ee8-9543-f9599312aeb8", - title=title, - creator=creator, - license="by", - license_version="3.0", - ) - print(all_data_present.attribution) - assert title in all_data_present.attribution - assert creator in all_data_present.attribution - - -def test_browse_by_provider(): - response = requests.get(f"{API_URL}/image/browse/behance", verify=False) - assert response.status_code == 200 - parsed = json.loads(response.text) - assert parsed["result_count"] > 0 - - -def test_extension_filter(): - response = requests.get(f"{API_URL}/image/search?q=honey&extension=jpg") - parsed = json.loads(response.text) - for result in parsed["results"]: - assert ".jpg" in result["url"] - - -@pytest.fixture -def search_factory(): - """Allow passing url parameters along with a search request.""" - - def _parameterized_search(**kwargs): - response = requests.get(f"{API_URL}/image/search", params=kwargs, verify=False) - assert response.status_code == 200 - parsed = response.json() - return parsed - - return _parameterized_search - - -@pytest.fixture -def search_with_dead_links(search_factory): - """Test with ``filter_dead`` parameter set to ``False``.""" - - def _search_with_dead_links(**kwargs): - return search_factory(filter_dead=False, **kwargs) - - return _search_with_dead_links - - -@pytest.fixture -def search_without_dead_links(search_factory): - """Test with ``filter_dead`` parameter set to ``True``.""" - - def _search_without_dead_links(**kwargs): - return search_factory(filter_dead=True, **kwargs) - - return _search_without_dead_links - - -def test_page_size_removing_dead_links(search_without_dead_links): - """ - Test whether the number of results returned is equal to the requested page size. - - We have about 500 dead links in the sample data and should have around - 8 dead links in the first 100 results on a query composed of a single - wildcard operator. - - """ - data = search_without_dead_links(q="*", pagesize=100) - assert len(data["results"]) == 100 - - -def test_dead_links_are_correctly_filtered( - search_with_dead_links, search_without_dead_links -): - """ - Test the results for the same query with and without dead links are different. - - We use the results' id to compare them. 
- """ - data_with_dead_links = search_with_dead_links(q="*", pagesize=100) - data_without_dead_links = search_without_dead_links(q="*", pagesize=100) - - comparisons = [] - for result_1 in data_with_dead_links["results"]: - for result_2 in data_without_dead_links["results"]: - comparisons.append(result_1["id"] == result_2["id"]) - - # Some results should be different - # so we should have less than 100 True comparisons - assert comparisons.count(True) < 100 - - -def test_page_consistency_removing_dead_links(search_without_dead_links): - """Test that results in consecutive pages don't repeat when filtering dead links.""" - - total_pages = 100 - pagesize = 5 - - page_results = [] - for page in range(1, total_pages + 1): - page_data = search_without_dead_links(q="*", pagesize=pagesize, page=page) - page_results += page_data["results"] - - def no_duplicates(xs): - s = set() - for x in xs: - if x in s: - return False - s.add(x) - return True - - ids = list(map(lambda x: x["id"], page_results)) - # No results should be repeated so we should have no duplicate ids - assert no_duplicates(ids) - - -def test_related_does_not_break(): - response = requests.get( - f"{API_URL}/image/related/000000000000000000000000000000000000", verify=False - ) - assert response.status_code == 404 - - -@pytest.fixture -def related_factory(): - """Allow passing url parameters along with a related images request.""" - - def _parameterized_search(identifier, **kwargs): - response = requests.get( - f"{API_URL}/image/related/{identifier}", params=kwargs, verify=False - ) - assert response.status_code == 200 - parsed = response.json() - return parsed - - return _parameterized_search - - -@pytest.mark.skip( - reason="Generally, we don't paginate related images, so " - "consistency is less of an issue." -) -def test_related_image_search_page_consistency( - related_factory, search_without_dead_links -): - initial_images = search_without_dead_links(q="*", pagesize=10) - for image in initial_images["results"]: - related = related_factory(image["id"]) - assert related["result_count"] > 0 - assert len(related["results"]) == 10 diff --git a/api/test/api_live_search_qa.py b/api/test/api_live_search_qa.py deleted file mode 100644 index 550d341c782..00000000000 --- a/api/test/api_live_search_qa.py +++ /dev/null @@ -1,43 +0,0 @@ -""" -Tests to run against a live Openverse instance with a significant (10M+) record count. - -Quality of search rankings can be affected by the number of documents in the search -index, so toy examples with few documents do not accurately model relevance at scale. -""" - -import json - -import requests - - -API_URL = "https://api-dev.openverse.engineering" - - -def _phrase_in_tags(tags, term): - for tag in tags: - if "name" in tag: - if tag["name"] == term: - return True - return False - - -def _phrase_in_title(title, term): - return term in title - - -def test_phrase_relevance(): - """ - Test that results have the phrase in the tags or title. - - If I search for "home office", the top results ought to have the phrase - 'home office' in the tags or title. 
- """ - - search_term = "home office" - response = requests.get(f"{API_URL}/image/search?q={search_term}", verify=False) - assert response.status_code == 200 - parsed = json.loads(response.text) - first_result = parsed["results"][0] - assert _phrase_in_tags(first_result["tags"], search_term) or _phrase_in_title( - first_result["title"], search_term - ) diff --git a/api/test/conftest.py b/api/test/conftest.py index 8ffefc50e7a..1aecc9b86c7 100644 --- a/api/test/conftest.py +++ b/api/test/conftest.py @@ -1,3 +1,5 @@ +"""Fixtures usable by or necessary for both unit and integration tests.""" + from test.fixtures.asynchronous import ensure_asgi_lifecycle, get_new_loop, session_loop from test.fixtures.cache import ( django_cache, @@ -5,6 +7,7 @@ unreachable_django_cache, unreachable_redis, ) +from test.fixtures.rest_framework import api_client, request_factory __all__ = [ @@ -15,4 +18,6 @@ "redis", "unreachable_django_cache", "unreachable_redis", + "api_client", + "request_factory", ] diff --git a/api/test/fixtures/rest_framework.py b/api/test/fixtures/rest_framework.py new file mode 100644 index 00000000000..3359b0a81df --- /dev/null +++ b/api/test/fixtures/rest_framework.py @@ -0,0 +1,15 @@ +from rest_framework.test import APIClient, APIRequestFactory + +import pytest + + +@pytest.fixture +def api_client(): + return APIClient() + + +@pytest.fixture +def request_factory() -> APIRequestFactory(): + request_factory = APIRequestFactory(defaults={"REMOTE_ADDR": "192.0.2.1"}) + + return request_factory diff --git a/api/test/integration/conftest.py b/api/test/integration/conftest.py new file mode 100644 index 00000000000..bd9e998175c --- /dev/null +++ b/api/test/integration/conftest.py @@ -0,0 +1,13 @@ +import pytest + + +@pytest.fixture +def django_db_setup(): + """ + We want the integration tests to use the real database so that we can test + the complete behaviour of the system. This fixture overrides the fixture + from ``pytest-django`` that sets up the tests database and because it's a + no-op, the tests will use the real database. + """ + + pass diff --git a/api/test/integration/test_audio_integration.py b/api/test/integration/test_audio_integration.py new file mode 100644 index 00000000000..32e4bfb1e70 --- /dev/null +++ b/api/test/integration/test_audio_integration.py @@ -0,0 +1,29 @@ +""" +End-to-end API tests for audio. + +Can be used to verify a live deployment is functioning as designed. +Run with the `pytest -s` command from this directory, inside the Docker +container. + +Tests common to all media types are in ``test_media_integration.py``. 
+""" + +import pytest + + +pytestmark = pytest.mark.django_db + + +def test_audio_detail_without_thumb(api_client): + resp = api_client.get("/v1/audio/44540200-91eb-483d-9e99-38ce86a52fb6/") + assert resp.status_code == 200 + parsed = resp.json() + assert parsed["thumbnail"] is None + + +def test_audio_search_without_thumb(api_client): + """The first audio of this search should not have a thumbnail.""" + resp = api_client.get("/v1/audio/?q=zaus") + assert resp.status_code == 200 + parsed = resp.json() + assert parsed["results"][0]["thumbnail"] is None diff --git a/api/test/test_auth.py b/api/test/integration/test_auth.py similarity index 72% rename from api/test/test_auth.py rename to api/test/integration/test_auth.py index 752c848fed4..b6a7b492194 100644 --- a/api/test/test_auth.py +++ b/api/test/integration/test_auth.py @@ -1,9 +1,7 @@ import time import uuid -from unittest.mock import patch from django.urls import reverse -from django.utils.http import urlencode import pytest from oauth2_provider.models import AccessToken @@ -38,13 +36,13 @@ def unreachable_oauth_cache(unreachable_django_cache, monkeypatch): @pytest.mark.django_db @pytest.fixture -def test_auth_tokens_registration(client): +def test_auth_tokens_registration(api_client): data = { "name": f"INTEGRATION TEST APPLICATION {uuid.uuid4()}", "description": "A key for testing the OAuth2 registration process.", "email": "example@example.org", } - res = client.post( + res = api_client.post( "/v1/auth_tokens/register/", data, verify=False, @@ -56,20 +54,19 @@ def test_auth_tokens_registration(client): @pytest.mark.django_db @pytest.fixture -def test_auth_token_exchange(client, test_auth_tokens_registration): - client_id = test_auth_tokens_registration["client_id"] - client_secret = test_auth_tokens_registration["client_secret"] - data = urlencode( - { - "client_id": client_id, - "client_secret": client_secret, - "grant_type": "client_credentials", - } - ) - res = client.post( +def test_auth_token_exchange(api_client, test_auth_tokens_registration): + api_client_id = test_auth_tokens_registration["client_id"] + api_client_secret = test_auth_tokens_registration["client_secret"] + data = { + "client_id": api_client_id, + "client_secret": api_client_secret, + "grant_type": "client_credentials", + } + + res = api_client.post( "/v1/auth_tokens/token/", data, - "application/x-www-form-urlencoded", + "multipart", verify=False, ) res_data = res.json() @@ -78,8 +75,8 @@ def test_auth_token_exchange(client, test_auth_tokens_registration): @pytest.mark.django_db -def test_auth_token_exchange_unsupported_method(client): - res = client.get( +def test_auth_token_exchange_unsupported_method(api_client): + res = api_client.get( "/v1/auth_tokens/token/", verify=False, ) @@ -87,11 +84,11 @@ def test_auth_token_exchange_unsupported_method(client): assert res.json()["detail"] == 'Method "GET" not allowed.' 
-def _integration_verify_most_recent_token(client): +def _integration_verify_most_recent_token(api_client): verify = OAuth2Verification.objects.last() code = verify.code path = reverse("verify-email", args=[code]) - return client.get(path) + return api_client.get(path) @pytest.mark.django_db @@ -110,17 +107,17 @@ def _integration_verify_most_recent_token(client): ) def test_auth_email_verification( request, - client, + api_client, is_cache_reachable, cache_name, rate_limit_model, test_auth_token_exchange, ): - res = _integration_verify_most_recent_token(client) + res = _integration_verify_most_recent_token(api_client) assert res.status_code == 200 test_auth_rate_limit_reporting( request, - client, + api_client, is_cache_reachable, cache_name, rate_limit_model, @@ -137,7 +134,7 @@ def test_auth_email_verification( @cache_availability_params def test_auth_rate_limit_reporting( request, - client, + api_client, is_cache_reachable, cache_name, rate_limit_model, @@ -153,7 +150,7 @@ def test_auth_rate_limit_reporting( application = AccessToken.objects.get(token=token).application application.rate_limit_model = rate_limit_model application.save() - res = client.get("/v1/rate_limit/", HTTP_AUTHORIZATION=f"Bearer {token}") + res = api_client.get("/v1/rate_limit/", HTTP_AUTHORIZATION=f"Bearer {token}") res_data = res.json() if is_cache_reachable: assert res.status_code == 200 @@ -176,14 +173,14 @@ def test_auth_rate_limit_reporting( (True, False), ) def test_auth_response_headers( - client, verified, test_auth_tokens_registration, test_auth_token_exchange + api_client, verified, test_auth_tokens_registration, test_auth_token_exchange ): if verified: - _integration_verify_most_recent_token(client) + _integration_verify_most_recent_token(api_client) token = test_auth_token_exchange["access_token"] - res = client.get("/v1/images/", HTTP_AUTHORIZATION=f"Bearer {token}") + res = api_client.get("/v1/images/", HTTP_AUTHORIZATION=f"Bearer {token}") assert ( res.headers["x-ov-client-application-name"] @@ -192,8 +189,8 @@ def test_auth_response_headers( assert res.headers["x-ov-client-application-verified"] == str(verified) -def test_unauthed_response_headers(client): - res = client.get("/v1/images") +def test_unauthed_response_headers(api_client): + res = api_client.get("/v1/images") assert "x-ov-client-application-name" not in res.headers assert "x-ov-client-application-verified" not in res.headers @@ -207,21 +204,16 @@ def test_unauthed_response_headers(client): ("asc", "2022-01-01"), ], ) -def test_sorting_authed(client, test_auth_token_exchange, sort_dir, exp_indexed_on): +def test_sorting_authed(api_client, test_auth_token_exchange, sort_dir, exp_indexed_on): time.sleep(1) token = test_auth_token_exchange["access_token"] query_params = { "unstable__sort_by": "indexed_on", "unstable__sort_dir": sort_dir, } - with patch( - "api.views.image_views.ImageViewSet.get_db_results" - ) as mock_get_db_result: - mock_get_db_result.side_effect = lambda value: value - - res = client.get( - "/v1/images/", query_params, HTTP_AUTHORIZATION=f"Bearer {token}" - ) + res = api_client.get( + "/v1/images/", query_params, HTTP_AUTHORIZATION=f"Bearer {token}" + ) assert res.status_code == 200 res_data = res.json() @@ -238,7 +230,7 @@ def test_sorting_authed(client, test_auth_token_exchange, sort_dir, exp_indexed_ ], ) def test_authority_authed( - client, test_auth_token_exchange, authority_boost, exp_source + api_client, test_auth_token_exchange, authority_boost, exp_source ): time.sleep(1) token = 
test_auth_token_exchange["access_token"] @@ -247,14 +239,9 @@ def test_authority_authed( "unstable__authority": "true", "unstable__authority_boost": authority_boost, } - with patch( - "api.views.image_views.ImageViewSet.get_db_results" - ) as mock_get_db_result: - mock_get_db_result.side_effect = lambda value: value - - res = client.get( - "/v1/images/", query_params, HTTP_AUTHORIZATION=f"Bearer {token}" - ) + res = api_client.get( + "/v1/images/", query_params, HTTP_AUTHORIZATION=f"Bearer {token}" + ) assert res.status_code == 200 res_data = res.json() @@ -263,23 +250,27 @@ def test_authority_authed( @pytest.mark.django_db -def test_page_size_limit_unauthed(client): +def test_page_size_limit_unauthed(api_client): query_params = {"page_size": 20} - res = client.get("/v1/images/", query_params) + res = api_client.get("/v1/images/", query_params) assert res.status_code == 200 query_params["page_size"] = 21 - res = client.get("/v1/images/", query_params) + res = api_client.get("/v1/images/", query_params) assert res.status_code == 401 @pytest.mark.django_db -def test_page_size_limit_authed(client, test_auth_token_exchange): +def test_page_size_limit_authed(api_client, test_auth_token_exchange): time.sleep(1) token = test_auth_token_exchange["access_token"] query_params = {"page_size": 21} - res = client.get("/v1/images/", query_params, HTTP_AUTHORIZATION=f"Bearer {token}") + res = api_client.get( + "/v1/images/", query_params, HTTP_AUTHORIZATION=f"Bearer {token}" + ) assert res.status_code == 200 query_params = {"page_size": 500} - res = client.get("/v1/images/", query_params, HTTP_AUTHORIZATION=f"Bearer {token}") + res = api_client.get( + "/v1/images/", query_params, HTTP_AUTHORIZATION=f"Bearer {token}" + ) assert res.status_code == 200 diff --git a/api/test/test_dead_link_filter.py b/api/test/integration/test_dead_link_filter.py similarity index 91% rename from api/test/test_dead_link_filter.py rename to api/test/integration/test_dead_link_filter.py index caa4600fb7a..6fe7de4cca4 100644 --- a/api/test/test_dead_link_filter.py +++ b/api/test/integration/test_dead_link_filter.py @@ -4,10 +4,8 @@ from django.conf import settings import pytest -import requests from api.controllers.elasticsearch.helpers import DEAD_LINK_RATIO -from test.constants import API_URL @pytest.fixture @@ -62,7 +60,7 @@ def _make_head_requests(urls): @pytest.mark.django_db @_patch_make_head_requests() -def test_dead_link_filtering(mocked_map, client): +def test_dead_link_filtering(mocked_map, api_client): path = "/v1/images/" query_params = {"q": "*", "page_size": 20} @@ -71,7 +69,7 @@ def test_dead_link_filtering(mocked_map, client): "api.views.image_views.ImageViewSet.get_db_results" ) as mock_get_db_result: mock_get_db_result.side_effect = lambda value: value - res_with_dead_links = client.get( + res_with_dead_links = api_client.get( path, query_params | {"filter_dead": False}, ) @@ -79,7 +77,7 @@ def test_dead_link_filtering(mocked_map, client): mocked_map.assert_not_called() # Make a request that filters dead links... 
- res_without_dead_links = client.get( + res_without_dead_links = api_client.get( path, query_params | {"filter_dead": True}, ) @@ -111,7 +109,7 @@ def test_dead_link_filtering(mocked_map, client): ), ) def test_dead_link_filtering_all_dead_links( - client, + api_client, filter_dead, page_size, expected_result_count, @@ -126,7 +124,7 @@ def test_dead_link_filtering_all_dead_links( ) as mock_get_db_result: mock_get_db_result.side_effect = lambda value: value with patch_link_validation_dead_for_count(page_size / DEAD_LINK_RATIO): - response = client.get( + response = api_client.get( path, query_params | {"filter_dead": filter_dead}, ) @@ -141,11 +139,11 @@ def test_dead_link_filtering_all_dead_links( @pytest.fixture -def search_factory(client): +def search_factory(api_client): """Allow passing url parameters along with a search request.""" def _parameterized_search(**kwargs): - response = requests.get(f"{API_URL}/v1/images", params=kwargs, verify=False) + response = api_client.get("/v1/images/", kwargs) assert response.status_code == 200 parsed = response.json() return parsed @@ -203,10 +201,8 @@ def no_duplicates(xs): @pytest.mark.django_db -def test_max_page_count(): - response = requests.get( - f"{API_URL}/v1/images", - params={"page": settings.MAX_PAGINATION_DEPTH + 1}, - verify=False, +def test_max_page_count(api_client): + response = api_client.get( + "/v1/images/", {"page": settings.MAX_PAGINATION_DEPTH + 1} ) assert response.status_code == 400 diff --git a/api/test/integration/test_deprecations.py b/api/test/integration/test_deprecations.py new file mode 100644 index 00000000000..a6b3d30bebf --- /dev/null +++ b/api/test/integration/test_deprecations.py @@ -0,0 +1,38 @@ +import uuid + +import pytest + + +@pytest.mark.parametrize( + "old, new", + [ + ("/v1/sources?type=images", "/v1/images/stats/"), + ("/v1/recommendations/images/{idx}", "/v1/images/{idx}/related/"), + ("/v1/oembed?key=value", "/v1/images/oembed/?key=value"), + ("/v1/thumbs/{idx}", "/v1/images/{idx}/thumb/"), + ], +) +def test_deprecated_endpoints_redirect_to_new(old, new, api_client): + idx = uuid.uuid4() + old = old.format(idx=str(idx)) + new = new.format(idx=str(idx)) + + res = api_client.get(old) + assert res.status_code == 301 + assert res.headers.get("Location") == new + + +@pytest.mark.parametrize( + "method, path, kwargs", + [ + ("get", "/v1/link/abc", {}), + ( + "post", + "/v1/link/", + {"data": {"full_url": "abcd"}, "content_type": "application/json"}, + ), + ], +) +def test_deleted_endpoints_are_gone(method, path, kwargs, api_client): + res = getattr(api_client, method)(path, **kwargs) + assert res.status_code == 410 diff --git a/api/test/integration/test_image_integration.py b/api/test/integration/test_image_integration.py new file mode 100644 index 00000000000..7ee3ac03cbe --- /dev/null +++ b/api/test/integration/test_image_integration.py @@ -0,0 +1,73 @@ +""" +End-to-end API tests for images. + +Can be used to verify a live deployment is functioning as designed. +Run with the `pytest -s` command from this directory, inside the Docker +container. + +Tests common to all media types are in ``test_media_integration.py``. 
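Back in `test_deprecations.py`, the redirects asserted in `test_deprecated_endpoints_redirect_to_new` can also be verified by hand. A hypothetical session, with an invented host and identifier:

```sh
idx="00000000-0000-0000-0000-000000000000"  # invented identifier
curl -si "http://localhost:8000/v1/thumbs/$idx" | head -n 3
# Per the test, the response is a 301 whose Location header points at the
# new route: /v1/images/$idx/thumb/
```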
+""" + +import pytest + + +pytestmark = pytest.mark.django_db + + +@pytest.fixture +def image_fixture(api_client): + response = api_client.get("/v1/images/", {"q": "dog"}) + assert response.status_code == 200 + parsed = response.json() + return parsed + + +@pytest.mark.parametrize( + "url, expected_status_code", + [ + pytest.param( + "https://any.domain/any/path/{identifier}", + 200, + id="OK; no trailing slash", + ), + pytest.param( + "https://any.domain/any/path/{identifier}/", + 200, + id="OK; trailing slash", + ), # trailing slash + pytest.param( + "https://any.domain/any/path/00000000-0000-0000-0000-000000000000", + 404, + id="not OK; valid UUID but no matching identifier", + ), + pytest.param( + "https://any.domain/any/path/not-a-valid-uuid", + 400, + id="not OK; invalid UUID", + ), + ], +) +def test_oembed_endpoint( + image_fixture, url: str, expected_status_code: int, api_client +): + if "{identifier}" in url: + url = url.format(identifier=image_fixture["results"][0]["id"]) + params = {"url": url} + response = api_client.get("/v1/images/oembed/", params) + assert response.status_code == expected_status_code + + +def test_oembed_endpoint_for_json(image_fixture, api_client): + identifier = image_fixture["results"][0]["id"] + params = { + "url": f"https://any.domain/any/path/{identifier}", + # 'format': 'json' is the default + } + response = api_client.get("/v1/images/oembed/", params) + assert response.status_code == 200 + assert response.headers["Content-Type"] == "application/json" + + parsed = response.json() + assert parsed["width"] == image_fixture["results"][0]["width"] + assert parsed["height"] == image_fixture["results"][0]["height"] + assert parsed["license_url"] == image_fixture["results"][0]["license_url"] diff --git a/api/test/integration/test_media_integration.py b/api/test/integration/test_media_integration.py new file mode 100644 index 00000000000..ee725c2f70a --- /dev/null +++ b/api/test/integration/test_media_integration.py @@ -0,0 +1,447 @@ +"""This test suite covers common operations for all media types.""" + +import re +from dataclasses import dataclass + +import pytest + +from api.constants.licenses import LICENSE_GROUPS + + +pytestmark = pytest.mark.django_db + + +@dataclass +class MediaType: + name: str # the name of the media type + path: str # the version of the media type in the URL paths + providers: list[str] # providers for the media type from the sample data + categories: list[str] # categories for the media type from the sample data + tags: list[str] # tags for the media type from the sample data + q: str # a search query for this media type that yields some results + + +def _check_non_es_fields_are_present(results: list[dict]): + for result in results: + # ``license`` is stored in ES, ``license_version`` is not. + assert result["license_version"] is not None + # ``creator`` is stored in ES, ``creator_url`` is not. + assert result["creator_url"] is not None + # ``foreign_landing_url`` is not stored in ES. + assert result["foreign_landing_url"] is not None + + +############ +# Fixtures # +############ + + +@pytest.fixture(params=["audio", "image"]) +def media_type(request): + """ + Get a ``MediaType`` object associated with each media type supported by + Openverse. This fixture is used to parametrize tests and other dependent + fixtures so that the overall test suite covers all supported media types. 
+ """ + + name = request.param + return { + "audio": MediaType( + name="audio", + path="audio", + providers=["freesound", "jamendo", "wikimedia_audio"], + categories=["music", "pronunciation"], + tags=["cat"], + q="love", + ), + "image": MediaType( + name="image", + path="images", + providers=["flickr", "stocksnap"], + categories=["photograph"], + tags=["cat", "Cat"], + q="dog", + ), + }[name] + + +@pytest.fixture +def search_results(media_type: MediaType, api_client) -> tuple[MediaType, dict]: + res = api_client.get(f"/v1/{media_type.path}/", {"q": media_type.q}) + assert res.status_code == 200 + + data = res.json() + return media_type, data + + +@pytest.fixture +def single_result(search_results) -> tuple[MediaType, dict]: + media_type, data = search_results + item = data["results"][0] + return media_type, item + + +@pytest.fixture +def related_results(single_result, api_client) -> tuple[MediaType, dict, dict]: + media_type, item = single_result + res = api_client.get(f"/v1/{media_type.path}/{item['id']}/related/") + assert res.status_code == 200 + + data = res.json() + return media_type, item, data + + +@pytest.fixture +def sensitive_result(media_type: MediaType, api_client) -> tuple[MediaType, dict]: + q = "bird" # Not using the default ``q`` from ``media_type``. + res = api_client.get( + f"/v1/{media_type.path}/", + {"q": q, "unstable__include_sensitive_results": True}, + ) + assert res.status_code == 200 + + data = res.json() + # Raises ``StopIteration`` if no sensitive results are found. + sensitive_result = next( + result for result in data["results"] if result["unstable__sensitivity"] + ) + + return media_type, sensitive_result + + +############## +# Stats view # +############## + + +def test_stats(media_type: MediaType, api_client): + res = api_client.get(f"/v1/{media_type.path}/stats/") + data = res.json() + num_media = 0 + provider_count = 0 + for pair in data: + num_media += pair["media_count"] + provider_count += 1 + assert num_media > 0 + assert provider_count > 0 + + +############### +# Search view # +############### + + +def test_search_returns_non_zero_results(search_results): + _, data = search_results + assert data["result_count"] > 0 + + +def test_search_handles_unbalanced_quotes_with_ok(media_type: MediaType, api_client): + res = api_client.get(f"/v1/{media_type.path}/", {"q": f'"{media_type.q}'}) + assert res.status_code == 200 + + data = res.json() + assert data["result_count"] > 0 + + +def test_search_handles_special_chars_with_ok(media_type: MediaType, api_client): + res = api_client.get(f"/v1/{media_type.path}/", {"q": f"{media_type.q}!"}) + assert res.status_code == 200 + + data = res.json() + assert data["result_count"] > 0 + + +def test_search_results_have_non_es_fields(search_results): + _, data = search_results + _check_non_es_fields_are_present(data["results"]) + + +def test_search_removes_dupes_from_initial_pages(media_type: MediaType, api_client): + """ + Return consistent, non-duplicate results in the first n pages. + + Elasticsearch sometimes reaches an inconsistent state, which causes search + results to appear differently upon page refresh. This can also introduce + image duplicates in subsequent pages. This test ensures that no duplicates + appear in the first few pages of a search query. 
+ """ + + num_pages = 5 + + searches = { + api_client.get(f"/v1/{media_type.path}/", {"page": page}) + for page in range(1, num_pages) + } + + results = set() + for res in searches: + parsed = res.json() + for result in parsed["results"]: + media_id = result["id"] + assert media_id not in results # Ensure that each result is new. + results.add(media_id) + + +@pytest.mark.parametrize( + "search_field, match_field", [("q", "title"), ("creator", "creator")] +) +def test_search_quotes_matches_only_exact( + media_type: MediaType, search_field, match_field, api_client +): + # We want a query containing more than one word. + if match_field == "title": + q = "dancing penguins" + else: + q = "The League" if media_type.name == "audio" else "Steve Wedgwood" + + base_params = {"unstable__include_sensitive_results": True} + path = f"/v1/{media_type.path}/" + + unquoted_res = api_client.get(path, base_params | {search_field: q}) + assert unquoted_res.status_code == 200 + + unquoted_data = unquoted_res.json() + unquoted_result_count = unquoted_data["result_count"] + assert unquoted_result_count > 0 + + unquoted_results = unquoted_data["results"] + exact_matches = [q in item[match_field] for item in unquoted_results].count(True) + assert 0 < exact_matches < unquoted_result_count + + quoted_res = api_client.get(path, base_params | {search_field: f'"{q}"'}) + assert quoted_res.status_code == 200 + + quoted_data = quoted_res.json() + quoted_result_count = quoted_data["result_count"] + assert quoted_result_count > 0 + + quoted_results = quoted_data["results"] + assert all([q in item[match_field] for item in quoted_results]) + + # Unquoted results will match more records due to the query being overall + # less strict. Above we check that the results are not 0 to confirm that we + # do still get results back. 
+ assert quoted_result_count < unquoted_result_count + + +def test_search_filters_by_source(media_type: MediaType, api_client): + provider = media_type.providers[0] + res = api_client.get( + f"/v1/{media_type.path}/", + {"q": media_type.q, "source": provider}, + ) + assert res.status_code == 200 + + data = res.json() + assert data["result_count"] > 0 + assert all(result["source"] == provider for result in data["results"]) + + +def test_search_returns_zero_results_when_all_excluded( + media_type: MediaType, api_client +): + res = api_client.get( + f"/v1/{media_type.path}/", + {"q": media_type.q, "excluded_source": ",".join(media_type.providers)}, + ) + assert res.status_code == 200 + + data = res.json() + assert data["result_count"] == 0 + + +def test_search_refuses_both_sources_and_excluded(media_type: MediaType, api_client): + res = api_client.get( + f"/v1/{media_type.path}/", + {"q": media_type.q, "source": "x", "excluded_source": "y"}, + ) + assert res.status_code == 400 + + +@pytest.mark.parametrize( + "filter_rule, exp_licenses", + [ + ({"license_type": "commercial"}, LICENSE_GROUPS["commercial"]), # license group + ( + {"license_type": "commercial,modification"}, + LICENSE_GROUPS["commercial"] & LICENSE_GROUPS["modification"], + ), # multiple license groups + ({"license": "by"}, ["by"]), # exact license + ({"license": "by,by-nc-nd"}, ["by", "by-nc-nd"]), # multiple exact licenses + ({"license": "bY"}, ["by"]), # case insensitive + ], +) +def test_search_filters_by_license( + media_type: MediaType, filter_rule, exp_licenses, api_client +): + res = api_client.get(f"/v1/{media_type.path}/", filter_rule) + assert res.status_code == 200 + + data = res.json() + assert data["result_count"] > 0 + assert all(result["license"] in exp_licenses for result in data["results"]) + + +def test_search_filters_by_extension(media_type: MediaType, api_client): + ext = "mp3" if media_type.name == "audio" else "jpg" + res = api_client.get(f"/v1/{media_type.path}/", {"extension": ext}) + assert res.status_code == 200 + + data = res.json() + assert data["result_count"] > 0 + assert all(result["filetype"] == ext for result in data["results"]) + + +def test_search_filters_by_category(media_type: MediaType, api_client): + for category in media_type.categories: + res = api_client.get(f"/v1/{media_type.path}/", {"category": category}) + assert res.status_code == 200 + + data = res.json() + assert data["result_count"] > 0 + assert all(result["category"] == category for result in data["results"]) + + +def test_search_refuses_invalid_categories(media_type: MediaType, api_client): + res = api_client.get(f"/v1/{media_type.path}/", {"category": "invalid_category"}) + assert res.status_code == 400 + + +################ +# Detail view # +################ + + +@pytest.mark.parametrize( + "bad_uuid", + [ + "123456789123456789123456789123456789", + "12345678-1234-5678-1234-1234567891234", + "abcd", + ], +) +def test_detail_view_for_invalid_uuids_returns_not_found( + media_type: MediaType, bad_uuid: str, api_client +): + res = api_client.get(f"/v1/{media_type.path}/{bad_uuid}/") + assert res.status_code == 404 + + +def test_detail_view_returns_ok(single_result, api_client): + media_type, item = single_result + res = api_client.get(f"/v1/{media_type.path}/{item['id']}/") + assert res.status_code == 200 + + +def test_detail_view_contains_sensitivity_info(sensitive_result, api_client): + media_type, item = sensitive_result + res = api_client.get(f"/v1/{media_type.path}/{item['id']}/") + assert res.status_code == 200 + + data = 
res.json() + assert data["unstable__sensitivity"] is not None + assert len(data["unstable__sensitivity"]) > 0 + + +################ +# Related view # +################ + + +def test_related_view_has_no_pagination(related_results): + _, _, data = related_results + results = data["results"] + assert data["result_count"] == len(results) == 10 + assert data["page_count"] == 1 + + +def test_related_results_have_something_in_common_with_parent(related_results): + _, item, data = related_results + + def _get_terms_set(obj): + # The title is analyzed in ES, we try to mimic it here. + terms = [t["name"] for t in obj["tags"]] + re.split(r"[\s-]", obj["title"]) + return {t.lower() for t in terms} + + terms_set = _get_terms_set(item) + # Make sure each result has at least one word in common with the original item, + # or is by the same creator. + for result in data["results"]: + assert ( + len(terms_set.intersection(_get_terms_set(result))) > 0 + or result["creator"] == item["creator"] + ), f"{terms_set} {_get_terms_set(result)}/{result['creator']}-{item['creator']}" + + +def test_related_results_have_non_es_fields(related_results): + *_, data = related_results + _check_non_es_fields_are_present(data["results"]) + + +############### +# Report view # +############### + + +def test_report_is_created(single_result, api_client): + media_type, item = single_result + res = api_client.post( + f"/v1/{media_type.path}/{item['id']}/report/", + { + "reason": "mature", + "description": "This item contains sensitive content", + }, + "json", + ) + assert res.status_code == 201 + + data = res.json() + assert data["identifier"] == item["id"] + + +#################### +# Collection views # +#################### + + +def test_collection_by_tag(media_type: MediaType, api_client): + tags = media_type.tags + for tag in tags: + res = api_client.get(f"/v1/{media_type.path}/tag/{tag}/") + assert res.status_code == 200 + + data = res.json() + assert data["result_count"] > 0 + for result in data["results"]: + tag_names = [tag["name"] for tag in result["tags"]] + assert tag in tag_names + + +def test_collection_by_source(media_type: MediaType, api_client): + source = api_client.get(f"/v1/{media_type.path}/stats/").json()[0]["source_name"] + + res = api_client.get(f"/v1/{media_type.path}/source/{source}/") + assert res.status_code == 200 + + data = res.json() + assert data["result_count"] > 0 + assert all(result["source"] == source for result in data["results"]) + + +def test_collection_by_creator(media_type: MediaType, api_client): + source_res = api_client.get(f"/v1/{media_type.path}/stats/") + source = source_res.json()[0]["source_name"] + + first_res = api_client.get(f"/v1/{media_type.path}/source/{source}/") + first = first_res.json()["results"][0] + assert (creator := first.get("creator")) + + res = api_client.get(f"/v1/{media_type.path}/source/{source}/creator/{creator}/") + assert res.status_code == 200 + + data = res.json() + assert data["result_count"] > 0 + for result in data["results"]: + assert result["source"] == source + assert result["creator"] == creator diff --git a/api/test/media_integration.py b/api/test/media_integration.py deleted file mode 100644 index 2ca5eb699c9..00000000000 --- a/api/test/media_integration.py +++ /dev/null @@ -1,258 +0,0 @@ -""" -Base test cases for all media types. - -These are not tests and cannot be invoked. 
-""" - -import json -import re - -import requests - -from test.constants import API_URL - - -def search(fixture): - """Return results for test query.""" - - assert fixture["result_count"] > 0 - - -def search_by_category(media_path, category, fixture): - response = requests.get(f"{API_URL}/v1/{media_path}?category={category}") - assert response.status_code == 200 - data = json.loads(response.text) - assert data["result_count"] < fixture["result_count"] - results = data["results"] - # Make sure each result is from the specified category - assert all(audio_item["category"] == category for audio_item in results) - - -def tag_collection(media_path, tag="cat"): - response = requests.get(f"{API_URL}/v1/{media_path}/tag/{tag}") - assert response.status_code == 200 - - results = response.json()["results"] - for r in results: - tag_names = [tag["name"] for tag in r["tags"]] - assert tag in tag_names - - -def source_collection(media_path): - source = requests.get(f"{API_URL}/v1/{media_path}/stats").json()[0]["source_name"] - - response = requests.get(f"{API_URL}/v1/{media_path}/source/{source}") - assert response.status_code == 200 - - results = response.json()["results"] - assert all(result["source"] == source for result in results) - - -def creator_collection(media_path): - source = requests.get(f"{API_URL}/v1/{media_path}/stats").json()[0]["source_name"] - - first_res = requests.get(f"{API_URL}/v1/{media_path}/source/{source}").json()[ - "results" - ][0] - if not (creator := first_res.get("creator")): - raise AttributeError(f"No creator in {first_res}") - - response = requests.get( - f"{API_URL}/v1/{media_path}/source/{source}/creator/{creator}" - ) - assert response.status_code == 200 - - results = response.json()["results"] - for result in results: - assert result["source"] == source, f"{result['source']} != {source}" - assert result["creator"] == creator, f"{result['creator']} != {creator}" - - -def search_all_excluded(media_path, excluded_source): - response = requests.get( - f"{API_URL}/v1/{media_path}?q=test&excluded_source={','.join(excluded_source)}" - ) - data = json.loads(response.text) - assert data["result_count"] == 0 - - -def search_source_and_excluded(media_path): - response = requests.get( - f"{API_URL}/v1/{media_path}?q=test&source=x&excluded_source=y" - ) - assert response.status_code == 400 - - -def search_quotes(media_path, q="test"): - """Return a response when quote matching is messed up.""" - - response = requests.get(f'{API_URL}/v1/{media_path}?q="{q}', verify=False) - assert response.status_code == 200 - - -def search_quotes_exact(media_path, q): - """Return only exact matches for the given query.""" - - url_format = ( - f"{API_URL}/v1/{media_path}?q={{q}}&unstable__include_sensitive_results=true" - ) - unquoted_response = requests.get(url_format.format(q=q), verify=False) - assert unquoted_response.status_code == 200 - unquoted_result_count = unquoted_response.json()["result_count"] - assert unquoted_result_count > 0 - unquoted_results = unquoted_response.json()["results"] - titles = [res["title"] for res in unquoted_results] - exact_match_count = sum([1 for t in titles if q in t]) - assert exact_match_count > 0, f"No results contain `{q}` in title: {titles}" - assert exact_match_count < len( - titles - ), f"Unquoted search returned only exact matches: {titles}" - - quoted_response = requests.get(url_format.format(q=f'"{q}"'), verify=False) - assert quoted_response.status_code == 200 - quoted_result_count = quoted_response.json()["result_count"] - assert 
quoted_result_count > 0 - - # The rationale here is that the unquoted results will match more records due - # to the query being overall less strict. Quoting the query will make it more - # strict causing it to return fewer results. - # Above we check that the results are not 0 to confirm that we do still get results back. - assert quoted_result_count < unquoted_result_count - - quoted_result_titles = [res["title"] for res in quoted_response.json()["results"]] - assert all( - [q in title for title in quoted_result_titles] - ), f"Not all titles contain exact match for `{q}`: {quoted_result_titles}" - - -def search_special_chars(media_path, q="test"): - """Return a response when query includes special characters.""" - - response = requests.get(f"{API_URL}/v1/{media_path}?q={q}!", verify=False) - assert response.status_code == 200 - - -def search_consistency( - media_path, - n_pages, -): - """ - Return consistent, non-duplicate results in the first n pages. - - Elasticsearch sometimes reaches an inconsistent state, which causes search - results to appear differently upon page refresh. This can also introduce - image duplicates in subsequent pages. This test ensures that no duplicates - appear in the first few pages of a search query. - """ - - searches = { - requests.get(f"{API_URL}/v1/{media_path}?page={page}", verify=False) - for page in range(1, n_pages) - } - - results = set() - for response in searches: - parsed = json.loads(response.text) - for result in parsed["results"]: - media_id = result["id"] - assert media_id not in results - results.add(media_id) - - -def detail(media_type, fixture): - test_id = fixture["results"][0]["id"] - response = requests.get(f"{API_URL}/v1/{media_type}/{test_id}", verify=False) - assert response.status_code == 200 - - -def stats(media_type, count_key="media_count"): - response = requests.get(f"{API_URL}/v1/{media_type}/stats", verify=False) - parsed_response = json.loads(response.text) - assert response.status_code == 200 - num_media = 0 - provider_count = 0 - for pair in parsed_response: - media_count = pair[count_key] - num_media += int(media_count) - provider_count += 1 - assert num_media > 0 - assert provider_count > 0 - - -def report(media_type, fixture): - test_id = fixture["results"][0]["id"] - response = requests.post( - f"{API_URL}/v1/{media_type}/{test_id}/report/", - json={ - "reason": "mature", - "description": "This item contains sensitive content", - }, - verify=False, - ) - assert response.status_code == 201 - data = json.loads(response.text) - assert data["identifier"] == test_id - - -def license_filter_case_insensitivity(media_type): - response = requests.get(f"{API_URL}/v1/{media_type}?license=bY", verify=False) - parsed = json.loads(response.text) - assert parsed["result_count"] > 0 - - -def uuid_validation(media_type, identifier): - response = requests.get(f"{API_URL}/v1/{media_type}/{identifier}", verify=False) - assert response.status_code == 404 - - -def related(fixture): - item = fixture["results"][0] - - response = requests.get(item["related_url"]).json() - results = response["results"] - - assert response["result_count"] == len(results) == 10 - assert response["page_count"] == 1 - - def get_terms_set(res): - # The title is analyzed in ES, we try to mimic it here. - terms = [t["name"] for t in res["tags"]] + re.split(" |-", res["title"]) - return {t.lower() for t in terms} - - terms_set = get_terms_set(item) - # Make sure each result has at least one word in common with the original item, - # or is by the same creator. 
- for result in results: - assert ( - len(terms_set.intersection(get_terms_set(result))) > 0 - or result["creator"] == item["creator"] - ), f"{terms_set} {get_terms_set(result)}/{result['creator']}-{item['creator']}" - - assert result["license_version"] is not None - assert result["attribution"] is not None - assert result["creator_url"] is not None - - -def sensitive_search_and_detail(media_type): - search_res = requests.get( - f"{API_URL}/v1/{media_type}/", - params={"q": "bird", "unstable__include_sensitive_results": "true"}, - verify=False, - ) - results = search_res.json()["results"] - - sensitive_result = None - sensitivities = [] - for result in results: - if sensitivities := result["unstable__sensitivity"]: - sensitive_result = result - break - assert sensitive_result is not None - assert len(sensitivities) != 0 - - detail_res = requests.get( - f"{API_URL}/v1/{media_type}/{sensitive_result['id']}", verify=False - ) - details = detail_res.json() - - assert sensitivities == details["unstable__sensitivity"] diff --git a/api/test/test_audio_integration.py b/api/test/test_audio_integration.py deleted file mode 100644 index 6bf1796d682..00000000000 --- a/api/test/test_audio_integration.py +++ /dev/null @@ -1,179 +0,0 @@ -""" -End-to-end API tests for audio. - -Can be used to verify a live deployment is functioning as designed. -Run with the `pytest -s` command from this directory. -""" - -import json - -import pytest -import requests -from django_redis import get_redis_connection - -from api.utils.check_dead_links import CACHE_PREFIX -from test.constants import API_URL -from test.media_integration import ( - creator_collection, - detail, - license_filter_case_insensitivity, - related, - report, - search, - search_all_excluded, - search_by_category, - search_consistency, - search_quotes, - search_quotes_exact, - search_source_and_excluded, - search_special_chars, - sensitive_search_and_detail, - source_collection, - stats, - tag_collection, - uuid_validation, -) - - -@pytest.fixture -def force_result_validity(): - statuses = {} - - def force_validity(query_response): - nonlocal statuses - new_statuses = { - f"{CACHE_PREFIX}{item['url']}": 200 for item in query_response["results"] - } - statuses |= new_statuses - with get_redis_connection() as redis: - redis.mset(new_statuses) - - yield force_validity - - with get_redis_connection() as redis: - redis.delete(*list(statuses.keys())) - - -@pytest.fixture -def audio_fixture(force_result_validity): - res = requests.get(f"{API_URL}/v1/audio/", verify=False) - parsed = res.json() - force_result_validity(parsed) - assert res.status_code == 200 - return parsed - - -@pytest.fixture -def jamendo_audio_fixture(force_result_validity): - """ - Get an audio object specifically from the Jamendo provider. - - Thumbnail tests must use Jamendo results because the Wikimedia - sample audio results do not have thumbnails. 
- """ - res = requests.get( - f"{API_URL}/v1/audio/", - data={"source": "jamendo"}, - verify=False, - ) - parsed = res.json() - force_result_validity(parsed) - assert res.status_code == 200 - return parsed - - -def test_search(audio_fixture): - search(audio_fixture) - - -def test_search_category_filtering(audio_fixture): - search_by_category("audio", "music", audio_fixture) - search_by_category("audio", "pronunciation", audio_fixture) - - -def test_search_category_filtering_fails(audio_fixture): - with pytest.raises(AssertionError): - search_by_category("audio", "not_valid", audio_fixture) - - -def test_search_all_excluded(): - search_all_excluded("audio", ["freesound", "jamendo", "wikimedia_audio"]) - - -def test_search_source_and_excluded(): - search_source_and_excluded("audio") - - -def test_search_quotes(): - search_quotes("audio", "love") - - -def test_search_quotes_exact(): - # ``dancing penguins`` returns different results when quoted vs unquoted - search_quotes_exact("audio", "dancing penguins") - - -def test_search_with_special_characters(): - search_special_chars("audio", "love") - - -def test_search_consistency(): - n_pages = 5 - search_consistency("audio", n_pages) - - -def test_audio_detail(audio_fixture): - detail("audio", audio_fixture) - - -def test_audio_stats(): - stats("audio") - - -def test_audio_detail_without_thumb(): - resp = requests.get(f"{API_URL}/v1/audio/44540200-91eb-483d-9e99-38ce86a52fb6") - assert resp.status_code == 200 - parsed = json.loads(resp.text) - assert parsed["thumbnail"] is None - - -def test_audio_search_without_thumb(): - """The first audio of this search should not have a thumbnail.""" - resp = requests.get(f"{API_URL}/v1/audio/?q=zaus") - assert resp.status_code == 200 - parsed = json.loads(resp.text) - assert parsed["results"][0]["thumbnail"] is None - - -def test_audio_report(audio_fixture): - report("audio", audio_fixture) - - -def test_audio_license_filter_case_insensitivity(): - license_filter_case_insensitivity("audio") - - -def test_audio_uuid_validation(): - uuid_validation("audio", "123456789123456789123456789123456789") - uuid_validation("audio", "12345678-1234-5678-1234-1234567891234") - uuid_validation("audio", "abcd") - - -def test_audio_related(audio_fixture): - related(audio_fixture) - - -def test_audio_tag_collection(): - tag_collection("audio") - - -def test_audio_source_collection(): - source_collection("audio") - - -def test_audio_creator_collection(): - creator_collection("audio") - - -def test_audio_sensitive_search_and_detail(): - sensitive_search_and_detail("audio") diff --git a/api/test/test_backwards_compat.py b/api/test/test_backwards_compat.py deleted file mode 100644 index 4250aa5d049..00000000000 --- a/api/test/test_backwards_compat.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -Ensures that deprecated URLs are redirected to their updated paths and not left to rot. - -Can be used to verify a live deployment is functioning as designed. -Run with the `pytest -s` command from this directory. 
-""" - -import uuid - -import requests - -from test.constants import API_URL - - -def test_old_stats_endpoint(): - response = requests.get( - f"{API_URL}/v1/sources?type=images", allow_redirects=False, verify=False - ) - assert response.status_code == 301 - assert response.is_permanent_redirect - assert response.headers.get("Location") == "/v1/images/stats/" - - -def test_old_related_images_endpoint(): - idx = uuid.uuid4() - response = requests.get( - f"{API_URL}/v1/recommendations/images/{idx}", - allow_redirects=False, - verify=False, - ) - assert response.status_code == 301 - assert response.is_permanent_redirect - assert response.headers.get("Location") == f"/v1/images/{idx}/related/" - - -def test_old_oembed_endpoint(): - response = requests.get( - f"{API_URL}/v1/oembed?key=value", allow_redirects=False, verify=False - ) - assert response.status_code == 301 - assert response.is_permanent_redirect - assert response.headers.get("Location") == "/v1/images/oembed/?key=value" - - -def test_old_thumbs_endpoint(): - idx = uuid.uuid4() - response = requests.get( - f"{API_URL}/v1/thumbs/{idx}", allow_redirects=False, verify=False - ) - assert response.status_code == 301 - assert response.is_permanent_redirect - assert response.headers.get("Location") == f"/v1/images/{idx}/thumb/" diff --git a/api/test/test_image_integration.py b/api/test/test_image_integration.py deleted file mode 100644 index 34dcb840b87..00000000000 --- a/api/test/test_image_integration.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -End-to-end API tests for images. - -Can be used to verify a live deployment is functioning as designed. -Run with the `pytest -s` command from this directory. -""" - -import json -from urllib.parse import urlencode - -import pytest -import requests - -from test.constants import API_URL -from test.media_integration import ( - creator_collection, - detail, - license_filter_case_insensitivity, - related, - report, - search, - search_all_excluded, - search_consistency, - search_quotes, - search_quotes_exact, - search_source_and_excluded, - search_special_chars, - sensitive_search_and_detail, - source_collection, - stats, - tag_collection, - uuid_validation, -) - - -identifier = "cdbd3bf6-1745-45bb-b399-61ee149cd58a" - - -@pytest.fixture -def image_fixture(): - response = requests.get(f"{API_URL}/v1/images?q=dog", verify=False) - assert response.status_code == 200 - parsed = json.loads(response.text) - return parsed - - -def test_search(image_fixture): - search(image_fixture) - - -def test_search_all_excluded(): - search_all_excluded("images", ["flickr", "stocksnap"]) - - -def test_search_source_and_excluded(): - search_source_and_excluded("images") - - -def test_search_quotes(): - search_quotes("images", "dog") - - -def test_search_quotes_exact(): - # ``dancing penguins`` returns different results when quoted vs unquoted - search_quotes_exact("images", "dancing penguins") - - -def test_search_with_special_characters(): - search_special_chars("images", "dog") - - -def test_search_consistency(): - n_pages = 5 - search_consistency("images", n_pages) - - -def test_image_detail(image_fixture): - detail("images", image_fixture) - - -def test_image_stats(): - stats("images") - - -def test_audio_report(image_fixture): - report("images", image_fixture) - - -@pytest.mark.parametrize( - "url, expected_status_code", - [ - pytest.param( - f"https://any.domain/any/path/{identifier}", - 200, - id="OK; no trailing slash", - ), - pytest.param( - f"https://any.domain/any/path/{identifier}/", - 200, - id="OK; with trailing 
slash", - ), # trailing slash - pytest.param( - "https://any.domain/any/path/00000000-0000-0000-0000-000000000000", - 404, - id="Valid UUID but no matching identifier", - ), - pytest.param( - "https://any.domain/any/path/not-a-valid-uuid", - 400, - id="not a valid UUID", - ), - ], -) -def test_oembed_endpoint(url, expected_status_code): - params = {"url": url} - response = requests.get( - f"{API_URL}/v1/images/oembed?{urlencode(params)}", verify=False - ) - assert response.status_code == expected_status_code - - -def test_oembed_endpoint_for_json(): - params = { - "url": f"https://any.domain/any/path/{identifier}", - # 'format': 'json' is the default - } - response = requests.get( - f"{API_URL}/v1/images/oembed?{urlencode(params)}", verify=False - ) - assert response.status_code == 200 - assert response.headers["Content-Type"] == "application/json" - - parsed = response.json() - assert parsed["width"] == 1024 - assert parsed["height"] == 683 - assert parsed["license_url"] == "https://creativecommons.org/licenses/by/2.0/" - - -def test_image_license_filter_case_insensitivity(): - license_filter_case_insensitivity("images") - - -def test_image_uuid_validation(): - uuid_validation("images", "123456789123456789123456789123456789") - uuid_validation("images", "12345678-1234-5678-1234-1234567891234") - uuid_validation("images", "abcd") - - -def test_image_tag_collection(): - tag_collection("images", "cat") - - -def test_image_tag_collection_case_sensitive(): - tag_collection("images", "Cat") - - -def test_image_source_collection(): - source_collection("images") - - -def test_image_creator_collection(): - creator_collection("images") - - -def test_image_related(image_fixture): - related(image_fixture) - - -def test_image_sensitive_search_and_detail(): - sensitive_search_and_detail("images") diff --git a/api/test/test_v1_integration.py b/api/test/test_v1_integration.py deleted file mode 100644 index b29c57f4a8f..00000000000 --- a/api/test/test_v1_integration.py +++ /dev/null @@ -1,247 +0,0 @@ -""" -End-to-end API tests. - -Can be used to verify a live deployment is functioning as designed. -Run with the `pytest -s` command from this directory. 
-""" - -import json - -import pytest -import requests - -from api.constants.licenses import LICENSE_GROUPS -from api.models import Image -from api.utils.watermark import watermark -from test.constants import API_URL - - -@pytest.fixture -def image_fixture(): - response = requests.get(f"{API_URL}/v1/images?q=dog", verify=False) - assert response.status_code == 200 - parsed = json.loads(response.text) - return parsed - - -def test_link_shortener_create(): - payload = {"full_url": "abcd"} - response = requests.post(f"{API_URL}/v1/link/", json=payload, verify=False) - assert response.status_code == 410 - - -def test_link_shortener_resolve(): - response = requests.get(f"{API_URL}/v1/link/abc", verify=False) - assert response.status_code == 410 - - -@pytest.mark.skip(reason="Disabled feature") -@pytest.fixture -def test_list_create(image_fixture): - payload = { - "title": "INTEGRATION TEST", - "images": [image_fixture["results"][0]["id"]], - } - response = requests.post(f"{API_URL}/list", json=payload, verify=False) - parsed_response = json.loads(response.text) - assert response.status_code == 201 - return parsed_response - - -@pytest.mark.skip(reason="Disabled feature") -def test_list_detail(test_list_create): - list_slug = test_list_create["url"].split("/")[-1] - response = requests.get(f"{API_URL}/list/{list_slug}", verify=False) - assert response.status_code == 200 - - -@pytest.mark.skip(reason="Disabled feature") -def test_list_delete(test_list_create): - list_slug = test_list_create["url"].split("/")[-1] - token = test_list_create["auth"] - headers = {"Authorization": f"Token {token}"} - response = requests.delete( - f"{API_URL}/list/{list_slug}", headers=headers, verify=False - ) - assert response.status_code == 204 - - -def test_license_type_filtering(): - """Ensure that multiple license type filters interact together correctly.""" - - commercial = LICENSE_GROUPS["commercial"] - modification = LICENSE_GROUPS["modification"] - commercial_and_modification = set.intersection(modification, commercial) - response = requests.get( - f"{API_URL}/v1/images?q=dog&license_type=commercial,modification", verify=False - ) - parsed = json.loads(response.text) - for result in parsed["results"]: - assert result["license"] in commercial_and_modification - - -def test_single_license_type_filtering(): - commercial = LICENSE_GROUPS["commercial"] - response = requests.get( - f"{API_URL}/v1/images?q=dog&license_type=commercial", verify=False - ) - parsed = json.loads(response.text) - for result in parsed["results"]: - assert result["license"] in commercial - - -def test_specific_license_filter(): - response = requests.get(f"{API_URL}/v1/images?q=dog&license=by", verify=False) - parsed = json.loads(response.text) - for result in parsed["results"]: - assert result["license"] == "by" - - -def test_creator_quotation_grouping(): - """Test that quotation marks can be used to narrow down search results.""" - - no_quotes = json.loads( - requests.get(f"{API_URL}/v1/images?creator=Steve%20Wedgwood", verify=False).text - ) - quotes = json.loads( - requests.get( - f'{API_URL}/v1/images?creator="Steve%20Wedgwood"', verify=False - ).text - ) - # Did quotation marks actually narrow down the search? - assert len(no_quotes["results"]) > len(quotes["results"]) - # Did we find only William Ford Stanley works, or also by others? 
- for result in quotes["results"]: - assert "Steve Wedgwood" in result["creator"] - - -@pytest.mark.skip(reason="Unmaintained feature/grequests ssl recursion bug") -def test_watermark_preserves_exif(): - img_with_exif = ( - "https://raw.githubusercontent.com/ianare/exif-samples/" - "master/jpg/Canon_PowerShot_S40.jpg" - ) - info = { - "title": "test", - "creator": "test", - "license": "test", - "license_version": "test", - } - _, exif = watermark(image_url=img_with_exif, info=info) - assert exif is not None - - img_no_exif = ( - "https://creativecommons.org/wp-content/uploads/" - "2019/03/9467312978_64cd5d2f3b_z.jpg" - ) - _, no_exif = watermark(image_url=img_no_exif, info=info) - assert no_exif is None - - -def test_attribution(): - """ - Check that the API includes an attribution string. - - Since there are some works where the title or creator is not known, the format of - the attribution string can need to be tweaked slightly. - """ - - title_and_creator_missing = Image( - identifier="ab80dbe1-414c-4ee8-9543-f9599312aeb8", - title=None, - creator=None, - license="by", - license_version="3.0", - ) - assert "This work" in title_and_creator_missing.attribution - - title = "A foo walks into a bar" - creator_missing = Image( - identifier="ab80dbe1-414c-4ee8-9543-f9599312aeb8", - title=title, - creator=None, - license="by", - license_version="3.0", - ) - assert title in creator_missing.attribution - assert "by " not in creator_missing.attribution - - creator = "John Doe" - title_missing = Image( - identifier="ab80dbe1-414c-4ee8-9543-f9599312aeb8", - title=None, - creator=creator, - license="by", - license_version="3.0", - ) - assert creator in title_missing.attribution - assert "This work" in title_missing.attribution - - all_data_present = Image( - identifier="ab80dbe1-414c-4ee8-9543-f9599312aeb8", - title=title, - creator=creator, - license="by", - license_version="3.0", - ) - assert title in all_data_present.attribution - assert creator in all_data_present.attribution - - -def test_license_override(): - null_license_url = Image( - identifier="ab80dbe1-414c-4ee8-9543-f9599312aeb8", - title="test", - creator="test", - license="by", - license_version="3.0", - meta_data={"license_url": "null"}, - ) - assert null_license_url.license_url is not None - - -def test_source_search(): - response = requests.get(f"{API_URL}/v1/images?source=flickr", verify=False) - if response.status_code != 200: - print(f"Request failed. Message: {response.body}") - assert response.status_code == 200 - parsed = json.loads(response.text) - assert parsed["result_count"] > 0 - - -def test_extension_filter(): - response = requests.get(f"{API_URL}/v1/images?q=dog&extension=jpg") - parsed = json.loads(response.text) - for result in parsed["results"]: - assert ".jpg" in result["url"] - - -@pytest.fixture -def recommendation_factory(): - """Allow passing url parameters along with a related images request.""" - - def _parameterized_search(identifier, **kwargs): - response = requests.get( - f"{API_URL}/v1/recommendations?type=images&id={identifier}", - params=kwargs, - verify=False, - ) - assert response.status_code == 200 - parsed = response.json() - return parsed - - return _parameterized_search - - -@pytest.mark.skip( - reason="Generally, we don't paginate related images, so " - "consistency is less of an issue." 
-) -def test_related_image_search_page_consistency( - recommendation, search_without_dead_links -): - initial_images = search_without_dead_links(q="*", page_size=10) - for image in initial_images["results"]: - related = recommendation_factory(image["id"]) - assert related["result_count"] > 0 - assert len(related["results"]) == 10 diff --git a/api/test/unit/conftest.py b/api/test/unit/conftest.py index ad70cd77965..674b97a6eab 100644 --- a/api/test/unit/conftest.py +++ b/api/test/unit/conftest.py @@ -1,8 +1,6 @@ from dataclasses import dataclass from unittest.mock import MagicMock -from rest_framework.test import APIClient, APIRequestFactory - import pook import pytest from elasticsearch import Elasticsearch @@ -39,11 +37,6 @@ ) -@pytest.fixture -def api_client(): - return APIClient() - - @pytest.fixture(autouse=True) def sentry_capture_exception(monkeypatch): mock = MagicMock() @@ -52,13 +45,6 @@ def sentry_capture_exception(monkeypatch): yield mock -@pytest.fixture -def request_factory() -> APIRequestFactory(): - request_factory = APIRequestFactory(defaults={"REMOTE_ADDR": "192.0.2.1"}) - - return request_factory - - @dataclass class MediaTypeConfig: media_type: str @@ -158,3 +144,12 @@ def cleanup_elasticsearch_test_documents(request, settings): query={"match": {"tags.name": CREATED_BY_FIXTURE_MARKER}}, refresh=True, ) + + +__all__ = [ + "sentry_capture_exception", + "image_media_type_config", + "audio_media_type_config", + "media_type_config", + "cleanup_elasticsearch_test_documents", +] diff --git a/api/test/unit/models/test_media.py b/api/test/unit/models/test_media.py new file mode 100644 index 00000000000..6d7d3b00e67 --- /dev/null +++ b/api/test/unit/models/test_media.py @@ -0,0 +1,56 @@ +import pytest + +from api.models import Audio, Image + + +media_type_params = pytest.mark.parametrize( + "media_type, media_model", + [ + ("image", Image), + ("audio", Audio), + ], +) + + +@media_type_params +@pytest.mark.parametrize( + "fields, attribution", + [ + ( + ["title", "creator"], + '"A foo walks into a bar" by John Doe is licensed under CC BY 3.0.', + ), + (["title"], '"A foo walks into a bar" is licensed under CC BY 3.0.'), + (["creator"], "This work by John Doe is licensed under CC BY 3.0."), + ([], "This work is licensed under CC BY 3.0."), + ], +) +def test_attribution_handles_missing_title_or_creator( + media_type, media_model, fields, attribution +): + field_values = { + "title": "A foo walks into a bar", + "creator": "John Doe", + } + + obj = media_model( + license="by", + license_version="3.0", + ) + for field in fields: + setattr(obj, field, field_values[field]) + + assert attribution in obj.attribution + assert ( + "To view a copy of this license, " + "visit https://creativecommons.org/licenses/by/3.0/." 
) in obj.attribution + + +@media_type_params +def test_license_url_is_generated_if_missing(media_type, media_model): + obj = media_model( + license="by", + license_version="3.0", + ) + assert obj.license_url is not None diff --git a/catalog/dags/common/constants.py b/catalog/dags/common/constants.py index 13a7dab1857..c2de110341d 100644 --- a/catalog/dags/common/constants.py +++ b/catalog/dags/common/constants.py @@ -15,6 +15,9 @@ STAGING = "staging" PRODUCTION = "production" +Environment = Literal["staging", "production"] +ENVIRONMENTS = [STAGING, PRODUCTION] + CONTACT_EMAIL = os.getenv("CONTACT_EMAIL") DAG_DEFAULT_ARGS = { diff --git a/catalog/dags/common/sensors/utils.py b/catalog/dags/common/sensors/utils.py index bac5b8ce9ed..08c3bb9a6ac 100644 --- a/catalog/dags/common/sensors/utils.py +++ b/catalog/dags/common/sensors/utils.py @@ -93,6 +93,24 @@ def prevent_concurrency_with_dag(external_dag_id: str, **context): raise ValueError(f"Concurrency check with {external_dag_id} failed.") +@task(retries=0) +def is_concurrent_with_any(external_dag_ids: list[str], **context): + """ + Detect whether any of the external DAGs are running. + + Returns the ID of the first DAG found to be running. Otherwise, + returns None. + """ + for dag_id in external_dag_ids: + try: + prevent_concurrency_with_dag.function(dag_id, **context) + except ValueError: + return dag_id + + # Explicitly return None to clarify expectations + return None + + @task_group(group_id="prevent_concurrency") def prevent_concurrency_with_dags(external_dag_ids: list[str]): """Fail immediately if any of the given external dags are in progress.""" diff --git a/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index.py b/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index.py index 9fbc58192c5..1cb2b970eda 100644 --- a/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index.py +++ b/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index.py @@ -2,7 +2,6 @@ from datetime import timedelta from airflow.decorators import task, task_group -from airflow.models.connection import Connection from airflow.providers.elasticsearch.hooks.elasticsearch import ElasticsearchPythonHook from airflow.sensors.python import PythonSensor @@ -21,12 +20,6 @@ GET_CURRENT_INDEX_CONFIG_TASK_NAME = "get_current_index_configuration" -@task -def get_es_host(environment: str): - conn = Connection.get_connection_from_secrets(f"elasticsearch_http_{environment}") - return conn.host - - @task def get_index_name(media_type: str, index_suffix: str): return f"{media_type}-{index_suffix}".lower() diff --git a/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index_dag.py b/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index_dag.py index ad4c3229c3a..5e2e517a042 100644 --- a/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index_dag.py +++ b/catalog/dags/elasticsearch_cluster/create_new_es_index/create_new_es_index_dag.py @@ -109,6 +109,7 @@ CREATE_NEW_INDEX_CONFIGS, CreateNewIndex, ) +from elasticsearch_cluster.shared import get_es_host logger = logging.getLogger(__name__) @@ -188,7 +189,7 @@ def create_new_es_index_dag(config: CreateNewIndex): with dag: prevent_concurrency = prevent_concurrency_with_dags(config.blocking_dags) - es_host = es.get_es_host(environment=config.environment) + es_host = get_es_host(environment=config.environment) index_name = es.get_index_name( media_type="{{ params.media_type }}",
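The new `is_concurrent_with_any` task returns the ID of the first running DAG it finds (or `None`), so downstream tasks can branch on its XCom value instead of failing outright. A minimal usage sketch follows; the DAG and task names in it are illustrative, not part of this change:

```python
from datetime import datetime

from airflow.decorators import dag, task

from common.sensors.utils import is_concurrent_with_any


@task
def report_conflict(running_dag_id: str | None):
    # `running_dag_id` is the XCom result of `is_concurrent_with_any`:
    # the first blocking DAG found to be running, or None if none are.
    if running_dag_id is not None:
        print(f"Deferring work; {running_dag_id} is currently running.")


@dag(schedule=None, start_date=datetime(2024, 1, 1), catchup=False)
def example_gated_dag():
    # Hypothetical blocking DAG list; any DAG IDs could be checked here.
    running = is_concurrent_with_any(["staging_database_restore"])
    report_conflict(running)


example_gated_dag()
```

diff --git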
a/catalog/dags/elasticsearch_cluster/healthcheck_dag.py b/catalog/dags/elasticsearch_cluster/healthcheck_dag.py new file mode 100644 index 00000000000..63132d604b1 --- /dev/null +++ b/catalog/dags/elasticsearch_cluster/healthcheck_dag.py @@ -0,0 +1,184 @@ +""" +Monitor staging and production Elasticsearch cluster health endpoint. + +Requests the cluster health and alerts under the following conditions: + +- Red cluster health +- Unexpected number of nodes +- Unresponsive cluster + +Additionally, the DAG will notify (rather than alert) when the cluster health is yellow. +Yellow cluster health may or may not be an issue, depending on whether it is expected, +and occurs whenever shards and replicas are being relocated (e.g., during reindexes). +It is worthwhile to notify in these cases, as an assurance, but we could choose to add +logic that ignores yellow cluster health during data refresh or other similar operations. +""" + +import json +import logging +from datetime import datetime +from textwrap import dedent, indent + +from airflow.decorators import dag, task +from airflow.exceptions import AirflowSkipException +from airflow.providers.elasticsearch.hooks.elasticsearch import ElasticsearchPythonHook +from elasticsearch import Elasticsearch + +from common.constants import ENVIRONMENTS, PRODUCTION, Environment +from common.sensors.utils import is_concurrent_with_any +from common.slack import send_alert, send_message +from data_refresh.data_refresh_types import DATA_REFRESH_CONFIGS +from elasticsearch_cluster.shared import get_es_host + + +logger = logging.getLogger(__name__) + + +_DAG_ID = "{env}_elasticsearch_cluster_healthcheck" + +EXPECTED_NODE_COUNT = 6 +EXPECTED_DATA_NODE_COUNT = 3 +EXPECTED_MASTER_NODE_COUNT = 3 + + +def _format_response_body(response_body: dict) -> str: + body_str = indent(json.dumps(response_body, indent=4), prefix=" " * 4) + # `body_str` is indented to match the f-string below. Interpolating a + # multi-line string into an indented f-string would otherwise leave the + # first curly brace and all subsequent lines with inconsistent indentation + # (they would not incorporate the f-string's own indentation). + # Adding matching indentation with `indent` gives every line a uniform + # prefix, which allows the message to be correctly dedent-ed later on. + return f""" + Full healthcheck response body: + ``` +{body_str} + ``` + """ + + +def _compose_red_status(env: Environment, response_body: dict): + message = f""" + Elasticsearch {env} cluster status is **red**. + + This is a critical status change, **investigate ASAP**. + + {_format_response_body(response_body)} + """ + return message + + +def _compose_unexpected_node_count(env: Environment, response_body: dict): + node_count = response_body["number_of_nodes"] + data_node_count = response_body["number_of_data_nodes"] + master_node_count = node_count - data_node_count + + message = f""" + Elasticsearch {env} cluster node count is **{node_count}**. + Expected {EXPECTED_NODE_COUNT} total nodes. + + Master nodes: **{master_node_count}** of expected {EXPECTED_MASTER_NODE_COUNT} + Data nodes: **{data_node_count}** of expected {EXPECTED_DATA_NODE_COUNT} + + This is a critical status change, **investigate ASAP**. + If this is expected (e.g., during controlled node or cluster changes), acknowledge immediately with explanation.
+ + {_format_response_body(response_body)} + """ + logger.error(f"Unexpected node count; {json.dumps(response_body)}") + return message + + +def _compose_yellow_cluster_health(env: Environment, response_body: dict): + message = f""" + Elasticsearch {env} cluster health is **yellow**. + + This does not mean something is necessarily wrong, but if this is not expected (e.g., data refresh) then investigate cluster health now. + + {_format_response_body(response_body)} + """ + logger.info(f"Cluster health was yellow; {json.dumps(response_body)}") + return message + + +@task +def ping_healthcheck(env: str, es_host: str): + es_conn: Elasticsearch = ElasticsearchPythonHook(hosts=[es_host]).get_conn + + response = es_conn.cluster.health() + + return response.body + + +@task +def compose_notification( + env: Environment, response_body: dict, is_data_refresh_running: bool +): + status = response_body["status"] + + if status == "red": + return "alert", _compose_red_status(env, response_body) + + if response_body["number_of_nodes"] != EXPECTED_NODE_COUNT: + return "alert", _compose_unexpected_node_count(env, response_body) + + if status == "yellow": + if is_data_refresh_running and env == PRODUCTION: + raise AirflowSkipException( + "Production cluster health status is yellow during data refresh. " + "This is an expected state, so no alert is sent." + ) + + return "notification", _compose_yellow_cluster_health(env, response_body) + + logger.info(f"Cluster health was green; {json.dumps(response_body)}") + return None, None + + +@task +def notify(env: str, message_type_and_string: tuple[str, str]): + message_type, message = message_type_and_string + + if message_type == "alert": + send_alert(dedent(message), dag_id=_DAG_ID.format(env=env)) + elif message_type == "notification": + send_message(dedent(message), dag_id=_DAG_ID.format(env=env)) + else: + raise ValueError( + f"Invalid message_type. 
Expected 'alert' or 'notification', " + f"received {message_type}" + ) + + +_SHARED_DAG_ARGS = { + # Every 15 minutes + "schedule": "*/15 * * * *", + "start_date": datetime(2024, 2, 4), + "catchup": False, + "max_active_runs": 1, + "doc_md": __doc__, + "tags": ["elasticsearch", "monitoring"], +} + + +_DATA_REFRESH_DAG_IDS = [] +for config in DATA_REFRESH_CONFIGS.values(): + _DATA_REFRESH_DAG_IDS += [config.dag_id, config.filtered_index_dag_id] + + +for env in ENVIRONMENTS: + + @dag(dag_id=_DAG_ID.format(env=env), **_SHARED_DAG_ARGS) + def cluster_healthcheck_dag(): + is_data_refresh_running = is_concurrent_with_any(_DATA_REFRESH_DAG_IDS) + + es_host = get_es_host(env) + healthcheck_response = ping_healthcheck(env, es_host) + notification = compose_notification( + env, healthcheck_response, is_data_refresh_running + ) + es_host >> healthcheck_response >> notification >> notify(env, notification) + + cluster_healthcheck_dag() diff --git a/catalog/dags/elasticsearch_cluster/shared.py b/catalog/dags/elasticsearch_cluster/shared.py new file mode 100644 index 00000000000..ef53d0fade8 --- /dev/null +++ b/catalog/dags/elasticsearch_cluster/shared.py @@ -0,0 +1,11 @@ +from airflow.decorators import task +from airflow.models.connection import Connection +from airflow.models.xcom_arg import XComArg + +from common.constants import Environment + + +@task +def get_es_host(environment: Environment) -> XComArg: + conn = Connection.get_connection_from_secrets(f"elasticsearch_http_{environment}") + return conn.host diff --git a/catalog/justfile b/catalog/justfile index c11f6bc2913..e56f644fff6 100644 --- a/catalog/justfile +++ b/catalog/justfile @@ -108,6 +108,7 @@ _mount-test command: up-deps {{ command }} # Launch a Bash shell in a test container under `SERVICE` +# Run pytest with `--pdb` to work around xdist breaking pdb.set_trace() test-session: just _mount-test bash diff --git a/catalog/tests/dags/elasticsearch_cluster/test_healthcheck_dag.py b/catalog/tests/dags/elasticsearch_cluster/test_healthcheck_dag.py new file mode 100644 index 00000000000..010e9e8a301 --- /dev/null +++ b/catalog/tests/dags/elasticsearch_cluster/test_healthcheck_dag.py @@ -0,0 +1,123 @@ +import pytest +from airflow.exceptions import AirflowSkipException + +from common.constants import PRODUCTION +from elasticsearch_cluster.healthcheck_dag import compose_notification + + +_TEST_ENV = "testing_environment" + + +def _make_response_body(**kwargs): + # Default values based on real response from Openverse staging cluster + # Only the `cluster_name` is changed to reflect the environment + return { + "cluster_name": "testcluster", + "status": "green", + "timed_out": False, + "number_of_nodes": 6, + "number_of_data_nodes": 3, + "active_primary_shards": 51, + "active_shards": 103, + "relocating_shards": 0, + "initializing_shards": 0, + "unassigned_shards": 0, + "delayed_unassigned_shards": 0, + "number_of_pending_tasks": 0, + "number_of_in_flight_fetch": 0, + "task_max_waiting_in_queue_millis": 0, + "active_shards_percent_as_number": 100, + } | kwargs + + +def _missing_node_keys(master_nodes: int, data_nodes: int): + total_nodes = master_nodes + data_nodes + return ( + f"Elasticsearch {_TEST_ENV} cluster node count is **{total_nodes}**", + "Expected 6 total nodes.", + f"Master nodes: **{master_nodes}** of expected 3", + f"Data nodes: **{data_nodes}** of expected 3", + ) + + +@pytest.mark.parametrize( + ("expected_message_type", "message_keys", "cluster_health_response"), + ( + pytest.param( + "alert", + (f"Elasticsearch {_TEST_ENV} cluster
status is **red**",), + _make_response_body(status="red"), + id="red-status", + ), + pytest.param( + "alert", + _missing_node_keys(master_nodes=3, data_nodes=2), + _make_response_body( + status="yellow", + number_of_nodes=5, + number_of_data_nodes=2, + ), + id="missing-data-node", + ), + pytest.param( + "alert", + _missing_node_keys(master_nodes=2, data_nodes=3), + _make_response_body( + status="yellow", + number_of_nodes=5, + number_of_data_nodes=3, + ), + id="missing-master-node", + ), + pytest.param( + "alert", + _missing_node_keys(master_nodes=1, data_nodes=2), + _make_response_body( + status="yellow", + number_of_nodes=3, + number_of_data_nodes=2, + ), + id="missing-some-of-both-node-types", + ), + pytest.param( + "notification", + (f"Elasticsearch {_TEST_ENV} cluster health is **yellow**.",), + _make_response_body( + status="yellow", + ), + id="yellow-status-all-nodes-present", + ), + ), +) +def test_compose_notification( + expected_message_type, message_keys, cluster_health_response +): + message_type, message = compose_notification.function( + _TEST_ENV, cluster_health_response, is_data_refresh_running=False + ) + + assert message_type == expected_message_type + for message_key in message_keys: + assert message_key in message + + +def test_production_compose_notification_data_refresh_running(): + with pytest.raises(AirflowSkipException): + cluster_health_response = _make_response_body(status="yellow") + compose_notification.function( + PRODUCTION, + cluster_health_response, + is_data_refresh_running=True, + ) + + +def test_production_compose_notification_data_refresh_not_running(): + cluster_health_response = _make_response_body(status="yellow") + message_type, message = compose_notification.function( + PRODUCTION, + cluster_health_response, + is_data_refresh_running=False, + ) + + assert message_type == "notification" + assert "Elasticsearch production cluster health is **yellow**." in message diff --git a/catalog/tests/dags/test_dag_parsing.py b/catalog/tests/dags/test_dag_parsing.py index 2a070e022ef..d7232795b04 100644 --- a/catalog/tests/dags/test_dag_parsing.py +++ b/catalog/tests/dags/test_dag_parsing.py @@ -3,7 +3,7 @@ import pytest from airflow.models import DagBag -from common.constants import MEDIA_TYPES +from common.constants import ENVIRONMENTS, MEDIA_TYPES from providers.provider_reingestion_workflows import ( PROVIDER_REINGESTION_WORKFLOWS as REINGESTION_WORKFLOW_CONFIGS, ) @@ -25,6 +25,7 @@ "data_refresh/dag_factory.py", "data_refresh/create_filtered_index_dag.py", "elasticsearch_cluster/recreate_staging_index/recreate_full_staging_index_dag.py", + "elasticsearch_cluster/healthcheck_dag.py", "oauth2/authorize_dag.py", "oauth2/token_refresh_dag.py", "database/delete_records/delete_records_dag.py", @@ -41,6 +42,7 @@ "popularity/popularity_refresh_dag_factory.py": len(MEDIA_TYPES), "data_refresh/dag_factory.py": len(MEDIA_TYPES), "data_refresh/create_filtered_index_dag.py": len(MEDIA_TYPES), + "elasticsearch_cluster/healthcheck_dag.py": len(ENVIRONMENTS), } diff --git a/documentation/api/guides/deploy.md b/documentation/api/guides/deploy.md index 5cdfa05d7d5..52a690bc4b3 100644 --- a/documentation/api/guides/deploy.md +++ b/documentation/api/guides/deploy.md @@ -4,17 +4,10 @@ For more information on how deployments work, please see the [general deployment guide](/general/deployment.md). ``` -1. 
Visit - [https://api-staging.openverse.engineering/version](https://api-staging.openverse.engineering/version) - and - [the API Docker image](https://github.com/wordpress/openverse/pkgs/container/openverse-api). - Verify that the commit SHA live on the staging site is also tagged with - `latest` in the Docker image. - ![GitHub package directory screenshot](/_static/package_directory_example.png) -1. Release the app via - [GitHub workflow](https://github.com/WordPress/openverse/actions/workflows/release-app.yml). - Click the "Run workflow" button, choose "api" from the dropdown, and supply - the SHA identified in step 1. +1. [Publish the drafted API release in the GitHub release page of the monorepo](https://github.com/WordPress/openverse/releases?q=api-) + - Here you can preview the changes included in the API release and decide + whether a release is necessary and adjust monitoring during the deployment + accordingly. 1. That's it! The API will be deployed. You can monitor the deployment in the maintainers `#openverse-notifications` channel and in the [infrastructure repository's workflow listing](https://github.com/WordPress/openverse-infrastructure/actions). diff --git a/documentation/catalog/guides/deploy.md b/documentation/catalog/guides/deploy.md index 652efd7e1fe..57aeac56e5a 100644 --- a/documentation/catalog/guides/deploy.md +++ b/documentation/catalog/guides/deploy.md @@ -13,13 +13,10 @@ unpause it back. ``` -1. Visit the - [Catalog Docker image](https://github.com/WordPress/openverse/pkgs/container/openverse-catalog) - page and copy the SHA of the image tagged `latest`. -1. Release the app via - [GitHub workflow](https://github.com/WordPress/openverse/actions/workflows/release-app.yml). - Click the "Run workflow" button, choose "catalog" from the dropdown, and - supply the SHA identified in the previous step +1. [Publish the drafted catalog release in the GitHub release page of the monorepo](https://github.com/WordPress/openverse/releases?q=catalog-) + - Here you can preview the changes included in the catalog release and decide + whether a release is necessary and adjust monitoring during the deployment + accordingly. ## Deployment diff --git a/documentation/catalog/reference/DAGs.md b/documentation/catalog/reference/DAGs.md index abdec759661..94055eafe25 100644 --- a/documentation/catalog/reference/DAGs.md +++ b/documentation/catalog/reference/DAGs.md @@ -54,10 +54,12 @@ The following are DAGs grouped by their primary tag: ### Elasticsearch -| DAG ID | Schedule Interval | -| ------------------------------------------------------------------- | ----------------- | -| [`create_new_production_es_index`](#create_new_production_es_index) | `None` | -| [`create_new_staging_es_index`](#create_new_staging_es_index) | `None` | +| DAG ID | Schedule Interval | +| ----------------------------------------------------------------------------------------------- | ----------------- | +| [`create_new_production_es_index`](#create_new_production_es_index) | `None` | +| [`create_new_staging_es_index`](#create_new_staging_es_index) | `None` | +| [`production_elasticsearch_cluster_healthcheck`](#production_elasticsearch_cluster_healthcheck) | `*/15 * * * *` | +| [`staging_elasticsearch_cluster_healthcheck`](#staging_elasticsearch_cluster_healthcheck) | `*/15 * * * *` | ### Maintenance @@ -163,6 +165,7 @@ The following is documentation associated with each DAG (where available): 1. [`phylopic_reingestion_workflow`](#phylopic_reingestion_workflow) 1. [`phylopic_workflow`](#phylopic_workflow) 1. 
[`pr_review_reminders`](#pr_review_reminders) +1. [`production_elasticsearch_cluster_healthcheck`](#production_elasticsearch_cluster_healthcheck) 1. [`rawpixel_workflow`](#rawpixel_workflow) 1. [`recreate_audio_popularity_calculation`](#recreate_audio_popularity_calculation) 1. [`recreate_full_staging_index`](#recreate_full_staging_index) @@ -173,6 +176,7 @@ The following is documentation associated with each DAG (where available): 1. [`smithsonian_workflow`](#smithsonian_workflow) 1. [`smk_workflow`](#smk_workflow) 1. [`staging_database_restore`](#staging_database_restore) +1. [`staging_elasticsearch_cluster_healthcheck`](#staging_elasticsearch_cluster_healthcheck) 1. [`stocksnap_workflow`](#stocksnap_workflow) 1. [`wikimedia_commons_workflow`](#wikimedia_commons_workflow) 1. [`wikimedia_reingestion_workflow`](#wikimedia_reingestion_workflow) @@ -1032,6 +1036,23 @@ Unfortunately the DAG does not know when someone is on vacation. It is up to the author of the PR to re-assign review if one of the randomly selected reviewers is unavailable for the time period during which the PR should be reviewed. +### `production_elasticsearch_cluster_healthcheck` + +Monitor staging and production Elasticsearch cluster health endpoint. + +Requests the cluster health and alerts under the following conditions: + +- Red cluster health +- Unexpected number of nodes +- Unresponsive cluster + +Additionally, the DAG will notify (rather than alert) when the cluster health is +yellow. Yellow cluster health may or may not be an issue, depending on whether +it is expected, and occurs whenever shards and replicas are being relocated +(e.g., during reindexes). It is worthwhile to notify in these cases, as an +assurance, but we could choose to add logic that ignores yellow cluster health +during data refresh or other similar operations. + ### `rawpixel_workflow` Content Provider: Rawpixel @@ -1199,6 +1220,23 @@ the RDS operations run using a different hook: - `AIRFLOW_CONN_`: The connection string to use for RDS operations (per the above example, it might be `AIRFLOW_CONN_AWS_RDS`) +### `staging_elasticsearch_cluster_healthcheck` + +Monitor staging and production Elasticsearch cluster health endpoint. + +Requests the cluster health and alerts under the following conditions: + +- Red cluster health +- Unexpected number of nodes +- Unresponsive cluster + +Additionally, the DAG will notify (rather than alert) when the cluster health is +yellow. Yellow cluster health may or may not be an issue, depending on whether +it is expected, and occurs whenever shards and replicas are being relocated +(e.g., during reindexes). It is worthwhile to notify in these cases, as an +assurance, but we could choose to add logic that ignores yellow cluster health +during data refresh or other similar operations. 
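+
+A simplified sketch of the decision made by this DAG's `compose_notification`
+task (illustrative only; the real task also skips the yellow notification when
+a production data refresh is running):
+
+```python
+def decide(status: str, node_count: int, expected_nodes: int = 6):
+    # Mirrors the alerting rules described above.
+    if status == "red":
+        return "alert"
+    if node_count != expected_nodes:
+        return "alert"
+    if status == "yellow":
+        return "notification"
+    return None  # Green and fully populated: no message is sent.
+```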
+ ### `stocksnap_workflow` Content Provider: StockSnap diff --git a/documentation/changelogs/api/2024.02.13.05.45.50.md b/documentation/changelogs/api/2024.02.13.05.45.50.md new file mode 100644 index 00000000000..bc20a60887b --- /dev/null +++ b/documentation/changelogs/api/2024.02.13.05.45.50.md @@ -0,0 +1,11 @@ +# 2024.02.13.05.45.50 + +## Improvements + +- Refactor integration tests to have no side-effects + ([#3544](https://github.com/WordPress/openverse/pull/3544)) by @dhruvkb + +## Internal Improvements + +- feat: Updated Dockerfile to Use Specific Version of audiowaveform Image + ([#3784](https://github.com/WordPress/openverse/pull/3784)) by @zablon-oigo diff --git a/documentation/changelogs/frontend/2024.02.12.23.34.43.md b/documentation/changelogs/frontend/2024.02.12.23.34.43.md new file mode 100644 index 00000000000..d0a20f6d224 --- /dev/null +++ b/documentation/changelogs/frontend/2024.02.12.23.34.43.md @@ -0,0 +1,43 @@ +# 2024.02.12.23.34.43 + +```{note} +The actual release corresponding to this tag is +https://github.com/WordPress/openverse/releases/tag/frontend-2024.02.12.23.34.43. + +As noted in the release's description, the actual changes are from +https://github.com/WordPress/openverse/releases/tag/frontend-2024.02.12.22.53.23. + +A one-time issue caused by testing a new version of the release workflow caused +this drift, and it shouldn't occur again (or for other apps). +``` + +## New Features + +- Add source and provider definition tooltips + ([#3407](https://github.com/WordPress/openverse/pull/3407)) by @obulat + +## Improvements + +- Change outdated links in about page for codebase and project board to current + links ([#3768](https://github.com/WordPress/openverse/pull/3768)) by + @enteragoodusername + +## Internal Improvements + +- Add SEARCH_RESPONSE_TIME analytics event to searches + ([#3632](https://github.com/WordPress/openverse/pull/3632)) by @adjeiv +- Update dependency @types/node to v18.19.14 + ([#3734](https://github.com/WordPress/openverse/pull/3734)) by @openverse-bot +- Update @openverse/eslint-plugin + ([#3737](https://github.com/WordPress/openverse/pull/3737)) by @openverse-bot +- Replace dependency npm-run-all with npm-run-all2 ^6.1.2 (edited by @obulat) + ([#3733](https://github.com/WordPress/openverse/pull/3733)) by @openverse-bot +- Update dependency @playwright/test to v1.41.2 + ([#3738](https://github.com/WordPress/openverse/pull/3738)) by @openverse-bot + +## Bug Fixes + +- Revert Add SEARCH_RESPONSE_TIME analytics event to searches (#3632) + ([#3779](https://github.com/WordPress/openverse/pull/3779)) by @obulat +- Fix flaky Playwright tests + ([#3759](https://github.com/WordPress/openverse/pull/3759)) by @obulat diff --git a/documentation/frontend/guides/deploy.md b/documentation/frontend/guides/deploy.md index e89fe471e71..43e34fef08a 100644 --- a/documentation/frontend/guides/deploy.md +++ b/documentation/frontend/guides/deploy.md @@ -4,17 +4,10 @@ For more information on how deployments work, please see the [general deployment guide](/general/deployment.md). ``` -1. Visit - [https://staging.openverse.org/version.json](https://staging.openverse.org/version.json) - and - [the frontend Docker image](https://github.com/wordpress/openverse/pkgs/container/openverse-frontend). - Verify that the commit SHA live on the staging site is also tagged with - `latest` in the Docker image. - ![GitHub package directory screenshot](/_static/package_directory_example.png) -1. 
Release the app via - [GitHub workflow](https://github.com/WordPress/openverse/actions/workflows/release-app.yml). - Click the "Run workflow" button, choose "frontend" from the dropdown, and - supply the SHA identified in step 1. +1. [Publish the drafted frontend release in the GitHub release page of the monorepo](https://github.com/WordPress/openverse/releases?q=frontend-) + - Here you can preview the changes included in the frontend release and + decide whether a release is necessary and adjust monitoring during the + deployment accordingly. 1. That's it. The frontend will be deployed. You can monitor the deployment in the maintainers `#openverse-notifications` channel and in the [infrastructure repository's workflow listing](https://github.com/WordPress/openverse-infrastructure/actions). diff --git a/documentation/general/deployment.md b/documentation/general/deployment.md index f92c7b41369..e781ba541b2 100644 --- a/documentation/general/deployment.md +++ b/documentation/general/deployment.md @@ -143,21 +143,18 @@ the process above. The staging deployment workflows ## Production -Maintainers manually dispatch the production deployment via the -[Release app](https://github.com/WordPress/openverse/actions/workflows/release-app.yml) -workflow. The workflow requires the tag of an existing Docker image to tag as -the "released" image. It generates a date-based tag for the specific application -being released, publishes a GitHub Release (which creates a git tag), tags the -Docker image, and then triggers the deployment workflow. The workflow also opens -a PR to add the changelog to the +Maintainers begin the production deployment process by publishing the drafted +release for an application. Publishing a release automatically tags the latest +docker image for the application and opens a PR to add the changelog to the [documentation site's changelog directory](https://docs.openverse.org/changelogs/index.html). This needs to be manually approved and merged by maintainers. The person who triggers the release app workflow is pinged in the PR description to help with the visibility of the PR. -The same workflow is used to create production release images for the ingestion -server. In that case the production deployment still needs to be handled via -Terraform. +For the API and frontend, publishing the release also triggers an automated +production deployment. For the catalog and ingestion server, however, +maintainers must manually deploy the changes to production using the Terraform +deployment process. ## Rollbacks diff --git a/documentation/ingestion_server/guides/deploy.md b/documentation/ingestion_server/guides/deploy.md index 82557f6e6d4..86a30b361f4 100644 --- a/documentation/ingestion_server/guides/deploy.md +++ b/documentation/ingestion_server/guides/deploy.md @@ -4,13 +4,10 @@ 1. Check [Airflow](https://airflow.openverse.engineering/home?tags=data_refresh) to make sure a data refresh isn't occurring. -1. Visit the - [Ingestion Server Docker image](https://github.com/WordPress/openverse/pkgs/container/openverse-ingestion_server) - page and copy the SHA of the image tagged `latest`. -1. Release the app via - [GitHub workflow](https://github.com/WordPress/openverse/actions/workflows/release-app.yml). - Click the "Run workflow" button, choose "ingestion_server" from the dropdown, - and supply the SHA identified in the previous step. +1. 
[Publish the drafted ingestion server release in the GitHub release page of the monorepo](https://github.com/WordPress/openverse/releases?q=ingestion_server-) + - Here you can preview the changes included in the ingestion server release, + decide whether a release is necessary, and adjust monitoring during the + deployment accordingly. ## Deployment diff --git a/frontend/src/components/VSourcesTable.vue b/frontend/src/components/VSourcesTable.vue index 9b2274b7503..1bfb3897fac 100644 --- a/frontend/src/components/VSourcesTable.vue +++ b/frontend/src/components/VSourcesTable.vue @@ -39,10 +39,13 @@ - - - {{ provider.display_name }} + + + {{ + provider.display_name + }} + {{ provider.display_name }} {{ cleanSourceUrlForPresentation(provider.source_url) }} @@ -59,12 +62,16 @@