diff --git a/catalog/dags/providers/provider_api_scripts/inaturalist.py b/catalog/dags/providers/provider_api_scripts/inaturalist.py
index 8b0661a1f7a..b3b9c397aa3 100644
--- a/catalog/dags/providers/provider_api_scripts/inaturalist.py
+++ b/catalog/dags/providers/provider_api_scripts/inaturalist.py
@@ -50,6 +50,7 @@
 }
 OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "/tmp/"))
 COL_URL = "https://download.checklistbank.org/col/latest_coldp.zip"
+INATURALIST_BUCKET = "inaturalist-open-data"


 class INaturalistDataIngester(ProviderDataIngester):
@@ -202,9 +203,9 @@ def compare_update_dates(
         for key in s3_keys:
             # this will error out if the files don't exist, and bubble up as an
             # informative failure
-            last_modified = s3_client.head_object(
-                Bucket="inaturalist-open-data", Key=key
-            )["LastModified"]
+            last_modified = s3_client.head_object(Bucket=INATURALIST_BUCKET, Key=key)[
+                "LastModified"
+            ]
             logger.info(
                 f"{key} was last modified on s3 on "
                 f"{last_modified.strftime('%Y-%m-%d %H:%M:%S')}."
diff --git a/catalog/tests/dags/common/loader/test_s3.py b/catalog/tests/dags/common/loader/test_s3.py
index 46786727657..c19a13a49a8 100644
--- a/catalog/tests/dags/common/loader/test_s3.py
+++ b/catalog/tests/dags/common/loader/test_s3.py
@@ -13,9 +13,8 @@
 TEST_MEDIA_PREFIX = "media"
 TEST_STAGING_PREFIX = "test_staging"
 S3_LOCAL_ENDPOINT = os.getenv("S3_LOCAL_ENDPOINT")
-S3_TEST_BUCKET = f"cccatalog-storage-{TEST_ID}"
-ACCESS_KEY = os.getenv("AWS_ACCESS_KEY")
-SECRET_KEY = os.getenv("AWS_SECRET_KEY")
+ACCESS_KEY = os.getenv("AWS_ACCESS_KEY", "test_key")
+SECRET_KEY = os.getenv("AWS_SECRET_KEY", "test_secret")


 @pytest.fixture
diff --git a/catalog/tests/dags/providers/provider_api_scripts/test_inaturalist.py b/catalog/tests/dags/providers/provider_api_scripts/test_inaturalist.py
index 6320bcdec42..2767aee1b68 100644
--- a/catalog/tests/dags/providers/provider_api_scripts/test_inaturalist.py
+++ b/catalog/tests/dags/providers/provider_api_scripts/test_inaturalist.py
@@ -2,10 +2,16 @@
 from pathlib import Path
 from unittest import mock

+import boto3
 import pendulum
 import pytest
 from airflow.exceptions import AirflowSkipException
 from airflow.models import TaskInstance
+from tests.dags.common.loader.test_s3 import (
+    ACCESS_KEY,
+    S3_LOCAL_ENDPOINT,
+    SECRET_KEY,
+)

 from common.constants import IMAGE
 from common.loader.reporting import RecordMetrics
@@ -232,3 +238,14 @@ def test_compare_update_dates(last_success, s3_dir, expected_msgs, caplog):
     assert actual is None
     for msg in expected_msgs:
         assert msg in caplog.text
+
+
+def test_bucket_exists():
+    bucket = boto3.resource(
+        "s3",
+        aws_access_key_id=ACCESS_KEY,
+        aws_secret_access_key=SECRET_KEY,
+        endpoint_url=S3_LOCAL_ENDPOINT,
+    ).Bucket(inaturalist.INATURALIST_BUCKET)
+
+    assert bucket.creation_date is not None
diff --git a/docker/minio/env.template b/docker/minio/env.template
index 873f917a27d..178889c3828 100644
--- a/docker/minio/env.template
+++ b/docker/minio/env.template
@@ -5,4 +5,4 @@
 MINIO_ROOT_USER=test_key
 MINIO_ROOT_PASSWORD=test_secret
 # Comma separated list of buckets to create on startup
-BUCKETS_TO_CREATE=openverse-catalog,openverse-airflow-logs
+BUCKETS_TO_CREATE=openverse-catalog,openverse-airflow-logs,inaturalist-open-data
diff --git a/docker/minio/load_to_s3_entrypoint.sh b/docker/minio/load_to_s3_entrypoint.sh
index b106dae314a..10e32a1c603 100755
--- a/docker/minio/load_to_s3_entrypoint.sh
+++ b/docker/minio/load_to_s3_entrypoint.sh
@@ -8,7 +8,7 @@
 # More info here: https://stackoverflow.com/questions/72867045
 set -euxo pipefail

-/usr/bin/mc config host add s3 http://s3:5000 "${AWS_ACCESS_KEY}" "${AWS_SECRET_KEY}"
+/usr/bin/mc config host add s3 http://s3:5000 "${MINIO_ROOT_USER}" "${MINIO_ROOT_PASSWORD}"
 cd /data
 for b in */; do
   echo "Loading bucket $b"