diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d44dbac..e704b83f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 6.20.2 - Aug 14, 2024 + +* Prevent failure in Google Cloud Storage archiving from failing a scrape/update operation + ## 6.20.1 - Aug 2, 2024 * Fix permissions issue caused by slightly wrong usage of GCP storage client code diff --git a/openstates/cli/update.py b/openstates/cli/update.py index b1902d83..1e30bf25 100644 --- a/openstates/cli/update.py +++ b/openstates/cli/update.py @@ -217,22 +217,28 @@ def archive_to_cloud_storage( return logger.info("Beginning archive of scraped files to google cloud storage.") logger.info(f"GCP Project is {GCP_PROJECT} and bucket is {BUCKET_NAME}") - cloud_storage_client = storage.Client(project=GCP_PROJECT) - bucket = cloud_storage_client.bucket(BUCKET_NAME) - jurisdiction_id = juris.jurisdiction_id.replace("ocd-jurisdiction/", "") - destination_prefx = ( - f"{SCRAPE_LAKE_PREFIX}/{jurisdiction_id}/{last_scrape_end_datetime.isoformat()}" - ) - # read files in directory and upload - files_count = 0 - for file_path in glob.glob(datadir + "/*.json"): - files_count += 1 - blob_name = os.path.join(destination_prefx, os.path.basename(file_path)) - blob = bucket.blob(blob_name) - blob.upload_from_filename(file_path) + # Catch exceptions so that we do not fail the scrape if transient GCS error occurs + try: + cloud_storage_client = storage.Client(project=GCP_PROJECT) + bucket = cloud_storage_client.bucket(BUCKET_NAME) + jurisdiction_id = juris.jurisdiction_id.replace("ocd-jurisdiction/", "") + destination_prefx = ( + f"{SCRAPE_LAKE_PREFIX}/{jurisdiction_id}/{last_scrape_end_datetime.isoformat()}" + ) + + # read files in directory and upload + files_count = 0 + for file_path in glob.glob(datadir + "/*.json"): + files_count += 1 + blob_name = os.path.join(destination_prefx, os.path.basename(file_path)) + blob = bucket.blob(blob_name) + blob.upload_from_filename(file_path) + + logger.info(f"Completed archive to Google Cloud Storage, {files_count} files were uploaded.") - logger.info(f"Completed archive to Google Cloud Storage, {files_count} files were uploaded.") + except Exception as e: + logger.warning(f"An error occurred during the attempt to archive files to Google Cloud Storage: {e}") def do_import(juris: State, args: argparse.Namespace) -> dict[str, typing.Any]: diff --git a/pyproject.toml b/pyproject.toml index b3d59ea1..f71ceb50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openstates" -version = "6.20.1" +version = "6.20.2" description = "core infrastructure for the openstates project" authors = ["James Turk "] license = "MIT"