diff --git a/aws_dagster_bgg/assets/assets.py b/aws_dagster_bgg/assets/assets.py index e8085c2..feace1f 100644 --- a/aws_dagster_bgg/assets/assets.py +++ b/aws_dagster_bgg/assets/assets.py @@ -21,11 +21,12 @@ def bgg_games_csv( s3_scraper_bucket = configs["s3_scraper_bucket"] - original_timestamps = get_original_timestamps( - s3_resource, - bucket=s3_scraper_bucket, - keys=[configs["boardgamegeek_csv_filename"]], - ) + original_timestamps = { + configs["boardgamegeek_csv_filename"]: s3_resource.get_last_modified( + bucket=s3_scraper_bucket, + key=configs["boardgamegeek_csv_filename"], + ) + } lambda_resource.invoke_lambda(function=configs["file_retrieval_lambda"]) @@ -179,25 +180,6 @@ def user_scraper_urls( return True -@op -def get_original_timestamps( - s3_resource: ConfigurableResource, - bucket: str, - keys: list[str], -) -> dict: - logger.info("Getting the original timestamps") - try: - return { - key: s3_resource.get_last_modified( - bucket=bucket, - key=key, - ) - for key in keys - } - except: - return {key: datetime(1970, 1, 1, 0, 0, 0, 0) for key in keys} - - @op def compare_timestamps_for_refresh( original_timestamps: dict, @@ -211,7 +193,6 @@ def compare_timestamps_for_refresh( time.sleep(sleep_timer) logger.info("Checking timestamps...") - logger.info(f"Original timestamps: {original_timestamps}") while len(file_list_to_check): logger.info(f"Files to check: {file_list_to_check}") @@ -221,6 +202,7 @@ def compare_timestamps_for_refresh( bucket=location_bucket, key=key, ) + logger.info(f"Original timestamp: {original_timestamps[key]}") logger.info(f"New timestamp: {new_timestamp_tracker[key]}") for key in original_timestamps: @@ -248,11 +230,13 @@ def create_new_urls( lambda_function_name: str, ) -> bool: - original_timestamps = get_original_timestamps( - s3_resource=s3_resource, - bucket=s3_scraper_bucket, - keys=scraper_url_filenames, - ) + original_timestamps = { + key: s3_resource.get_last_modified( + bucket=s3_scraper_bucket, + key=key, + ) + for key in scraper_url_filenames + } lambda_resource.invoke_lambda(function=lambda_function_name) @@ -284,11 +268,13 @@ def scrape_data( f"{output_key_directory}/{output_key_suffix.format(i)}" for i in range(1, 31) ] - original_timestamps = get_original_timestamps( - s3_resource=s3_resource, - bucket=bucket, - keys=scraper_raw_data_filenames, - ) + original_timestamps = { + key: s3_resource.get_last_modified( + bucket=bucket, + key=key, + ) + for key in scraper_raw_data_filenames + } game_scraper_url_filenames = s3_resource.list_file_keys( bucket=bucket, key=input_urls_key