Skip to content

Commit

Permalink
logging updates
Browse files Browse the repository at this point in the history
  • Loading branch information
threnjen committed Oct 16, 2024
1 parent 67d8877 commit 76cf136
Showing 1 changed file with 21 additions and 35 deletions.
56 changes: 21 additions & 35 deletions aws_dagster_bgg/assets/assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ def bgg_games_csv(

s3_scraper_bucket = configs["s3_scraper_bucket"]

original_timestamps = get_original_timestamps(
s3_resource,
bucket=s3_scraper_bucket,
keys=[configs["boardgamegeek_csv_filename"]],
)
original_timestamps = {
configs["boardgamegeek_csv_filename"]: s3_resource.get_last_modified(
bucket=s3_scraper_bucket,
key=configs["boardgamegeek_csv_filename"],
)
}

lambda_resource.invoke_lambda(function=configs["file_retrieval_lambda"])

Expand Down Expand Up @@ -179,25 +180,6 @@ def user_scraper_urls(
return True


@op
def get_original_timestamps(
    s3_resource: ConfigurableResource,
    bucket: str,
    keys: list[str],
) -> dict:
    """Return the last-modified timestamp of each key in ``bucket``.

    Each key is looked up with ``s3_resource.get_last_modified``. The lookup
    is best-effort: if any single lookup raises, the partial result is
    discarded and *every* key is mapped to 1970-01-01 00:00:00 instead.

    Args:
        s3_resource: Resource exposing ``get_last_modified(bucket=..., key=...)``.
        bucket: Name of the bucket the keys are looked up in.
        keys: Object keys to fetch timestamps for.

    Returns:
        A dict mapping each key to its last-modified value, or to the epoch
        fallback for all keys when a lookup failed.
    """
    logger.info("Getting the original timestamps")
    try:
        return {
            key: s3_resource.get_last_modified(
                bucket=bucket,
                key=key,
            )
            for key in keys
        }
    except Exception:
        # Catch Exception rather than using a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit still propagate, and record the
        # failure instead of swallowing it silently.
        logger.warning(
            "Could not read original timestamps; falling back to epoch",
            exc_info=True,
        )
        # NOTE(review): the fallback is a naive datetime. If
        # get_last_modified returns timezone-aware values, ordering
        # comparisons against this fallback would raise TypeError - confirm.
        return {key: datetime(1970, 1, 1, 0, 0, 0, 0) for key in keys}


@op
def compare_timestamps_for_refresh(
original_timestamps: dict,
Expand All @@ -211,7 +193,6 @@ def compare_timestamps_for_refresh(
time.sleep(sleep_timer)

logger.info("Checking timestamps...")
logger.info(f"Original timestamps: {original_timestamps}")

while len(file_list_to_check):
logger.info(f"Files to check: {file_list_to_check}")
Expand All @@ -221,6 +202,7 @@ def compare_timestamps_for_refresh(
bucket=location_bucket,
key=key,
)
logger.info(f"Original timestamp: {original_timestamps[key]}")
logger.info(f"New timestamp: {new_timestamp_tracker[key]}")

for key in original_timestamps:
Expand Down Expand Up @@ -248,11 +230,13 @@ def create_new_urls(
lambda_function_name: str,
) -> bool:

original_timestamps = get_original_timestamps(
s3_resource=s3_resource,
bucket=s3_scraper_bucket,
keys=scraper_url_filenames,
)
original_timestamps = {
key: s3_resource.get_last_modified(
bucket=s3_scraper_bucket,
key=key,
)
for key in scraper_url_filenames
}

lambda_resource.invoke_lambda(function=lambda_function_name)

Expand Down Expand Up @@ -284,11 +268,13 @@ def scrape_data(
f"{output_key_directory}/{output_key_suffix.format(i)}" for i in range(1, 31)
]

original_timestamps = get_original_timestamps(
s3_resource=s3_resource,
bucket=bucket,
keys=scraper_raw_data_filenames,
)
original_timestamps = {
key: s3_resource.get_last_modified(
bucket=bucket,
key=key,
)
for key in scraper_raw_data_filenames
}

game_scraper_url_filenames = s3_resource.list_file_keys(
bucket=bucket, key=input_urls_key
Expand Down

0 comments on commit 76cf136

Please sign in to comment.