Skip to content

Commit

Permalink
update assets
Browse files Browse the repository at this point in the history
  • Loading branch information
threnjen committed Oct 16, 2024
1 parent 98d4176 commit 49b3dae
Showing 1 changed file with 15 additions and 5 deletions.
20 changes: 15 additions & 5 deletions aws_dagster_bgg/assets/assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,16 @@ def game_scraper_urls(
f"{raw_urls_directory}/group{i}{output_urls_json_suffix}" for i in range(1, 31)
]

-return create_new_urls(
+create_new_urls(
lambda_resource,
s3_resource,
s3_scraper_bucket,
game_scraper_url_filenames,
lambda_function_name="bgg_generate_game_urls",
)
+
+return True


@asset(deps=["game_scraper_urls"])
def scrape_game_data(
Expand Down Expand Up @@ -118,14 +120,16 @@ def game_dfs_dirty(
for key in data_set_file_names
}

-return compare_timestamps_for_refresh(
+compare_timestamps_for_refresh(
original_timestamps=original_timestamps,
file_list_to_check=data_set_file_names,
location_bucket=bucket,
sleep_timer=300,
s3_resource=s3_resource,
)
+
+return True


@asset(deps=["game_dfs_dirty"])
def user_scraper_urls(
Expand Down Expand Up @@ -155,14 +159,16 @@ def user_scraper_urls(
f"{raw_urls_directory}/group{i}{output_urls_json_suffix}" for i in range(1, 31)
]

-return create_new_urls(
+create_new_urls(
lambda_resource,
s3_resource,
s3_scraper_bucket,
user_scraper_url_filenames,
lambda_function_name="bgg_generate_user_urls",
)
+
+return True


@op
def get_original_timestamps(
Expand Down Expand Up @@ -233,14 +239,16 @@ def create_new_urls(

lambda_resource.invoke_lambda(function=lambda_function_name)

-return compare_timestamps_for_refresh(
+compare_timestamps_for_refresh(
original_timestamps=original_timestamps,
file_list_to_check=scraper_url_filenames,
location_bucket=s3_scraper_bucket,
sleep_timer=15,
s3_resource=s3_resource,
)
+
+return True


@op
def scrape_data(
Expand Down Expand Up @@ -287,14 +295,16 @@ def scrape_data(
}
ecs_resource.launch_ecs_task(task_definition, overrides)

-return compare_timestamps_for_refresh(
+compare_timestamps_for_refresh(
original_timestamps=original_timestamps,
file_list_to_check=scraper_raw_data_filenames,
location_bucket=bucket,
sleep_timer=300,
s3_resource=s3_resource,
)
+
+return True


# @multi_asset(specs=[AssetSpec("asset1"), AssetSpec("asset2")])
# def materialize_1_and_2():
Expand Down

0 comments on commit 49b3dae

Please sign in to comment.