From a8876a1400c02e5137159319d116e05bf13d1fe0 Mon Sep 17 00:00:00 2001 From: threnjen Date: Sun, 24 Nov 2024 10:56:44 -0800 Subject: [PATCH 1/2] create user data cleaner lambda --- .github/workflows/lambda_deployments_dev.yml | 2 + .github/workflows/lambda_deployments_prod.yml | 2 + Dockerfiles/Dockerfile.game-data-cleaner | 7 +- Dockerfiles/Dockerfile.user-data-cleaner | 11 +-- config.json | 3 +- .../bgg_game_data_cleaner_fargate_trigger.py | 2 +- .../bgg_user_data_cleaner_fargate_trigger.py | 74 +++++++++++++++++++ 7 files changed, 85 insertions(+), 16 deletions(-) create mode 100644 modules/lambda_functions/bgg_user_data_cleaner_fargate_trigger.py diff --git a/.github/workflows/lambda_deployments_dev.yml b/.github/workflows/lambda_deployments_dev.yml index 8fc2015..7f0b839 100644 --- a/.github/workflows/lambda_deployments_dev.yml +++ b/.github/workflows/lambda_deployments_dev.yml @@ -28,6 +28,7 @@ jobs: zip -r generate_game_urls_lambda.zip generate_game_urls_lambda.py ../../config.py ../../utils zip -r generate_user_urls_lambda.zip generate_user_urls_lambda.py ../../config.py ../../utils zip -r bgg_orchestrator_fargate_trigger.zip bgg_orchestrator_fargate_trigger.py ../../config.py ../../utils + zip -r bgg_user_data_cleaner_fargate_trigger.zip bgg_user_data_cleaner_fargate_trigger.py ../../config.py ../../utils - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: @@ -41,4 +42,5 @@ jobs: aws lambda update-function-code --function-name=dev_bgg_generate_game_urls --zip-file=fileb://generate_game_urls_lambda.zip > /dev/null 2>&1 aws lambda update-function-code --function-name=dev_bgg_generate_user_urls --zip-file=fileb://generate_user_urls_lambda.zip > /dev/null 2>&1 aws lambda update-function-code --function-name=dev_bgg_orchestrator_fargate_trigger --zip-file=fileb://bgg_orchestrator_fargate_trigger.zip > /dev/null 2>&1 + aws lambda update-function-code --function-name=dev_bgg_user_data_cleaner_fargate_trigger 
--zip-file=fileb://bgg_user_data_cleaner_fargate_trigger.zip > /dev/null 2>&1 diff --git a/.github/workflows/lambda_deployments_prod.yml b/.github/workflows/lambda_deployments_prod.yml index 6f62108..c17fe4f 100644 --- a/.github/workflows/lambda_deployments_prod.yml +++ b/.github/workflows/lambda_deployments_prod.yml @@ -23,6 +23,7 @@ jobs: zip -r generate_game_urls_lambda.zip generate_game_urls_lambda.py ../../config.py ../../utils zip -r generate_user_urls_lambda.zip generate_user_urls_lambda.py ../../config.py ../../utils zip -r bgg_orchestrator_fargate_trigger.zip bgg_orchestrator_fargate_trigger.py ../../config.py ../../utils + zip -r bgg_user_data_cleaner_fargate_trigger.zip bgg_user_data_cleaner_fargate_trigger.py ../../config.py ../../utils - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: @@ -36,3 +37,4 @@ jobs: aws lambda update-function-code --function-name=bgg_generate_game_urls --zip-file=fileb://generate_game_urls_lambda.zip > /dev/null 2>&1 aws lambda update-function-code --function-name=bgg_generate_user_urls --zip-file=fileb://generate_user_urls_lambda.zip > /dev/null 2>&1 aws lambda update-function-code --function-name=bgg_orchestrator_fargate_trigger --zip-file=fileb://bgg_orchestrator_fargate_trigger.zip > /dev/null 2>&1 + aws lambda update-function-code --function-name=bgg_user_data_cleaner_fargate_trigger --zip-file=fileb://bgg_user_data_cleaner_fargate_trigger.zip > /dev/null 2>&1 diff --git a/Dockerfiles/Dockerfile.game-data-cleaner b/Dockerfiles/Dockerfile.game-data-cleaner index 21edce5..723118a 100644 --- a/Dockerfiles/Dockerfile.game-data-cleaner +++ b/Dockerfiles/Dockerfile.game-data-cleaner @@ -6,10 +6,6 @@ RUN apt-get update && apt-get install -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Set up environment variables -ARG FILENAME -ENV FILENAME=$FILENAME - # Install pipenv RUN pip3 install pipenv @@ -30,5 +26,4 @@ COPY config.py . 
# Install dependencies with pipenv RUN pipenv sync -ENTRYPOINT ["pipenv", "run", "python", "modules/game_data_cleaner/main.py"] -CMD ["$FILENAME"] \ No newline at end of file +ENTRYPOINT ["pipenv", "run", "python", "modules/game_data_cleaner/main.py"] \ No newline at end of file diff --git a/Dockerfiles/Dockerfile.user-data-cleaner b/Dockerfiles/Dockerfile.user-data-cleaner index d9d3079..e0516e1 100644 --- a/Dockerfiles/Dockerfile.user-data-cleaner +++ b/Dockerfiles/Dockerfile.user-data-cleaner @@ -6,16 +6,12 @@ RUN apt-get update && apt-get install -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Set up environment variables -ARG FILENAME -ENV FILENAME=$FILENAME - # Install pipenv RUN pip3 install pipenv # Create necessary directories -RUN mkdir -p data/prod/users/scraped_xml_raw \ - data/test/users/scraped_xml_raw \ +RUN mkdir -p data/prod/users/user_dfs_clean \ + data/test/users/user_dfs_clean \ modules # Copy the source code into the container @@ -27,5 +23,4 @@ COPY config.py . 
# Install dependencies with pipenv RUN pipenv sync -ENTRYPOINT ["pipenv", "run", "python", "modules/user_data_cleaner/main.py"] -CMD ["$FILENAME"] \ No newline at end of file +ENTRYPOINT ["pipenv", "run", "python", "modules/user_data_cleaner/main.py"] \ No newline at end of file diff --git a/config.json b/config.json index 182f7f6..57711e3 100644 --- a/config.json +++ b/config.json @@ -2,7 +2,8 @@ "s3_scraper_bucket": "boardgamegeek-scraper", "game_dfs_dirty": "game_dfs_dirty", "scraper_task_definition": "bgg_scraper", - "cleaner_task_definition": "bgg_cleaner", + "game_cleaner_task_definition": "bgg_game_data_cleaner", + "user_cleaner_task_definition": "user_game_data_cleaner", "ecs_cluster": "boardgamegeek", "orchestrator_task_definition": "bgg_orchestrator", "boardgamegeek_csv_filename": "boardgames_ranks.csv", diff --git a/modules/lambda_functions/bgg_game_data_cleaner_fargate_trigger.py b/modules/lambda_functions/bgg_game_data_cleaner_fargate_trigger.py index 721f6ad..c8f00d0 100644 --- a/modules/lambda_functions/bgg_game_data_cleaner_fargate_trigger.py +++ b/modules/lambda_functions/bgg_game_data_cleaner_fargate_trigger.py @@ -7,7 +7,7 @@ ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev") S3_SCRAPER_BUCKET = os.environ.get("S3_SCRAPER_BUCKET") -SCRAPER_TASK_DEFINITION = CONFIGS["cleaner_task_definition"] +SCRAPER_TASK_DEFINITION = CONFIGS["game_cleaner_task_definition"] TERRAFORM_STATE_BUCKET = os.environ.get("TF_VAR_BUCKET") diff --git a/modules/lambda_functions/bgg_user_data_cleaner_fargate_trigger.py b/modules/lambda_functions/bgg_user_data_cleaner_fargate_trigger.py new file mode 100644 index 0000000..a3c8ff7 --- /dev/null +++ b/modules/lambda_functions/bgg_user_data_cleaner_fargate_trigger.py @@ -0,0 +1,74 @@ +import json +import os + +import boto3 + +from config import CONFIGS + +ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev") +S3_SCRAPER_BUCKET = os.environ.get("S3_SCRAPER_BUCKET") +SCRAPER_TASK_DEFINITION = CONFIGS["user_cleaner_task_definition"] 
+TERRAFORM_STATE_BUCKET = os.environ.get("TF_VAR_BUCKET") + + +def get_terraform_state_file_for_vpc(): + """Fetch vpc.tfstate from the TF_VAR_BUCKET S3 bucket and return it parsed from JSON""" + + s3_client = boto3.client("s3") + terraform_state_file = ( + s3_client.get_object(Bucket=TERRAFORM_STATE_BUCKET, Key="vpc.tfstate")["Body"] + .read() + .decode("utf-8") + ) + + terraform_state_file = json.loads(terraform_state_file) + + print(terraform_state_file.keys()) + + return terraform_state_file + + +def lambda_handler(event, context): + """Run one Fargate task from the latest user data cleaner task definition revision (dev_-prefixed unless ENVIRONMENT is prod)""" + + print("Running User Data Cleaner task") + + terraform_state_file = get_terraform_state_file_for_vpc() + + task_definition = ( + f"dev_{SCRAPER_TASK_DEFINITION}" + if ENVIRONMENT != "prod" + else SCRAPER_TASK_DEFINITION + ) + print(task_definition) + + ecs_client = boto3.client("ecs") + + latest_version = ( + ecs_client.describe_task_definition(taskDefinition=task_definition) + .get("taskDefinition") + .get("revision") + ) + + response = ecs_client.run_task( + taskDefinition=f"{task_definition}:{latest_version}", + cluster=CONFIGS["ecs_cluster"], + launchType="FARGATE", + count=1, + platformVersion="LATEST", + enableECSManagedTags=False, + networkConfiguration={ + "awsvpcConfiguration": { + "subnets": terraform_state_file["outputs"]["public_subnets"]["value"], + "securityGroups": [ + terraform_state_file["outputs"]["sg_ec2_ssh_access"]["value"] + ], + "assignPublicIp": "ENABLED", + }, + }, + ) + + +if __name__ == "__main__": + + lambda_handler(None, None) From 62c949a30193966739ff4de3cce712457c5980c7 Mon Sep 17 00:00:00 2001 From: threnjen Date: Sun, 24 Nov 2024 11:07:04 -0800 Subject: [PATCH 2/2] update config --- config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.json b/config.json index 57711e3..cd4210a 100644 --- a/config.json +++ b/config.json @@ -3,7 +3,7 @@ "game_dfs_dirty": "game_dfs_dirty", "scraper_task_definition": "bgg_scraper", "game_cleaner_task_definition": 
"bgg_game_data_cleaner", - "user_cleaner_task_definition": "user_game_data_cleaner", + "user_cleaner_task_definition": "bgg_user_data_cleaner", "ecs_cluster": "boardgamegeek", "orchestrator_task_definition": "bgg_orchestrator", "boardgamegeek_csv_filename": "boardgames_ranks.csv",