diff --git a/.github/workflows/lambda_deployments_dev.yml b/.github/workflows/lambda_deployments_dev.yml index 8fc2015..7f0b839 100644 --- a/.github/workflows/lambda_deployments_dev.yml +++ b/.github/workflows/lambda_deployments_dev.yml @@ -28,6 +28,7 @@ jobs: zip -r generate_game_urls_lambda.zip generate_game_urls_lambda.py ../../config.py ../../utils zip -r generate_user_urls_lambda.zip generate_user_urls_lambda.py ../../config.py ../../utils zip -r bgg_orchestrator_fargate_trigger.zip bgg_orchestrator_fargate_trigger.py ../../config.py ../../utils + zip -r bgg_user_data_cleaner_fargate_trigger.zip bgg_user_data_cleaner_fargate_trigger.py ../../config.py ../../utils - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: @@ -41,4 +42,5 @@ jobs: aws lambda update-function-code --function-name=dev_bgg_generate_game_urls --zip-file=fileb://generate_game_urls_lambda.zip > /dev/null 2>&1 aws lambda update-function-code --function-name=dev_bgg_generate_user_urls --zip-file=fileb://generate_user_urls_lambda.zip > /dev/null 2>&1 aws lambda update-function-code --function-name=dev_bgg_orchestrator_fargate_trigger --zip-file=fileb://bgg_orchestrator_fargate_trigger.zip > /dev/null 2>&1 + aws lambda update-function-code --function-name=dev_bgg_user_data_cleaner_fargate_trigger --zip-file=fileb://bgg_user_data_cleaner_fargate_trigger.zip > /dev/null 2>&1 diff --git a/.github/workflows/lambda_deployments_prod.yml b/.github/workflows/lambda_deployments_prod.yml index 6f62108..c17fe4f 100644 --- a/.github/workflows/lambda_deployments_prod.yml +++ b/.github/workflows/lambda_deployments_prod.yml @@ -23,6 +23,7 @@ jobs: zip -r generate_game_urls_lambda.zip generate_game_urls_lambda.py ../../config.py ../../utils zip -r generate_user_urls_lambda.zip generate_user_urls_lambda.py ../../config.py ../../utils zip -r bgg_orchestrator_fargate_trigger.zip bgg_orchestrator_fargate_trigger.py ../../config.py ../../utils + zip -r 
bgg_user_data_cleaner_fargate_trigger.zip bgg_user_data_cleaner_fargate_trigger.py ../../config.py ../../utils - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: @@ -36,3 +37,4 @@ jobs: aws lambda update-function-code --function-name=bgg_generate_game_urls --zip-file=fileb://generate_game_urls_lambda.zip > /dev/null 2>&1 aws lambda update-function-code --function-name=bgg_generate_user_urls --zip-file=fileb://generate_user_urls_lambda.zip > /dev/null 2>&1 aws lambda update-function-code --function-name=bgg_orchestrator_fargate_trigger --zip-file=fileb://bgg_orchestrator_fargate_trigger.zip > /dev/null 2>&1 + aws lambda update-function-code --function-name=bgg_user_data_cleaner_fargate_trigger --zip-file=fileb://bgg_user_data_cleaner_fargate_trigger.zip > /dev/null 2>&1 diff --git a/Dockerfiles/Dockerfile.game-data-cleaner b/Dockerfiles/Dockerfile.game-data-cleaner index 21edce5..723118a 100644 --- a/Dockerfiles/Dockerfile.game-data-cleaner +++ b/Dockerfiles/Dockerfile.game-data-cleaner @@ -6,10 +6,6 @@ RUN apt-get update && apt-get install -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Set up environment variables -ARG FILENAME -ENV FILENAME=$FILENAME - # Install pipenv RUN pip3 install pipenv @@ -30,5 +26,4 @@ COPY config.py . 
# Install dependencies with pipenv RUN pipenv sync -ENTRYPOINT ["pipenv", "run", "python", "modules/game_data_cleaner/main.py"] -CMD ["$FILENAME"] \ No newline at end of file +ENTRYPOINT ["pipenv", "run", "python", "modules/game_data_cleaner/main.py"] \ No newline at end of file diff --git a/Dockerfiles/Dockerfile.user-data-cleaner b/Dockerfiles/Dockerfile.user-data-cleaner index d9d3079..e0516e1 100644 --- a/Dockerfiles/Dockerfile.user-data-cleaner +++ b/Dockerfiles/Dockerfile.user-data-cleaner @@ -6,16 +6,12 @@ RUN apt-get update && apt-get install -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Set up environment variables -ARG FILENAME -ENV FILENAME=$FILENAME - # Install pipenv RUN pip3 install pipenv # Create necessary directories -RUN mkdir -p data/prod/users/scraped_xml_raw \ - data/test/users/scraped_xml_raw \ +RUN mkdir -p data/prod/users/user_dfs_clean \ + data/test/users/user_dfs_clean \ modules # Copy the source code into the container @@ -27,5 +23,4 @@ COPY config.py . 
# Install dependencies with pipenv RUN pipenv sync -ENTRYPOINT ["pipenv", "run", "python", "modules/user_data_cleaner/main.py"] -CMD ["$FILENAME"] \ No newline at end of file +ENTRYPOINT ["pipenv", "run", "python", "modules/user_data_cleaner/main.py"] \ No newline at end of file diff --git a/config.json b/config.json index 182f7f6..cd4210a 100644 --- a/config.json +++ b/config.json @@ -2,7 +2,8 @@ "s3_scraper_bucket": "boardgamegeek-scraper", "game_dfs_dirty": "game_dfs_dirty", "scraper_task_definition": "bgg_scraper", - "cleaner_task_definition": "bgg_cleaner", + "game_cleaner_task_definition": "bgg_game_data_cleaner", + "user_cleaner_task_definition": "bgg_user_data_cleaner", "ecs_cluster": "boardgamegeek", "orchestrator_task_definition": "bgg_orchestrator", "boardgamegeek_csv_filename": "boardgames_ranks.csv", diff --git a/modules/lambda_functions/bgg_game_data_cleaner_fargate_trigger.py b/modules/lambda_functions/bgg_game_data_cleaner_fargate_trigger.py index 721f6ad..c8f00d0 100644 --- a/modules/lambda_functions/bgg_game_data_cleaner_fargate_trigger.py +++ b/modules/lambda_functions/bgg_game_data_cleaner_fargate_trigger.py @@ -7,7 +7,7 @@ ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev") S3_SCRAPER_BUCKET = os.environ.get("S3_SCRAPER_BUCKET") -SCRAPER_TASK_DEFINITION = CONFIGS["cleaner_task_definition"] +SCRAPER_TASK_DEFINITION = CONFIGS["game_cleaner_task_definition"] TERRAFORM_STATE_BUCKET = os.environ.get("TF_VAR_BUCKET") diff --git a/modules/lambda_functions/bgg_user_data_cleaner_fargate_trigger.py b/modules/lambda_functions/bgg_user_data_cleaner_fargate_trigger.py new file mode 100644 index 0000000..a3c8ff7 --- /dev/null +++ b/modules/lambda_functions/bgg_user_data_cleaner_fargate_trigger.py @@ -0,0 +1,74 @@ +import json +import os + +import boto3 + +from config import CONFIGS + +ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev") +S3_SCRAPER_BUCKET = os.environ.get("S3_SCRAPER_BUCKET") +SCRAPER_TASK_DEFINITION = CONFIGS["user_cleaner_task_definition"] 
+TERRAFORM_STATE_BUCKET = os.environ.get("TF_VAR_BUCKET")
+
+
+def get_terraform_state_file_for_vpc():
+    """Fetch and parse the VPC Terraform state (``vpc.tfstate``) from S3.
+
+    Returns:
+        dict: The decoded JSON state document.
+
+    Raises:
+        RuntimeError: If the ``TF_VAR_BUCKET`` environment variable is unset.
+    """
+    if not TERRAFORM_STATE_BUCKET:
+        raise RuntimeError("TF_VAR_BUCKET environment variable is not set")
+
+    s3_client = boto3.client("s3")
+    state = s3_client.get_object(Bucket=TERRAFORM_STATE_BUCKET, Key="vpc.tfstate")
+    terraform_state_file = json.loads(state["Body"].read().decode("utf-8"))
+
+    print(terraform_state_file.keys())
+    return terraform_state_file
+
+
+def lambda_handler(event, context):
+    """Launch the user data cleaner as a one-off Fargate task on ECS.
+
+    ``event`` and ``context`` are the standard Lambda arguments; both are unused.
+    Subnets and the security group are read from the VPC Terraform outputs.
+    """
+    print("Running User Data Cleaner task")
+
+    vpc_outputs = get_terraform_state_file_for_vpc()["outputs"]
+
+    # Every environment other than prod runs the ``dev_``-prefixed definition.
+    prefix = "" if ENVIRONMENT == "prod" else "dev_"
+    task_definition = f"{prefix}{SCRAPER_TASK_DEFINITION}"
+    print(task_definition)
+
+    ecs_client = boto3.client("ecs")
+    described = ecs_client.describe_task_definition(taskDefinition=task_definition)
+    latest_version = described["taskDefinition"]["revision"]
+
+    response = ecs_client.run_task(
+        taskDefinition=f"{task_definition}:{latest_version}",
+        cluster=CONFIGS["ecs_cluster"],
+        launchType="FARGATE",
+        count=1,
+        platformVersion="LATEST",
+        enableECSManagedTags=False,
+        networkConfiguration={
+            "awsvpcConfiguration": {
+                "subnets": vpc_outputs["public_subnets"]["value"],
+                "securityGroups": [vpc_outputs["sg_ec2_ssh_access"]["value"]],
+                "assignPublicIp": "ENABLED",
+            },
+        },
+    )
+    # run_task reports placement problems in "failures" instead of raising.
+    if response.get("failures"):
+        print(f"Failed to start task: {response['failures']}")
+
+
+if __name__ == "__main__":
+    lambda_handler(None, None)