Skip to content

Commit

Permalink
Merge pull request #64 from threnjen/updates_user_data_cleaner
Browse files Browse the repository at this point in the history
create user data cleaner lambda
  • Loading branch information
threnjen authored Nov 24, 2024
2 parents 85a6a74 + 62c949a commit e80c04f
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 16 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/lambda_deployments_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ jobs:
zip -r generate_game_urls_lambda.zip generate_game_urls_lambda.py ../../config.py ../../utils
zip -r generate_user_urls_lambda.zip generate_user_urls_lambda.py ../../config.py ../../utils
zip -r bgg_orchestrator_fargate_trigger.zip bgg_orchestrator_fargate_trigger.py ../../config.py ../../utils
zip -r bgg_user_data_cleaner_fargate_trigger.zip bgg_user_data_cleaner_fargate_trigger.py ../../config.py ../../utils
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
Expand All @@ -41,4 +42,5 @@ jobs:
aws lambda update-function-code --function-name=dev_bgg_generate_game_urls --zip-file=fileb://generate_game_urls_lambda.zip > /dev/null 2>&1
aws lambda update-function-code --function-name=dev_bgg_generate_user_urls --zip-file=fileb://generate_user_urls_lambda.zip > /dev/null 2>&1
aws lambda update-function-code --function-name=dev_bgg_orchestrator_fargate_trigger --zip-file=fileb://bgg_orchestrator_fargate_trigger.zip > /dev/null 2>&1
aws lambda update-function-code --function-name=dev_bgg_user_data_cleaner_fargate_trigger --zip-file=fileb://bgg_user_data_cleaner_fargate_trigger.zip > /dev/null 2>&1
2 changes: 2 additions & 0 deletions .github/workflows/lambda_deployments_prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
zip -r generate_game_urls_lambda.zip generate_game_urls_lambda.py ../../config.py ../../utils
zip -r generate_user_urls_lambda.zip generate_user_urls_lambda.py ../../config.py ../../utils
zip -r bgg_orchestrator_fargate_trigger.zip bgg_orchestrator_fargate_trigger.py ../../config.py ../../utils
zip -r bgg_user_data_cleaner_fargate_trigger.zip bgg_user_data_cleaner_fargate_trigger.py ../../config.py ../../utils
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
Expand All @@ -36,3 +37,4 @@ jobs:
aws lambda update-function-code --function-name=bgg_generate_game_urls --zip-file=fileb://generate_game_urls_lambda.zip > /dev/null 2>&1
aws lambda update-function-code --function-name=bgg_generate_user_urls --zip-file=fileb://generate_user_urls_lambda.zip > /dev/null 2>&1
aws lambda update-function-code --function-name=bgg_orchestrator_fargate_trigger --zip-file=fileb://bgg_orchestrator_fargate_trigger.zip > /dev/null 2>&1
# The archive name must match the zip built in the packaging step (no "dev_" prefix on the file).
aws lambda update-function-code --function-name=bgg_user_data_cleaner_fargate_trigger --zip-file=fileb://bgg_user_data_cleaner_fargate_trigger.zip > /dev/null 2>&1
7 changes: 1 addition & 6 deletions Dockerfiles/Dockerfile.game-data-cleaner
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ RUN apt-get update && apt-get install -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Set up environment variables
ARG FILENAME
ENV FILENAME=$FILENAME

# Install pipenv
RUN pip3 install pipenv

Expand All @@ -30,5 +26,4 @@ COPY config.py .
# Install dependencies with pipenv
RUN pipenv sync

ENTRYPOINT ["pipenv", "run", "python", "modules/game_data_cleaner/main.py"]
CMD ["$FILENAME"]
ENTRYPOINT ["pipenv", "run", "python", "modules/game_data_cleaner/main.py"]
11 changes: 3 additions & 8 deletions Dockerfiles/Dockerfile.user-data-cleaner
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,12 @@ RUN apt-get update && apt-get install -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Set up environment variables
ARG FILENAME
ENV FILENAME=$FILENAME

# Install pipenv
RUN pip3 install pipenv

# Create necessary directories
RUN mkdir -p data/prod/users/scraped_xml_raw \
data/test/users/scraped_xml_raw \
RUN mkdir -p data/prod/users/user_dfs_clean \
data/test/users/user_dfs_clean \
modules

# Copy the source code into the container
Expand All @@ -27,5 +23,4 @@ COPY config.py .
# Install dependencies with pipenv
RUN pipenv sync

ENTRYPOINT ["pipenv", "run", "python", "modules/user_data_cleaner/main.py"]
CMD ["$FILENAME"]
ENTRYPOINT ["pipenv", "run", "python", "modules/user_data_cleaner/main.py"]
3 changes: 2 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
"s3_scraper_bucket": "boardgamegeek-scraper",
"game_dfs_dirty": "game_dfs_dirty",
"scraper_task_definition": "bgg_scraper",
"cleaner_task_definition": "bgg_cleaner",
"game_cleaner_task_definition": "bgg_game_data_cleaner",
"user_cleaner_task_definition": "bgg_user_data_cleaner",
"ecs_cluster": "boardgamegeek",
"orchestrator_task_definition": "bgg_orchestrator",
"boardgamegeek_csv_filename": "boardgames_ranks.csv",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
S3_SCRAPER_BUCKET = os.environ.get("S3_SCRAPER_BUCKET")
SCRAPER_TASK_DEFINITION = CONFIGS["cleaner_task_definition"]
SCRAPER_TASK_DEFINITION = CONFIGS["game_cleaner_task_definition"]
TERRAFORM_STATE_BUCKET = os.environ.get("TF_VAR_BUCKET")


Expand Down
74 changes: 74 additions & 0 deletions modules/lambda_functions/bgg_user_data_cleaner_fargate_trigger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import json
import os

import boto3

from config import CONFIGS

# Deployment environment; anything other than "prod" is treated as dev.
ENVIRONMENT = os.getenv("ENVIRONMENT", "dev")
# Bucket names are injected through the Lambda's environment variables.
S3_SCRAPER_BUCKET = os.getenv("S3_SCRAPER_BUCKET")
TERRAFORM_STATE_BUCKET = os.getenv("TF_VAR_BUCKET")
# NOTE: despite the name, this holds the *user data cleaner* task definition.
SCRAPER_TASK_DEFINITION = CONFIGS["user_cleaner_task_definition"]


def get_terraform_state_file_for_vpc():
    """Fetch and parse the VPC Terraform state stored in S3.

    Reads the ``vpc.tfstate`` object from the bucket named by
    ``TERRAFORM_STATE_BUCKET``, decodes it as UTF-8, parses it as JSON,
    prints the top-level keys, and returns the parsed document.
    """
    s3 = boto3.client("s3")
    state_object = s3.get_object(Bucket=TERRAFORM_STATE_BUCKET, Key="vpc.tfstate")
    raw_state = state_object["Body"].read().decode("utf-8")

    parsed_state = json.loads(raw_state)

    print(parsed_state.keys())

    return parsed_state


def lambda_handler(event, context):
    """Launch the user data cleaner as a single Fargate task.

    Resolves the task definition name (prefixed with ``dev_`` unless
    ``ENVIRONMENT`` is ``"prod"``), asks ECS for that definition's revision,
    and runs one task on the ``boardgamegeek`` cluster using the subnets and
    security group read from the VPC Terraform state outputs.

    Args:
        event: Lambda invocation payload; not read by this handler.
        context: Lambda runtime context; not read by this handler.

    Returns:
        None.
    """
    print("Running User Data Cleaner task")

    terraform_state_file = get_terraform_state_file_for_vpc()

    task_definition = (
        f"dev_{SCRAPER_TASK_DEFINITION}"
        if ENVIRONMENT != "prod"
        else SCRAPER_TASK_DEFINITION
    )
    print(task_definition)

    ecs_client = boto3.client("ecs")

    latest_version = (
        ecs_client.describe_task_definition(taskDefinition=task_definition)
        .get("taskDefinition")
        .get("revision")
    )

    response = ecs_client.run_task(
        taskDefinition=f"{task_definition}:{latest_version}",
        cluster="boardgamegeek",
        launchType="FARGATE",
        count=1,
        platformVersion="LATEST",
        enableECSManagedTags=False,
        networkConfiguration={
            "awsvpcConfiguration": {
                "subnets": terraform_state_file["outputs"]["public_subnets"]["value"],
                "securityGroups": [
                    terraform_state_file["outputs"]["sg_ec2_ssh_access"]["value"]
                ],
                "assignPublicIp": "ENABLED",
            },
        },
    )

    # run_task can report launch problems in the response body instead of
    # raising, so log them rather than silently discarding the response.
    failures = response.get("failures")
    if failures:
        print(f"ECS run_task reported failures: {failures}")


if __name__ == "__main__":
    # Local/manual run: the handler does not read its arguments.
    lambda_handler(event=None, context=None)

0 comments on commit e80c04f

Please sign in to comment.