From 91174db6f2205bc94c11d628f763308f64d7dc42 Mon Sep 17 00:00:00 2001 From: threnjen Date: Tue, 15 Oct 2024 12:15:23 -0700 Subject: [PATCH] update terraform makefile to clean env file for AWS --- .../dev_deployment_ecs_scraper.yml} | 9 -- .../temp_off/dev_eployment_ecs_cleaner.yml | 41 +++++++++ .../lambda_deployments_dev.yml | 0 ...prod_deployment_ecs_bgg_file_retrieval.yml | 35 +++++++ .../temp_off/prod_deployment_ecs_cleaner.yml | 32 +++++++ .../temp_off/prod_deployment_ecs_scraper.yml | 35 +++++++ .github/workflows/prod_deployment.yml | 60 ------------ .../prod_deployment_ecs_orchestrator.yml | 36 ++++++++ Dockerfiles/Dockerfile.bgg_orchestrator | 8 +- aws_dagster_bgg/README.md | 0 aws_dagster_bgg/__init__.py | 6 +- aws_dagster_bgg/assets/assets.py | 91 +++++++++++-------- aws_dagster_bgg/bgg_orchestrate.py | 5 +- aws_dagster_bgg/dagster_runs/.gitkeep | 0 aws_dagster_bgg/resources/__init__.py | 22 +++-- aws_terraform_bgg/iam_policies.tf | 3 +- aws_terraform_bgg/lambdas_direct.tf | 4 +- aws_terraform_bgg/makefile | 6 +- .../modules/ecs_task_definition/ecs.tf | 4 - aws_terraform_bgg/scripts/clean_env.py | 3 + aws_terraform_bgg/vpc/main.tf | 2 +- dagster_cloud.yaml | 4 - .../bgg_orchestrator_fargate_trigger.py | 18 ++-- .../pyproject.toml => pyproject.toml | 2 +- utils/processing_functions.py | 7 +- 25 files changed, 287 insertions(+), 146 deletions(-) rename .github/{workflows/dev_deployment.yml => temp_off/dev_deployment_ecs_scraper.yml} (73%) create mode 100644 .github/temp_off/dev_eployment_ecs_cleaner.yml rename .github/{workflows => temp_off}/lambda_deployments_dev.yml (100%) create mode 100644 .github/temp_off/prod_deployment_ecs_bgg_file_retrieval.yml create mode 100644 .github/temp_off/prod_deployment_ecs_cleaner.yml create mode 100644 .github/temp_off/prod_deployment_ecs_scraper.yml delete mode 100644 .github/workflows/prod_deployment.yml create mode 100644 .github/workflows/prod_deployment_ecs_orchestrator.yml create mode 100644 aws_dagster_bgg/README.md create mode 100644 aws_dagster_bgg/dagster_runs/.gitkeep delete mode 100644 dagster_cloud.yaml rename aws_dagster_bgg/pyproject.toml => pyproject.toml (74%) diff --git a/.github/workflows/dev_deployment.yml b/.github/temp_off/dev_deployment_ecs_scraper.yml similarity index 73% rename from .github/workflows/dev_deployment.yml rename to .github/temp_off/dev_deployment_ecs_scraper.yml index 94a620e..55b4e1a 100644 --- a/.github/workflows/dev_deployment.yml +++ b/.github/temp_off/dev_deployment_ecs_scraper.yml @@ -38,12 +38,3 @@ jobs: run: | DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-scraper --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . docker push $ECR_REGISTRY/$ECR_REPOSITORY - - - name: Build, tag, push image to AWS ECR boardgamegeek_cleaner - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: boardgamegeek_cleaner_dev - run: | - DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . 
- docker push $ECR_REGISTRY/$ECR_REPOSITORY - \ No newline at end of file diff --git a/.github/temp_off/dev_eployment_ecs_cleaner.yml b/.github/temp_off/dev_eployment_ecs_cleaner.yml new file mode 100644 index 0000000..3b5c486 --- /dev/null +++ b/.github/temp_off/dev_eployment_ecs_cleaner.yml @@ -0,0 +1,41 @@ +name: DEV deployment to boardgamegeek cleaner + +on: + push: + branches: + - main + pull_request: + branches: + - main + +permissions: + id-token: write # This is required for requesting the JWT + contents: read # This is required for actions/checkout + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }} + aws-region: ${{ secrets.AWS_REGION }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Build, tag, push image to AWS ECR boardgamegeek_cleaner + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: boardgamegeek_cleaner_dev + run: | + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . + docker push $ECR_REGISTRY/$ECR_REPOSITORY + \ No newline at end of file diff --git a/.github/workflows/lambda_deployments_dev.yml b/.github/temp_off/lambda_deployments_dev.yml similarity index 100% rename from .github/workflows/lambda_deployments_dev.yml rename to .github/temp_off/lambda_deployments_dev.yml diff --git a/.github/temp_off/prod_deployment_ecs_bgg_file_retrieval.yml b/.github/temp_off/prod_deployment_ecs_bgg_file_retrieval.yml new file mode 100644 index 0000000..9d52838 --- /dev/null +++ b/.github/temp_off/prod_deployment_ecs_bgg_file_retrieval.yml @@ -0,0 +1,35 @@ +name: PROD deployment to bgg file retrieval + +on: + push: + branches: + - main + +permissions: + id-token: write # This is required for requesting the JWT + contents: read # This is required for actions/checkout + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v2 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }} + aws-region: ${{ secrets.AWS_REGION }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Build, tag, push image to AWS ECR bgg_boardgame_file_retrieval + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: bgg_boardgame_file_retrieval + run: | + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.bgg_boardgame_file_retrieval -t $ECR_REGISTRY/$ECR_REPOSITORY . 
+ docker push $ECR_REGISTRY/$ECR_REPOSITORY diff --git a/.github/temp_off/prod_deployment_ecs_cleaner.yml b/.github/temp_off/prod_deployment_ecs_cleaner.yml new file mode 100644 index 0000000..d045a28 --- /dev/null +++ b/.github/temp_off/prod_deployment_ecs_cleaner.yml @@ -0,0 +1,32 @@ +name: PROD deployment to boardgamegeek cleaner + +on: + push: + branches: + - main + +permissions: + id-token: write # This is required for requesting the JWT + contents: read # This is required for actions/checkout + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v2 + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }} + aws-region: ${{ secrets.AWS_REGION }} + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + - name: Build, tag, push image to AWS ECR boardgamegeek_cleaner + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: boardgamegeek_cleaner + run: | + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . + docker push $ECR_REGISTRY/$ECR_REPOSITORY diff --git a/.github/temp_off/prod_deployment_ecs_scraper.yml b/.github/temp_off/prod_deployment_ecs_scraper.yml new file mode 100644 index 0000000..978e723 --- /dev/null +++ b/.github/temp_off/prod_deployment_ecs_scraper.yml @@ -0,0 +1,35 @@ +name: PROD deployment to boardgamegeek scraper + +on: + push: + branches: + - main + +permissions: + id-token: write # This is required for requesting the JWT + contents: read # This is required for actions/checkout + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v2 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }} + aws-region: ${{ secrets.AWS_REGION }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Build, tag, push image to AWS ECR boardgamegeek_scraper + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: boardgamegeek_scraper + run: | + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-scraper --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . 
+          docker push $ECR_REGISTRY/$ECR_REPOSITORY
diff --git a/.github/workflows/prod_deployment.yml b/.github/workflows/prod_deployment.yml
deleted file mode 100644
index 99366d4..0000000
--- a/.github/workflows/prod_deployment.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-name: PROD deployment to boardgamegeek scraper
-
-on:
-  push:
-    branches:
-      - main
-
-permissions:
-  id-token: write # This is required for requesting the JWT
-  contents: read # This is required for actions/checkout
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out code
-        uses: actions/checkout@v2
-
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }}
-          aws-region: ${{ secrets.AWS_REGION }}
-
-      - name: Login to Amazon ECR
-        id: login-ecr
-        uses: aws-actions/amazon-ecr-login@v2
-
-      - name: Build, tag, push image to AWS ECR bgg_boardgame_file_retrieval
-        env:
-          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-          ECR_REPOSITORY: bgg_boardgame_file_retrieval
-        run: |
-          DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.bgg_boardgame_file_retrieval -t $ECR_REGISTRY/$ECR_REPOSITORY .
-          docker push $ECR_REGISTRY/$ECR_REPOSITORY
-
-      - name: Build, tag, push image to AWS ECR boardgamegeek_scraper
-        env:
-          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-          ECR_REPOSITORY: boardgamegeek_scraper
-        run: |
-          DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-scraper --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY .
-          docker push $ECR_REGISTRY/$ECR_REPOSITORY
-
-      - name: Build, tag, push image to AWS ECR boardgamegeek_cleaner
-        env:
-          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-          ECR_REPOSITORY: boardgamegeek_cleaner
-        run: |
-          DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY .
-          docker push $ECR_REGISTRY/$ECR_REPOSITORY
-
-      - name: Build, tag, push image to AWS ECR boardgamegeek_cleaner
-        env:
-          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
-          ECR_REPOSITORY: bgg_orchestrator
-        run: |
-          DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.bgg_orchestrator --build-arg ENV="prod" --build-arg ASSET="all" -t $ECR_REGISTRY/$ECR_REPOSITORY .
-          docker push $ECR_REGISTRY/$ECR_REPOSITORY
-
\ No newline at end of file
diff --git a/.github/workflows/prod_deployment_ecs_orchestrator.yml b/.github/workflows/prod_deployment_ecs_orchestrator.yml
new file mode 100644
index 0000000..404899c
--- /dev/null
+++ b/.github/workflows/prod_deployment_ecs_orchestrator.yml
@@ -0,0 +1,36 @@
+name: PROD deployment to boardgamegeek orchestrator
+
+on:
+  push:
+    branches:
+      - main
+
+permissions:
+  id-token: write # This is required for requesting the JWT
+  contents: read # This is required for actions/checkout
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v2
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }}
+          aws-region: ${{ secrets.AWS_REGION }}
+
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v2
+
+      - name: Build, tag, push image to AWS ECR bgg_orchestrator
+        env:
+          ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+          ECR_REPOSITORY: bgg_orchestrator
+        run: |
+          DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.bgg_orchestrator --build-arg ENV="prod" --build-arg ASSET="all" -t $ECR_REGISTRY/$ECR_REPOSITORY .
+ docker push $ECR_REGISTRY/$ECR_REPOSITORY + \ No newline at end of file diff --git a/Dockerfiles/Dockerfile.bgg_orchestrator b/Dockerfiles/Dockerfile.bgg_orchestrator index b8e5d59..f846755 100644 --- a/Dockerfiles/Dockerfile.bgg_orchestrator +++ b/Dockerfiles/Dockerfile.bgg_orchestrator @@ -10,23 +10,23 @@ RUN apt-get update && apt-get install -y \ RUN pip3 install pipenv # Copy the source code into the container -# COPY data game_data_scraper/data COPY aws_dagster_bgg aws_dagster_bgg -COPY utils utils COPY aws_dagster_bgg/Pipfile* . +COPY utils utils COPY config.py . -COPY dagster_cloud.yaml . -COPY aws_dagster_bgg/pyproject.toml . +COPY pyproject.toml . # Install dependencies with pipenv RUN pipenv sync +# Set environment variables with ARG for build-time and ENV for runtime ARG ENV ENV ENV=$ENV ARG ASSET ENV ASSET=$ASSET +# Expose the port (if needed for the service) EXPOSE 3000 # Set the entry point and command diff --git a/aws_dagster_bgg/README.md b/aws_dagster_bgg/README.md new file mode 100644 index 0000000..e69de29 diff --git a/aws_dagster_bgg/__init__.py b/aws_dagster_bgg/__init__.py index a3884b4..7c0e964 100644 --- a/aws_dagster_bgg/__init__.py +++ b/aws_dagster_bgg/__init__.py @@ -29,9 +29,9 @@ "s3_resource": S3Resource( region_name=REGION, ), - "dynamodb_resource": DynamoDBResource( - region_name=REGION, table_name="boardgamegeek" - ), + # "dynamodb_resource": DynamoDBResource( + # region_name=REGION, table_name="boardgamegeek" + # ), "lambda_resource": LambdaHandlerResource(region_name=REGION), "ecs_resource": ECSResource(region_name=REGION), "config_resource": ConfigResource( diff --git a/aws_dagster_bgg/assets/assets.py b/aws_dagster_bgg/assets/assets.py index 175d207..feace1f 100644 --- a/aws_dagster_bgg/assets/assets.py +++ b/aws_dagster_bgg/assets/assets.py @@ -1,7 +1,10 @@ -from dagster import asset, ConfigurableResource, op +from dagster import asset, ConfigurableResource, op, get_dagster_logger import time import os from datetime import datetime +import logging + +logger = get_dagster_logger() @asset @@ -12,18 +15,23 @@ def bgg_games_csv( ) -> bool: f"""Triggers the lambda to get the games file from the BoardGameGeek website""" + logger.info("Getting the games csv file from BoardGameGeek") + configs = config_resource.get_config_file() s3_scraper_bucket = configs["s3_scraper_bucket"] - original_timestamps = get_original_timestamps( - s3_resource, - bucket=s3_scraper_bucket, - keys=[configs["boardgamegeek_csv_filename"]], - ) + original_timestamps = { + configs["boardgamegeek_csv_filename"]: s3_resource.get_last_modified( + bucket=s3_scraper_bucket, + key=configs["boardgamegeek_csv_filename"], + ) + } lambda_resource.invoke_lambda(function=configs["file_retrieval_lambda"]) + logger.info("Lambda invoked. 
Beginning timestamp checks...") + return compare_timestamps_for_refresh( original_timestamps=original_timestamps, file_list_to_check=[configs["boardgamegeek_csv_filename"]], @@ -51,6 +59,8 @@ def game_scraper_urls( Update the last modified timestamp of the keys in s3 """ + logger.info("Generating game scraper urls") + configs = config_resource.get_config_file() s3_scraper_bucket = configs["s3_scraper_bucket"] @@ -61,7 +71,7 @@ def game_scraper_urls( f"{raw_urls_directory}/group{i}{output_urls_json_suffix}" for i in range(1, 31) ] - return create_new_urls( + create_new_urls( lambda_resource, s3_resource, s3_scraper_bucket, @@ -69,6 +79,8 @@ def game_scraper_urls( lambda_function_name="bgg_generate_game_urls", ) + return True + @asset(deps=["game_scraper_urls"]) def scrape_game_data( @@ -118,7 +130,7 @@ def game_dfs_dirty( for key in data_set_file_names } - return compare_timestamps_for_refresh( + compare_timestamps_for_refresh( original_timestamps=original_timestamps, file_list_to_check=data_set_file_names, location_bucket=bucket, @@ -126,6 +138,8 @@ def game_dfs_dirty( s3_resource=s3_resource, ) + return True + @asset(deps=["game_dfs_dirty"]) def user_scraper_urls( @@ -155,7 +169,7 @@ def user_scraper_urls( f"{raw_urls_directory}/group{i}{output_urls_json_suffix}" for i in range(1, 31) ] - return create_new_urls( + create_new_urls( lambda_resource, s3_resource, s3_scraper_bucket, @@ -163,23 +177,7 @@ def user_scraper_urls( lambda_function_name="bgg_generate_user_urls", ) - -@op -def get_original_timestamps( - s3_resource: ConfigurableResource, - bucket: str, - keys: list[str], -) -> dict: - try: - return { - key: s3_resource.get_last_modified( - bucket=bucket, - key=key, - ) - for key in keys - } - except: - return {key: datetime(1970, 1, 1, 0, 0, 0, 0) for key in keys} + return True @op @@ -194,18 +192,25 @@ def compare_timestamps_for_refresh( time.sleep(sleep_timer) + logger.info("Checking timestamps...") + while len(file_list_to_check): + logger.info(f"Files to check: {file_list_to_check}") for key in file_list_to_check: + logger.info(f"Checking key: {key}") new_timestamp_tracker[key] = s3_resource.get_last_modified( bucket=location_bucket, key=key, ) + logger.info(f"Original timestamp: {original_timestamps[key]}") + logger.info(f"New timestamp: {new_timestamp_tracker[key]}") for key in original_timestamps: new_date = new_timestamp_tracker[key] old_date = original_timestamps[key] + logger.info(f"New date: {new_date}, Old date: {old_date}") if new_date > old_date: - print( + logger.info( f"new timestamp {new_date} is greater than old timestamp {old_date}" ) if key in file_list_to_check: @@ -225,15 +230,17 @@ def create_new_urls( lambda_function_name: str, ) -> bool: - original_timestamps = get_original_timestamps( - s3_resource=s3_resource, - bucket=s3_scraper_bucket, - keys=scraper_url_filenames, - ) + original_timestamps = { + key: s3_resource.get_last_modified( + bucket=s3_scraper_bucket, + key=key, + ) + for key in scraper_url_filenames + } lambda_resource.invoke_lambda(function=lambda_function_name) - return compare_timestamps_for_refresh( + compare_timestamps_for_refresh( original_timestamps=original_timestamps, file_list_to_check=scraper_url_filenames, location_bucket=s3_scraper_bucket, @@ -241,6 +248,8 @@ def create_new_urls( s3_resource=s3_resource, ) + return True + @op def scrape_data( @@ -259,11 +268,13 @@ def scrape_data( f"{output_key_directory}/{output_key_suffix.format(i)}" for i in range(1, 31) ] - original_timestamps = get_original_timestamps( - s3_resource=s3_resource, 
- bucket=bucket, - keys=scraper_raw_data_filenames, - ) + original_timestamps = { + key: s3_resource.get_last_modified( + bucket=bucket, + key=key, + ) + for key in scraper_raw_data_filenames + } game_scraper_url_filenames = s3_resource.list_file_keys( bucket=bucket, key=input_urls_key @@ -287,7 +298,7 @@ def scrape_data( } ecs_resource.launch_ecs_task(task_definition, overrides) - return compare_timestamps_for_refresh( + compare_timestamps_for_refresh( original_timestamps=original_timestamps, file_list_to_check=scraper_raw_data_filenames, location_bucket=bucket, @@ -295,6 +306,8 @@ def scrape_data( s3_resource=s3_resource, ) + return True + # @multi_asset(specs=[AssetSpec("asset1"), AssetSpec("asset2")]) # def materialize_1_and_2(): diff --git a/aws_dagster_bgg/bgg_orchestrate.py b/aws_dagster_bgg/bgg_orchestrate.py index e8847be..af050ac 100644 --- a/aws_dagster_bgg/bgg_orchestrate.py +++ b/aws_dagster_bgg/bgg_orchestrate.py @@ -1,6 +1,7 @@ import subprocess import time import sys +import os if __name__ == "__main__": @@ -16,12 +17,12 @@ if asset == "all": print("Executing all assets...") subprocess.run( - f"dagster job execute --package-name bgg_orchestrator -j bgg_job".split(" ") + f"dagster job execute --package-name aws_dagster_bgg -j bgg_job".split(" ") ) else: print(f"Executing asset: {asset}...") subprocess.run( - f"dagster asset materialize --select {asset} --package-name bgg_orchestrator".split( + f"dagster asset materialize --select {asset} --package-name aws_dagster_bgg".split( " " ) ) diff --git a/aws_dagster_bgg/dagster_runs/.gitkeep b/aws_dagster_bgg/dagster_runs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/aws_dagster_bgg/resources/__init__.py b/aws_dagster_bgg/resources/__init__.py index ed34523..be570af 100644 --- a/aws_dagster_bgg/resources/__init__.py +++ b/aws_dagster_bgg/resources/__init__.py @@ -1,9 +1,12 @@ -from dagster import EnvVar, ConfigurableResource +from dagster import EnvVar, ConfigurableResource, get_dagster_logger import boto3 import json from datetime import datetime import pytz import os +import logging + +logger = get_dagster_logger() REGION = os.environ.get("TF_VAR_REGION", "us-west-2") TERRAFORM_STATE_BUCKET = os.environ.get("TF_VAR_BUCKET") @@ -17,7 +20,7 @@ def get_dynamodb_client(self): return boto3.client("dynamodb", region_name=REGION) def get_last_modified(self, key): - print(f"Key: {key}") + logger.info(f"Key: {key}") return self.get_dynamodb_client().get_item( TableName=self.table_name, Key={ @@ -28,7 +31,7 @@ def get_last_modified(self, key): )["Item"]["last_modified"]["S"] def update_last_modified(self, key, timestamp): - print(f"Key: {key}, Timestamp: {timestamp}") + logger.info(f"Key: {key}, Timestamp: {timestamp}") self.get_dynamodb_client().put_item( TableName=self.table_name, Item={"filename": {"S": key}, "last_modified": {"S": timestamp}}, @@ -52,12 +55,13 @@ def get_s3_client(self): return boto3.client("s3", region_name=self.region_name) def get_last_modified(self, bucket: str, key): - print(f"Bucket: {bucket}, Key: {key}") + logger.info(f"Bucket: {bucket}, Key: {key}") try: return self.get_s3_client().get_object_attributes( Bucket=bucket, Key=key, ObjectAttributes=["ObjectParts"] )["LastModified"] - except: + except Exception as e: + logger.info(f"Error: {e}") return datetime(1970, 1, 1, 0, 0, 0, 0, pytz.UTC) def list_file_keys(self, bucket: str, key): @@ -67,7 +71,7 @@ def list_file_keys(self, bucket: str, key): return [x["Key"] for x in raw_files] def load_json(self, bucket: str, key): - print(f"Loading data 
from S3: {key}") + logger.info(f"Loading data from S3: {key}") object = ( self.get_s3_client() .get_object(Bucket=bucket, Key=key)["Body"] @@ -86,7 +90,7 @@ def get_config_file(self): try: return json.loads(open("config.json")) except: - print("No config file found") + logger.info("No config file found") configs = S3Resource(region_name=self.region_name).load_json( bucket=self.bucket, key="config.json" ) @@ -126,6 +130,10 @@ def launch_ecs_task(self, task_definition: str, overrides: dict = {}): terraform_state_file = self.get_terraform_state_file_for_vpc() + logger.info( + f"Got terraform state file. Launching ECS task for {task_definition}" + ) + self.get_ecs_client().run_task( taskDefinition=f"{task_definition}:{self.get_latest_task_revision(task_definition)}", cluster=ConfigResource()["ecs_task_components"]["cluster"], diff --git a/aws_terraform_bgg/iam_policies.tf b/aws_terraform_bgg/iam_policies.tf index d737571..295aa44 100644 --- a/aws_terraform_bgg/iam_policies.tf +++ b/aws_terraform_bgg/iam_policies.tf @@ -8,7 +8,8 @@ resource "aws_iam_policy" "S3_Access_boardgamegeek_scraper_policy" { Action = [ "s3:ListBucket", "s3:PutObject", - "s3:GetObject" + "s3:GetObject", + "s3:GetObjectAttributes" ] Effect = "Allow" Resource = [ diff --git a/aws_terraform_bgg/lambdas_direct.tf b/aws_terraform_bgg/lambdas_direct.tf index 331b4a4..1ace9c3 100644 --- a/aws_terraform_bgg/lambdas_direct.tf +++ b/aws_terraform_bgg/lambdas_direct.tf @@ -15,7 +15,7 @@ module "bgg_generate_game_urls" { source = "./modules/lambda_function_direct" function_name = "bgg_generate_game_urls" timeout = 900 - memory_size = 512 + memory_size = 1024 role = module.bgg_generate_game_urls_lambda_role.arn handler = "generate_game_urls_lambda.lambda_handler" layers = ["arn:aws:lambda:${var.REGION}:336392948345:layer:AWSSDKPandas-Python312:13"] @@ -26,7 +26,7 @@ module "bgg_generate_user_urls" { source = "./modules/lambda_function_direct" function_name = "bgg_generate_user_urls" timeout = 900 - memory_size = 512 + memory_size = 1024 role = module.bgg_generate_user_urls_lambda_role.arn handler = "generate_user_urls_lambda.lambda_handler" layers = ["arn:aws:lambda:${var.REGION}:336392948345:layer:AWSSDKPandas-Python312:13"] diff --git a/aws_terraform_bgg/makefile b/aws_terraform_bgg/makefile index c9f35f9..e206488 100644 --- a/aws_terraform_bgg/makefile +++ b/aws_terraform_bgg/makefile @@ -57,8 +57,11 @@ get_current_ip: get_terraform_bucket TF_VAR_MY_IP_FIRST_THREE_BLOCKS=$$response; \ echo "TF_VAR_MY_IP_FIRST_THREE_BLOCKS=$$TF_VAR_MY_IP_FIRST_THREE_BLOCKS" >> ../.env +enter_temp_vars: get_current_ip + echo "IS_LOCAL=False" >> ../.env + # Make the backend config file for terraform -backend_config: get_current_ip +backend_config: enter_temp_vars echo 'key="boardgamegeek.tfstate"' >> backend.conf # A target that runs the Python script and checks the output @@ -98,6 +101,7 @@ cleanup_superfluous_files: setup_boardgamegeek @echo "\nCleaning up garbage files" find . -type f -name "*.DS_Store" -delete find . -type f -name '*!lambda_function.zip' -delete + echo "PYTHONPATH=." >> ../.env failure-action: @echo "\nScript was not ready to run Terraform. Running cleanup and exiting, do not interrupt..." 
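The makefile targets above stage temporary values (the TF_VAR_* trio, IS_LOCAL=False, PYTHONPATH=.) into the repo-level .env for the duration of a Terraform run, and scripts/clean_env.py, patched below, strips them back out afterwards. Only that script's skip-list hunk is visible in this patch, so the following Python sketch of the cleanup pass is an assumption about the full file, not the script itself:

    # Hypothetical sketch of the cleanup scripts/clean_env.py performs.
    # Key names come from the diff; the surrounding file handling is assumed.
    TEMP_KEYS = (
        "TF_VAR_BUCKET",
        "TF_VAR_REGION",
        "TF_VAR_MY_IP_FIRST_THREE_BLOCKS",
        "PYTHONPATH",
        "IS_LOCAL",
        "ENV",
    )

    def clean_env(path: str = "../.env") -> None:
        with open(path) as f:
            lines = f.readlines()
        # Matching the key before "=" (rather than the substring test the
        # hunk shows) avoids dropping unrelated lines that merely contain
        # "ENV" somewhere in their name.
        kept = [
            line for line in lines
            if line.strip() and line.split("=", 1)[0] not in TEMP_KEYS
        ]
        with open(path, "w") as f:
            f.writelines(kept)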
diff --git a/aws_terraform_bgg/modules/ecs_task_definition/ecs.tf b/aws_terraform_bgg/modules/ecs_task_definition/ecs.tf index 1a2e2bb..3498050 100644 --- a/aws_terraform_bgg/modules/ecs_task_definition/ecs.tf +++ b/aws_terraform_bgg/modules/ecs_task_definition/ecs.tf @@ -7,10 +7,6 @@ resource "aws_ecs_task_definition" "task_definition" { image = var.image cpu = 0, portMappings = [ - { - containerPort = 80, - hostPort = 80 - }, { containerPort = 3000, hostPort = 3000 diff --git a/aws_terraform_bgg/scripts/clean_env.py b/aws_terraform_bgg/scripts/clean_env.py index 3bc32f2..3bed004 100644 --- a/aws_terraform_bgg/scripts/clean_env.py +++ b/aws_terraform_bgg/scripts/clean_env.py @@ -8,6 +8,9 @@ "TF_VAR_BUCKET" in line or "TF_VAR_REGION" in line or "TF_VAR_MY_IP_FIRST_THREE_BLOCKS" in line + or "PYTHONPATH" in line + or "IS_LOCAL" in line + or "ENV" in line ): continue if line == "\n": diff --git a/aws_terraform_bgg/vpc/main.tf b/aws_terraform_bgg/vpc/main.tf index 32c9426..58bb0b6 100644 --- a/aws_terraform_bgg/vpc/main.tf +++ b/aws_terraform_bgg/vpc/main.tf @@ -52,7 +52,7 @@ resource "aws_security_group" "ec2_dagster_port_access" { "${var.MY_IP_FIRST_THREE_BLOCKS}.0/24" ], "description": "", - "from_port": 22, + "from_port": 3000, "ipv6_cidr_blocks": [], "prefix_list_ids": [], "protocol": "tcp", diff --git a/dagster_cloud.yaml b/dagster_cloud.yaml deleted file mode 100644 index d5f7668..0000000 --- a/dagster_cloud.yaml +++ /dev/null @@ -1,4 +0,0 @@ -locations: - - location_name: boardgamegeek - code_source: - package_name: aws_dagster_bgg \ No newline at end of file diff --git a/lambda_functions/bgg_orchestrator_fargate_trigger.py b/lambda_functions/bgg_orchestrator_fargate_trigger.py index fcc6d6a..0c59fa3 100644 --- a/lambda_functions/bgg_orchestrator_fargate_trigger.py +++ b/lambda_functions/bgg_orchestrator_fargate_trigger.py @@ -46,6 +46,15 @@ def lambda_handler(event, context): .get("revision") ) + subnets = terraform_state_file["outputs"]["public_subnets"]["value"] + print(subnets) + + security_groups = [ + terraform_state_file["outputs"]["sg_ec2_ssh_access"]["value"], + terraform_state_file["outputs"]["sg_ec2_dagster_port_access"]["value"], + ] + print(security_groups) + response = ecs_client.run_task( taskDefinition=f"{task_definition}:{latest_version}", cluster="boardgamegeek", @@ -55,13 +64,8 @@ def lambda_handler(event, context): enableECSManagedTags=False, networkConfiguration={ "awsvpcConfiguration": { - "subnets": terraform_state_file["outputs"]["public_subnets"]["value"], - "securityGroups": [ - terraform_state_file["outputs"]["sg_ec2_ssh_access"]["value"], - terraform_state_file["outputs"]["sg_ec2_dagster_port_access"][ - "value" - ], - ], + "subnets": subnets, + "securityGroups": security_groups, "assignPublicIp": "ENABLED", }, }, diff --git a/aws_dagster_bgg/pyproject.toml b/pyproject.toml similarity index 74% rename from aws_dagster_bgg/pyproject.toml rename to pyproject.toml index 85772aa..34b27c5 100644 --- a/aws_dagster_bgg/pyproject.toml +++ b/pyproject.toml @@ -3,4 +3,4 @@ requires = ["setuptools"] build-backend = "setuptools.build_meta" [tool.dagster] -module_name = "bgg_orchestrator" +module_name = "aws_dagster_bgg" diff --git a/utils/processing_functions.py b/utils/processing_functions.py index f7a7cd5..a0537dd 100644 --- a/utils/processing_functions.py +++ b/utils/processing_functions.py @@ -29,10 +29,13 @@ def save_file_local_first(path: str, file_name: str, data: Union[pd.DataFrame, dict]): file_path = f"{path}/{file_name}" + print(file_path) if IS_LOCAL: + 
print(f"Saving {file_name} to local") LocalFileHandler().save_file(file_path=file_path, data=data) if ENV == "prod": + print(f"Saving {file_name} to S3") S3FileHandler().save_file(file_path=file_path, data=data) @@ -45,7 +48,9 @@ def load_file_local_first(path: str, file_name: str): except FileNotFoundError as e: print(f"Downloading {file_name} from S3") file = S3FileHandler().load_file(file_path=file_path) - LocalFileHandler().save_file(file_path=file_path, data=file) + if IS_LOCAL: + print(f"Saving {file_name} to local") + LocalFileHandler().save_file(file_path=file_path, data=file) return file
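Taken together, the asset changes in this patch settle on a single idiom: record each expected S3 output's LastModified stamp inline, trigger the Lambda or ECS work, poll until every key has been rewritten, then return a plain True so downstream assets materialize even though the comparison result itself is discarded. A condensed sketch of that loop, with the resource methods duck-typed from the diff (get_last_modified, invoke_lambda) and the polling cadence assumed rather than taken from the source:

    import time

    def wait_for_refresh(s3_resource, lambda_resource, bucket: str,
                         keys: list[str], function: str,
                         sleep_timer: int = 300) -> bool:
        # Snapshot LastModified for every expected output before triggering
        # work; S3Resource.get_last_modified falls back to the epoch when a
        # key does not exist yet, so first-time writes also register as newer.
        original = {
            key: s3_resource.get_last_modified(bucket=bucket, key=key)
            for key in keys
        }
        lambda_resource.invoke_lambda(function=function)
        pending = list(keys)
        while pending:
            time.sleep(sleep_timer)  # assumed cadence; not shown in the hunks
            for key in list(pending):
                new = s3_resource.get_last_modified(bucket=bucket, key=key)
                if new > original[key]:
                    pending.remove(key)
        return True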