From b01c6c2a4d08004beefa00c02b298fa887515fa4 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 10:19:48 -0800 Subject: [PATCH 01/26] temporarily disable several deployment yml --- .github/{workflows => temp_off}/deploy_file_to_s3.yml | 0 .../{workflows => temp_off}/dev_deployment_ecs_game_cleaner.yml | 0 .../{workflows => temp_off}/dev_deployment_ecs_orchestrator.yml | 0 .../dev_deployment_ecs_ratings_cleaner.yml | 0 .github/{workflows => temp_off}/dev_deployment_ecs_scraper.yml | 0 .../{workflows => temp_off}/dev_deployment_ecs_users_cleaner.yml | 0 .github/{workflows => temp_off}/lambda_deployments_dev.yml | 0 .github/{workflows => temp_off}/lambda_deployments_prod.yml | 0 .../prod_deployment_ecs_bgg_file_retrieval.yml | 0 .../{workflows => temp_off}/prod_deployment_ecs_game_cleaner.yml | 0 .../{workflows => temp_off}/prod_deployment_ecs_orchestrator.yml | 0 .../prod_deployment_ecs_ratings_cleaner.yml | 0 .github/{workflows => temp_off}/prod_deployment_ecs_scraper.yml | 0 .../{workflows => temp_off}/prod_deployment_ecs_users_cleaner.yml | 0 .../prod_deployment_rag_description_generation.yml | 0 15 files changed, 0 insertions(+), 0 deletions(-) rename .github/{workflows => temp_off}/deploy_file_to_s3.yml (100%) rename .github/{workflows => temp_off}/dev_deployment_ecs_game_cleaner.yml (100%) rename .github/{workflows => temp_off}/dev_deployment_ecs_orchestrator.yml (100%) rename .github/{workflows => temp_off}/dev_deployment_ecs_ratings_cleaner.yml (100%) rename .github/{workflows => temp_off}/dev_deployment_ecs_scraper.yml (100%) rename .github/{workflows => temp_off}/dev_deployment_ecs_users_cleaner.yml (100%) rename .github/{workflows => temp_off}/lambda_deployments_dev.yml (100%) rename .github/{workflows => temp_off}/lambda_deployments_prod.yml (100%) rename .github/{workflows => temp_off}/prod_deployment_ecs_bgg_file_retrieval.yml (100%) rename .github/{workflows => temp_off}/prod_deployment_ecs_game_cleaner.yml (100%) rename .github/{workflows => temp_off}/prod_deployment_ecs_orchestrator.yml (100%) rename .github/{workflows => temp_off}/prod_deployment_ecs_ratings_cleaner.yml (100%) rename .github/{workflows => temp_off}/prod_deployment_ecs_scraper.yml (100%) rename .github/{workflows => temp_off}/prod_deployment_ecs_users_cleaner.yml (100%) rename .github/{workflows => temp_off}/prod_deployment_rag_description_generation.yml (100%) diff --git a/.github/workflows/deploy_file_to_s3.yml b/.github/temp_off/deploy_file_to_s3.yml similarity index 100% rename from .github/workflows/deploy_file_to_s3.yml rename to .github/temp_off/deploy_file_to_s3.yml diff --git a/.github/workflows/dev_deployment_ecs_game_cleaner.yml b/.github/temp_off/dev_deployment_ecs_game_cleaner.yml similarity index 100% rename from .github/workflows/dev_deployment_ecs_game_cleaner.yml rename to .github/temp_off/dev_deployment_ecs_game_cleaner.yml diff --git a/.github/workflows/dev_deployment_ecs_orchestrator.yml b/.github/temp_off/dev_deployment_ecs_orchestrator.yml similarity index 100% rename from .github/workflows/dev_deployment_ecs_orchestrator.yml rename to .github/temp_off/dev_deployment_ecs_orchestrator.yml diff --git a/.github/workflows/dev_deployment_ecs_ratings_cleaner.yml b/.github/temp_off/dev_deployment_ecs_ratings_cleaner.yml similarity index 100% rename from .github/workflows/dev_deployment_ecs_ratings_cleaner.yml rename to .github/temp_off/dev_deployment_ecs_ratings_cleaner.yml diff --git a/.github/workflows/dev_deployment_ecs_scraper.yml b/.github/temp_off/dev_deployment_ecs_scraper.yml similarity index 100% rename from .github/workflows/dev_deployment_ecs_scraper.yml rename to .github/temp_off/dev_deployment_ecs_scraper.yml diff --git a/.github/workflows/dev_deployment_ecs_users_cleaner.yml b/.github/temp_off/dev_deployment_ecs_users_cleaner.yml similarity index 100% rename from .github/workflows/dev_deployment_ecs_users_cleaner.yml rename to .github/temp_off/dev_deployment_ecs_users_cleaner.yml diff --git a/.github/workflows/lambda_deployments_dev.yml b/.github/temp_off/lambda_deployments_dev.yml similarity index 100% rename from .github/workflows/lambda_deployments_dev.yml rename to .github/temp_off/lambda_deployments_dev.yml diff --git a/.github/workflows/lambda_deployments_prod.yml b/.github/temp_off/lambda_deployments_prod.yml similarity index 100% rename from .github/workflows/lambda_deployments_prod.yml rename to .github/temp_off/lambda_deployments_prod.yml diff --git a/.github/workflows/prod_deployment_ecs_bgg_file_retrieval.yml b/.github/temp_off/prod_deployment_ecs_bgg_file_retrieval.yml similarity index 100% rename from .github/workflows/prod_deployment_ecs_bgg_file_retrieval.yml rename to .github/temp_off/prod_deployment_ecs_bgg_file_retrieval.yml diff --git a/.github/workflows/prod_deployment_ecs_game_cleaner.yml b/.github/temp_off/prod_deployment_ecs_game_cleaner.yml similarity index 100% rename from .github/workflows/prod_deployment_ecs_game_cleaner.yml rename to .github/temp_off/prod_deployment_ecs_game_cleaner.yml diff --git a/.github/workflows/prod_deployment_ecs_orchestrator.yml b/.github/temp_off/prod_deployment_ecs_orchestrator.yml similarity index 100% rename from .github/workflows/prod_deployment_ecs_orchestrator.yml rename to .github/temp_off/prod_deployment_ecs_orchestrator.yml diff --git a/.github/workflows/prod_deployment_ecs_ratings_cleaner.yml b/.github/temp_off/prod_deployment_ecs_ratings_cleaner.yml similarity index 100% rename from .github/workflows/prod_deployment_ecs_ratings_cleaner.yml rename to .github/temp_off/prod_deployment_ecs_ratings_cleaner.yml diff --git a/.github/workflows/prod_deployment_ecs_scraper.yml b/.github/temp_off/prod_deployment_ecs_scraper.yml similarity index 100% rename from .github/workflows/prod_deployment_ecs_scraper.yml rename to .github/temp_off/prod_deployment_ecs_scraper.yml diff --git a/.github/workflows/prod_deployment_ecs_users_cleaner.yml b/.github/temp_off/prod_deployment_ecs_users_cleaner.yml similarity index 100% rename from .github/workflows/prod_deployment_ecs_users_cleaner.yml rename to .github/temp_off/prod_deployment_ecs_users_cleaner.yml diff --git a/.github/workflows/prod_deployment_rag_description_generation.yml b/.github/temp_off/prod_deployment_rag_description_generation.yml similarity index 100% rename from .github/workflows/prod_deployment_rag_description_generation.yml rename to .github/temp_off/prod_deployment_rag_description_generation.yml From 95239ef1d51ae42f26db235ebb151769cf8e5c17 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 10:20:05 -0800 Subject: [PATCH 02/26] add first draft weaviate deployment yml --- .github/workflows/weaviate.yml | 50 ++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 .github/workflows/weaviate.yml diff --git a/.github/workflows/weaviate.yml b/.github/workflows/weaviate.yml new file mode 100644 index 0000000..7f9c5b6 --- /dev/null +++ b/.github/workflows/weaviate.yml @@ -0,0 +1,50 @@ +name: Build and Push Weaviate and Transformers Images + +on: + push: + branches: + - main + +permissions: + id-token: write + contents: read + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v2 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }} + aws-region: ${{ secrets.AWS_REGION }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Build, tag, and push Weaviate image to AWS ECR + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: weaviate_rag_server + run: | + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.weaviate -t $ECR_REGISTRY/$ECR_REPOSITORY:latest - < Date: Thu, 19 Dec 2024 10:20:16 -0800 Subject: [PATCH 03/26] add first draft dockerfiles --- Dockerfiles/Dockerfile.sentence_transformers | 5 +++++ Dockerfiles/Dockerfile.weaviate | 14 ++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 Dockerfiles/Dockerfile.sentence_transformers create mode 100644 Dockerfiles/Dockerfile.weaviate diff --git a/Dockerfiles/Dockerfile.sentence_transformers b/Dockerfiles/Dockerfile.sentence_transformers new file mode 100644 index 0000000..23c2cc4 --- /dev/null +++ b/Dockerfiles/Dockerfile.sentence_transformers @@ -0,0 +1,5 @@ +# Dockerfile.t2v-transformers +FROM cr.weaviate.io/semitechnologies/transformers-inference:sentence-transformers-all-mpnet-base-v2 + +# Set environment variables +ENV ENABLE_CUDA=0 \ No newline at end of file diff --git a/Dockerfiles/Dockerfile.weaviate b/Dockerfiles/Dockerfile.weaviate new file mode 100644 index 0000000..45582c6 --- /dev/null +++ b/Dockerfiles/Dockerfile.weaviate @@ -0,0 +1,14 @@ +# Dockerfile.weaviate +FROM cr.weaviate.io/semitechnologies/weaviate:1.27.7 + +# Set environment variables +ENV QUERY_DEFAULTS_LIMIT=25 +ENV AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true +ENV PERSISTENCE_DATA_PATH=/var/lib/weaviate +ENV DEFAULT_VECTORIZER_MODULE=text2vec-transformers +ENV ENABLE_MODULES=text2vec-transformers,generative-openai +ENV CLUSTER_HOSTNAME=node1 +ENV TRANSFORMERS_INFERENCE_API=http://t2v-transformers:8080 + +# Define entrypoint command +CMD ["--host", "0.0.0.0", "--port", "8080", "--scheme", "http"] \ No newline at end of file From 27ca6d27b5cca2e61b00232ca357cf76580134fd Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 10:20:44 -0800 Subject: [PATCH 04/26] add minor updates to rag lambda trigger --- ..._description_generation_fargate_trigger.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/modules/lambda_functions/rag_description_generation_fargate_trigger.py b/modules/lambda_functions/rag_description_generation_fargate_trigger.py index f68762d..217b977 100644 --- a/modules/lambda_functions/rag_description_generation_fargate_trigger.py +++ b/modules/lambda_functions/rag_description_generation_fargate_trigger.py @@ -1,6 +1,7 @@ import json import os import sys +import time import boto3 @@ -37,7 +38,10 @@ def get_terraform_state_file(): def lambda_handler(event, context): - """Trigger the Fargate task to process the blocks""" + """Trigger the Fargate task to process the blocks + + Optional args: + - start_block: int""" terraform_state_file = get_terraform_state_file() @@ -54,17 +58,17 @@ def lambda_handler(event, context): .get("revision") ) + start_block = int(event.get("start_block", "0")) number_blocks = 10 total_entries = 5000 block_size = total_entries // number_blocks + print(start_block, number_blocks, total_entries, block_size) + # using block_size and number_blocks, make a list of tuples of start and end indexes blocks = [ - (x, y) - for x, y in zip( - range(0, total_entries, block_size), - range(block_size, total_entries + block_size, block_size), - ) + (start, start + block_size) + for start in range(start_block, start_block + total_entries, block_size) ] print(blocks) @@ -81,6 +85,7 @@ def lambda_handler(event, context): for block in blocks: start = block[0] end = block[1] + print(block) response = ecs_client.run_task( taskDefinition=f"{task_definition}:{latest_version}", @@ -108,8 +113,11 @@ def lambda_handler(event, context): ] }, ) + print(response) + print(f"Successfully launched block {block}") + time.sleep(60) if __name__ == "__main__": - lambda_handler(None, None) + lambda_handler({"start_block": 1000}, None) From 42db3c7ea3aeb7dcd536b27eb87ca30ceb0f0523 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 10:21:08 -0800 Subject: [PATCH 05/26] add reporting update to rag main --- modules/rag_description_generation/main.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/rag_description_generation/main.py b/modules/rag_description_generation/main.py index dae9127..ac6a63d 100644 --- a/modules/rag_description_generation/main.py +++ b/modules/rag_description_generation/main.py @@ -2,6 +2,7 @@ import json import os import sys +import time import pandas as pd from config import CONFIGS @@ -33,6 +34,7 @@ class RagDescription(BaseModel): def model_post_init(self, __context): self.start_block = int(self.start_block) self.end_block = int(self.end_block) + self.num_completed_games = self.start_block def confirm_running_ec2_host(self): ec2_instance = Ec2() @@ -163,10 +165,13 @@ def rag_description_generation_chain(self): self.dynamodb_client = DynamoDB() for game_id in self.game_ids: - print(f"\nProcessing game {game_id}") + print( + f"\nProcessing game {game_id}\n{self.num_completed_games} of {self.end_block}" + ) self.process_single_game( weaviate_client, game_id, all_games_df, generate_prompt ) + self.num_completed_games += 1 weaviate_client.close_client() @@ -178,5 +183,7 @@ def rag_description_generation_chain(self): print(start_block, end_block) + # time.sleep(48000) + rag_description = RagDescription(start_block=start_block, end_block=end_block) rag_description.rag_description_generation_chain() From 14852a9897824f032b92c0ebe77f0eac0103d492 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 10:22:00 -0800 Subject: [PATCH 06/26] clean up terraform files to stop making EC2 instance --- aws_terraform_bgg/ec2_instances.tf | 46 +++++++++++++------------- aws_terraform_bgg/fargate_ecr.tf | 13 +++++++- aws_terraform_bgg/fargate_ecs_roles.tf | 2 -- aws_terraform_bgg/fargate_task_defs.tf | 30 ----------------- aws_terraform_bgg/lambdas_direct.tf | 4 +-- aws_terraform_bgg/variables.tf | 12 +++++++ 6 files changed, 49 insertions(+), 58 deletions(-) diff --git a/aws_terraform_bgg/ec2_instances.tf b/aws_terraform_bgg/ec2_instances.tf index a7b3aed..edef2c1 100644 --- a/aws_terraform_bgg/ec2_instances.tf +++ b/aws_terraform_bgg/ec2_instances.tf @@ -1,26 +1,26 @@ -resource "aws_instance" "weaviate_ec2_instance" { - - instance_type = "t3.medium" - ami = "ami-055e3d4f0bbeb5878" - key_name = "weaviate-ec2" - monitoring = true - vpc_security_group_ids = [aws_security_group.ec2_ssh_access.id, aws_security_group.ec2_weaviate_port_access.id, aws_security_group.shared_resources_sg.id] - subnet_id = module.vpc.public_subnets[0] - associate_public_ip_address = true - iam_instance_profile = aws_iam_instance_profile.weaviate_ec2_instance_role.name - - root_block_device { - volume_size = 30 - encrypted = true - } - tags = { - Name = "weaviate_embedder" - Terraform = "true" - Environment = "dev" - } - - user_data = data.cloudinit_config.weaviate_ec2_instance.rendered -} +# resource "aws_instance" "weaviate_ec2_instance" { + +# instance_type = "t3.large" +# ami = "ami-055e3d4f0bbeb5878" +# key_name = "weaviate-ec2" +# monitoring = true +# vpc_security_group_ids = [aws_security_group.ec2_ssh_access.id, aws_security_group.ec2_weaviate_port_access.id, aws_security_group.shared_resources_sg.id] +# subnet_id = module.vpc.public_subnets[0] +# associate_public_ip_address = true +# iam_instance_profile = aws_iam_instance_profile.weaviate_ec2_instance_role.name + +# root_block_device { +# volume_size = 30 +# encrypted = true +# } +# tags = { +# Name = "weaviate_embedder" +# Terraform = "true" +# Environment = "dev" +# } + +# user_data = data.cloudinit_config.weaviate_ec2_instance.rendered +# } resource "aws_iam_instance_profile" "weaviate_ec2_instance_role" { name = "test_profile" diff --git a/aws_terraform_bgg/fargate_ecr.tf b/aws_terraform_bgg/fargate_ecr.tf index fa4c6ac..e7cfd58 100644 --- a/aws_terraform_bgg/fargate_ecr.tf +++ b/aws_terraform_bgg/fargate_ecr.tf @@ -15,7 +15,9 @@ locals { module.bgg_users_data_cleaner_ecr.ecr_repository_name, module.dev_bgg_users_data_cleaner_ecr.ecr_repository_name, module.rag_description_generation_ecr.ecr_repository_name, - module.dev_rag_description_generation_ecr.ecr_repository_name + module.dev_rag_description_generation_ecr.ecr_repository_name, + module.weaviate_rag_server_ecr.ecr_repository_name, + module.sentence_transformers_ecr.ecr_repository_name ] } @@ -24,6 +26,15 @@ module "bgg_boardgame_file_retrieval_ecr" { ecr_repository_name = "bgg_boardgame_file_retrieval" } +module "weaviate_rag_server_ecr" { + source = "./modules/ecr" + ecr_repository_name = "weaviate_rag_server" +} + +module "sentence_transformers_ecr" { + source = "./modules/ecr" + ecr_repository_name = "sentence_transformers" +} module "rag_description_generation_ecr" { source = "./modules/ecr" ecr_repository_name = "rag_description_generation" diff --git a/aws_terraform_bgg/fargate_ecs_roles.tf b/aws_terraform_bgg/fargate_ecs_roles.tf index 36ff537..5ce2e80 100644 --- a/aws_terraform_bgg/fargate_ecs_roles.tf +++ b/aws_terraform_bgg/fargate_ecs_roles.tf @@ -14,8 +14,6 @@ resource "aws_iam_role_policy_attachment" "S3_Access_rag_description_generation_ policy_arn = aws_iam_policy.S3_Access_bgg_scraper_policy.arn } - - module "rag_description_generation_FargateTaskRole_role" { source = "./modules/iam_ecs_roles" task_definition = "rag_description_generation_FargateTaskRole" diff --git a/aws_terraform_bgg/fargate_task_defs.tf b/aws_terraform_bgg/fargate_task_defs.tf index a70968f..53bbddc 100644 --- a/aws_terraform_bgg/fargate_task_defs.tf +++ b/aws_terraform_bgg/fargate_task_defs.tf @@ -7,36 +7,6 @@ resource "aws_ecs_cluster" "boardgamegeek" { } } -module "rag_description_generation_ecs" { - source = "./modules/ecs_task_definition" - task_definition_family = var.rag_description_generation - task_definition_name = var.rag_description_generation - registry_name = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.rag_description_generation}:latest" - environment = "prod" - env_file = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env" - task_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateTaskRole" - execution_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateExecutionRole" - image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.rag_description_generation}:latest" - cpu = "1024" - memory = "8192" - region = var.REGION -} - -module "dev_rag_description_generation_ecs" { - source = "./modules/ecs_task_definition" - task_definition_family = "dev_${var.rag_description_generation}" - task_definition_name = "dev_${var.rag_description_generation}" - registry_name = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.rag_description_generation}:latest" - environment = "dev" - env_file = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env" - task_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateTaskRole" - execution_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateExecutionRole" - image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/dev_${var.rag_description_generation}:latest" - cpu = "1024" - memory = "8192" - region = var.REGION -} - module "boardgamegeek_orchestrator_ecs" { source = "./modules/ecs_task_definition" task_definition_family = var.boardgamegeek_orchestrator diff --git a/aws_terraform_bgg/lambdas_direct.tf b/aws_terraform_bgg/lambdas_direct.tf index 6f69832..55174a9 100644 --- a/aws_terraform_bgg/lambdas_direct.tf +++ b/aws_terraform_bgg/lambdas_direct.tf @@ -26,7 +26,7 @@ module "rag_description_generation" { source = "./modules/lambda_function_direct" function_name = "rag_description_generation_fargate_trigger" timeout = 900 - memory_size = 1024 + memory_size = 256 role = module.rag_description_generation_role.arn handler = "rag_description_generation_fargate_trigger.lambda_handler" layers = ["arn:aws:lambda:${var.REGION}:336392948345:layer:AWSSDKPandas-Python312:13"] @@ -38,7 +38,7 @@ module "dev_rag_description_generation" { source = "./modules/lambda_function_direct" function_name = "dev_rag_description_generation_fargate_trigger" timeout = 900 - memory_size = 1024 + memory_size = 256 role = module.rag_description_generation_role.arn handler = "rag_description_generation_fargate_trigger.lambda_handler" layers = ["arn:aws:lambda:${var.REGION}:336392948345:layer:AWSSDKPandas-Python312:13"] diff --git a/aws_terraform_bgg/variables.tf b/aws_terraform_bgg/variables.tf index 653f758..a7dfa22 100644 --- a/aws_terraform_bgg/variables.tf +++ b/aws_terraform_bgg/variables.tf @@ -53,6 +53,18 @@ variable "rag_description_generation" { default = "rag_description_generation" } +variable "weaviate_rag_server" { + description = "The name of the ECS task definition for the weaviate_rag_server" + type = string + default = "weaviate_rag_server" +} + +variable "sentence_transformers" { + description = "The name of the ECS task definition for the sentence_transformers" + type = string + default = "sentence_transformers" +} + variable "GITHUB_USER_NAME" { description = "The name of the GitHub user" type = string From c74fb2805e583685292322b32b77f0f6b5d3ed95 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 10:36:14 -0800 Subject: [PATCH 07/26] uploade weaviate env file to s3 in github actions --- .github/{temp_off => workflows}/deploy_file_to_s3.yml | 1 + modules/rag_description_generation/weaviate.env | 7 +++++++ 2 files changed, 8 insertions(+) rename .github/{temp_off => workflows}/deploy_file_to_s3.yml (86%) create mode 100644 modules/rag_description_generation/weaviate.env diff --git a/.github/temp_off/deploy_file_to_s3.yml b/.github/workflows/deploy_file_to_s3.yml similarity index 86% rename from .github/temp_off/deploy_file_to_s3.yml rename to .github/workflows/deploy_file_to_s3.yml index e90d29b..dc9b151 100644 --- a/.github/temp_off/deploy_file_to_s3.yml +++ b/.github/workflows/deploy_file_to_s3.yml @@ -26,3 +26,4 @@ jobs: - name: Sync files to S3 bucket run: | aws s3 cp config.json s3://${{ secrets.AWS_BUCKET_NAME }} + aws s3 cp modules/rag_description_generation/weaviate.env s3://${{ secrets.AWS_BUCKET_NAME }} diff --git a/modules/rag_description_generation/weaviate.env b/modules/rag_description_generation/weaviate.env new file mode 100644 index 0000000..ed6705c --- /dev/null +++ b/modules/rag_description_generation/weaviate.env @@ -0,0 +1,7 @@ +QUERY_DEFAULTS_LIMIT=25 +AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true +PERSISTENCE_DATA_PATH=/var/lib/weaviate +DEFAULT_VECTORIZER_MODULE=text2vec-transformers +ENABLE_MODULES=text2vec-transformers,generative-openai +CLUSTER_HOSTNAME=node1 +TRANSFORMERS_INFERENCE_API=http://t2v-transformers:8080 \ No newline at end of file From fed26269c1450c52f9b8160e3a7660c6299dacad Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 11:16:51 -0800 Subject: [PATCH 08/26] initial draft for self contained weaviate containers --- .github/workflows/weaviate.yml | 4 +- ...ansformers => Dockerfile.t2v-transformers} | 0 aws_terraform_bgg/fargate_ecr.tf | 7 +- aws_terraform_bgg/fargate_task_defs_rag.tf | 169 ++++ aws_terraform_bgg/variables.tf | 6 +- modules/rag_description_generation/Pipfile | 1 - .../rag_description_generation/Pipfile.lock | 808 +++--------------- modules/rag_description_generation/main.py | 29 +- .../rag_encoder_compare.ipynb | 247 ------ .../rag_review_gen.ipynb | 237 ----- .../rag_weaviate.py | 45 +- 11 files changed, 357 insertions(+), 1196 deletions(-) rename Dockerfiles/{Dockerfile.sentence_transformers => Dockerfile.t2v-transformers} (100%) create mode 100644 aws_terraform_bgg/fargate_task_defs_rag.tf delete mode 100644 modules/rag_description_generation/rag_encoder_compare.ipynb delete mode 100644 modules/rag_description_generation/rag_review_gen.ipynb diff --git a/.github/workflows/weaviate.yml b/.github/workflows/weaviate.yml index 7f9c5b6..e21d561 100644 --- a/.github/workflows/weaviate.yml +++ b/.github/workflows/weaviate.yml @@ -41,9 +41,9 @@ jobs: - name: Build, tag, and push Transformers image to AWS ECR env: ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: sentence_transformers + ECR_REPOSITORY: t2v-transformers run: | - DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.sentence_transformers -t $ECR_REGISTRY/$ECR_REPOSITORY:latest - <= '3.8'", - "version": "==1.35.82" + "version": "==1.35.84" }, "botocore": { "hashes": [ - "sha256:78dd7bf8f49616d00073698d7bbaf5a115208fe730b7b7afae4456adddb3552e", - "sha256:e43b97d8cbf19d35ce3a177f144bd97cc370f0a67d0984c7d7cf105ac198748f" + "sha256:b4dc2ac7f54ba959429e1debbd6c7c2fb2349baa1cd63803f0682f0773dbd077", + "sha256:f86754882e04683e2e99a6a23377d0dd7f1fc2b2242844b2381dbe4dcd639301" ], "markers": "python_version >= '3.8'", - "version": "==1.35.82" + "version": "==1.35.84" }, "certifi": { "hashes": [ @@ -307,22 +307,6 @@ "markers": "python_version >= '3.6'", "version": "==1.9.0" }, - "filelock": { - "hashes": [ - "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", - "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435" - ], - "markers": "python_version >= '3.8'", - "version": "==3.16.1" - }, - "fsspec": { - "hashes": [ - "sha256:03b9a6785766a4de40368b88906366755e2819e758b83705c88cd7cb5fe81871", - "sha256:eda2d8a4116d4f2429db8550f2457da57279247dd930bb12f821b58391359493" - ], - "markers": "python_version >= '3.8'", - "version": "==2024.10.0" - }, "grpcio": { "hashes": [ "sha256:025f790c056815b3bf53da850dd70ebb849fd755a4b1ac822cb65cd631e37d43", @@ -477,14 +461,6 @@ "markers": "python_version >= '3.8'", "version": "==0.28.1" }, - "huggingface-hub": { - "hashes": [ - "sha256:8f2e834517f1f1ddf1ecc716f91b120d7333011b7485f665a9a412eacb1a2a81", - "sha256:902cce1a1be5739f5589e560198a65a8edcfd3b830b1666f36e4b961f0454fac" - ], - "markers": "python_full_version >= '3.8.0'", - "version": "==0.27.0" - }, "idna": { "hashes": [ "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", @@ -493,14 +469,6 @@ "markers": "python_version >= '3.6'", "version": "==3.10" }, - "jinja2": { - "hashes": [ - "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369", - "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d" - ], - "markers": "python_version >= '3.7'", - "version": "==3.1.4" - }, "jiter": { "hashes": [ "sha256:025337859077b41548bdcbabe38698bcd93cfe10b06ff66617a48ff92c9aec60", @@ -599,88 +567,6 @@ "markers": "python_version >= '3.8'", "version": "==1.4.2" }, - "markupsafe": { - "hashes": [ - "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", - "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", - "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0", - "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", - "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", - "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13", - "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", - "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", - "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", - "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", - "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0", - "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b", - "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579", - "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", - "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", - "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff", - "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", - "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", - "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", - "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb", - "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", - "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", - "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", - "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", - "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a", - "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", - "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", - "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", - "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", - "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144", - "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f", - "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", - "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", - "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", - "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", - "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158", - "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", - "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", - "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", - "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171", - "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", - "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", - "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", - "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d", - "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", - "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", - "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", - "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", - "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29", - "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", - "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", - "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c", - "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", - "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", - "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", - "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a", - "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178", - "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", - "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", - "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", - "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50" - ], - "markers": "python_version >= '3.9'", - "version": "==3.0.2" - }, - "mpmath": { - "hashes": [ - "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", - "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c" - ], - "version": "==1.3.0" - }, - "networkx": { - "hashes": [ - "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", - "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f" - ], - "markers": "python_version >= '3.10'", - "version": "==3.4.2" - }, "nltk": { "hashes": [ "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1", @@ -753,12 +639,12 @@ }, "openai": { "hashes": [ - "sha256:7def1ab2d52f196357ce31b9cfcf4181529ce00838286426bb35be81c035dafb", - "sha256:a8f071a3e9198e2818f63aade68e759417b9f62c0971bdb83de82504b70b77f7" + "sha256:e2910b1170a6b7f88ef491ac3a42c387f08bd3db533411f7ee391d166571d63c", + "sha256:f5a035fd01e141fc743f4b0e02c41ca49be8fab0866d3b67f5f29b4f4d3c0973" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==1.57.4" + "version": "==1.58.1" }, "packaging": { "hashes": [ @@ -817,103 +703,22 @@ "markers": "python_version >= '3.9'", "version": "==2.2.3" }, - "pillow": { - "hashes": [ - "sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7", - "sha256:006bcdd307cc47ba43e924099a038cbf9591062e6c50e570819743f5607404f5", - "sha256:084a07ef0821cfe4858fe86652fffac8e187b6ae677e9906e192aafcc1b69903", - "sha256:0ae08bd8ffc41aebf578c2af2f9d8749d91f448b3bfd41d7d9ff573d74f2a6b2", - "sha256:0e038b0745997c7dcaae350d35859c9715c71e92ffb7e0f4a8e8a16732150f38", - "sha256:1187739620f2b365de756ce086fdb3604573337cc28a0d3ac4a01ab6b2d2a6d2", - "sha256:16095692a253047fe3ec028e951fa4221a1f3ed3d80c397e83541a3037ff67c9", - "sha256:1a61b54f87ab5786b8479f81c4b11f4d61702830354520837f8cc791ebba0f5f", - "sha256:1c1d72714f429a521d8d2d018badc42414c3077eb187a59579f28e4270b4b0fc", - "sha256:1e2688958a840c822279fda0086fec1fdab2f95bf2b717b66871c4ad9859d7e8", - "sha256:20ec184af98a121fb2da42642dea8a29ec80fc3efbaefb86d8fdd2606619045d", - "sha256:21a0d3b115009ebb8ac3d2ebec5c2982cc693da935f4ab7bb5c8ebe2f47d36f2", - "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316", - "sha256:2679d2258b7f1192b378e2893a8a0a0ca472234d4c2c0e6bdd3380e8dfa21b6a", - "sha256:27a7860107500d813fcd203b4ea19b04babe79448268403172782754870dac25", - "sha256:290f2cc809f9da7d6d622550bbf4c1e57518212da51b6a30fe8e0a270a5b78bd", - "sha256:2e46773dc9f35a1dd28bd6981332fd7f27bec001a918a72a79b4133cf5291dba", - "sha256:3107c66e43bda25359d5ef446f59c497de2b5ed4c7fdba0894f8d6cf3822dafc", - "sha256:375b8dd15a1f5d2feafff536d47e22f69625c1aa92f12b339ec0b2ca40263273", - "sha256:45c566eb10b8967d71bf1ab8e4a525e5a93519e29ea071459ce517f6b903d7fa", - "sha256:499c3a1b0d6fc8213519e193796eb1a86a1be4b1877d678b30f83fd979811d1a", - "sha256:4ad70c4214f67d7466bea6a08061eba35c01b1b89eaa098040a35272a8efb22b", - "sha256:4b60c9520f7207aaf2e1d94de026682fc227806c6e1f55bba7606d1c94dd623a", - "sha256:5178952973e588b3f1360868847334e9e3bf49d19e169bbbdfaf8398002419ae", - "sha256:52a2d8323a465f84faaba5236567d212c3668f2ab53e1c74c15583cf507a0291", - "sha256:598b4e238f13276e0008299bd2482003f48158e2b11826862b1eb2ad7c768b97", - "sha256:5bd2d3bdb846d757055910f0a59792d33b555800813c3b39ada1829c372ccb06", - "sha256:5c39ed17edea3bc69c743a8dd3e9853b7509625c2462532e62baa0732163a904", - "sha256:5d203af30149ae339ad1b4f710d9844ed8796e97fda23ffbc4cc472968a47d0b", - "sha256:5ddbfd761ee00c12ee1be86c9c0683ecf5bb14c9772ddbd782085779a63dd55b", - "sha256:607bbe123c74e272e381a8d1957083a9463401f7bd01287f50521ecb05a313f8", - "sha256:61b887f9ddba63ddf62fd02a3ba7add935d053b6dd7d58998c630e6dbade8527", - "sha256:6619654954dc4936fcff82db8eb6401d3159ec6be81e33c6000dfd76ae189947", - "sha256:674629ff60030d144b7bca2b8330225a9b11c482ed408813924619c6f302fdbb", - "sha256:6ec0d5af64f2e3d64a165f490d96368bb5dea8b8f9ad04487f9ab60dc4bb6003", - "sha256:6f4dba50cfa56f910241eb7f883c20f1e7b1d8f7d91c750cd0b318bad443f4d5", - "sha256:70fbbdacd1d271b77b7721fe3cdd2d537bbbd75d29e6300c672ec6bb38d9672f", - "sha256:72bacbaf24ac003fea9bff9837d1eedb6088758d41e100c1552930151f677739", - "sha256:7326a1787e3c7b0429659e0a944725e1b03eeaa10edd945a86dead1913383944", - "sha256:73853108f56df97baf2bb8b522f3578221e56f646ba345a372c78326710d3830", - "sha256:73e3a0200cdda995c7e43dd47436c1548f87a30bb27fb871f352a22ab8dcf45f", - "sha256:75acbbeb05b86bc53cbe7b7e6fe00fbcf82ad7c684b3ad82e3d711da9ba287d3", - "sha256:8069c5179902dcdce0be9bfc8235347fdbac249d23bd90514b7a47a72d9fecf4", - "sha256:846e193e103b41e984ac921b335df59195356ce3f71dcfd155aa79c603873b84", - "sha256:8594f42df584e5b4bb9281799698403f7af489fba84c34d53d1c4bfb71b7c4e7", - "sha256:86510e3f5eca0ab87429dd77fafc04693195eec7fd6a137c389c3eeb4cfb77c6", - "sha256:8853a3bf12afddfdf15f57c4b02d7ded92c7a75a5d7331d19f4f9572a89c17e6", - "sha256:88a58d8ac0cc0e7f3a014509f0455248a76629ca9b604eca7dc5927cc593c5e9", - "sha256:8ba470552b48e5835f1d23ecb936bb7f71d206f9dfeee64245f30c3270b994de", - "sha256:8c676b587da5673d3c75bd67dd2a8cdfeb282ca38a30f37950511766b26858c4", - "sha256:8ec4a89295cd6cd4d1058a5e6aec6bf51e0eaaf9714774e1bfac7cfc9051db47", - "sha256:94f3e1780abb45062287b4614a5bc0874519c86a777d4a7ad34978e86428b8dd", - "sha256:9a0f748eaa434a41fccf8e1ee7a3eed68af1b690e75328fd7a60af123c193b50", - "sha256:a5629742881bcbc1f42e840af185fd4d83a5edeb96475a575f4da50d6ede337c", - "sha256:a65149d8ada1055029fcb665452b2814fe7d7082fcb0c5bed6db851cb69b2086", - "sha256:b3c5ac4bed7519088103d9450a1107f76308ecf91d6dabc8a33a2fcfb18d0fba", - "sha256:b4fd7bd29610a83a8c9b564d457cf5bd92b4e11e79a4ee4716a63c959699b306", - "sha256:bcd1fb5bb7b07f64c15618c89efcc2cfa3e95f0e3bcdbaf4642509de1942a699", - "sha256:c12b5ae868897c7338519c03049a806af85b9b8c237b7d675b8c5e089e4a618e", - "sha256:c26845094b1af3c91852745ae78e3ea47abf3dbcd1cf962f16b9a5fbe3ee8488", - "sha256:c6a660307ca9d4867caa8d9ca2c2658ab685de83792d1876274991adec7b93fa", - "sha256:c809a70e43c7977c4a42aefd62f0131823ebf7dd73556fa5d5950f5b354087e2", - "sha256:c8b2351c85d855293a299038e1f89db92a2f35e8d2f783489c6f0b2b5f3fe8a3", - "sha256:cb929ca942d0ec4fac404cbf520ee6cac37bf35be479b970c4ffadf2b6a1cad9", - "sha256:d2c0a187a92a1cb5ef2c8ed5412dd8d4334272617f532d4ad4de31e0495bd923", - "sha256:d69bfd8ec3219ae71bcde1f942b728903cad25fafe3100ba2258b973bd2bc1b2", - "sha256:daffdf51ee5db69a82dd127eabecce20729e21f7a3680cf7cbb23f0829189790", - "sha256:e58876c91f97b0952eb766123bfef372792ab3f4e3e1f1a2267834c2ab131734", - "sha256:eda2616eb2313cbb3eebbe51f19362eb434b18e3bb599466a1ffa76a033fb916", - "sha256:ee217c198f2e41f184f3869f3e485557296d505b5195c513b2bfe0062dc537f1", - "sha256:f02541ef64077f22bf4924f225c0fd1248c168f86e4b7abdedd87d6ebaceab0f", - "sha256:f1b82c27e89fffc6da125d5eb0ca6e68017faf5efc078128cfaa42cf5cb38798", - "sha256:fba162b8872d30fea8c52b258a542c5dfd7b235fb5cb352240c8d63b414013eb", - "sha256:fbbcb7b57dc9c794843e3d1258c0fbf0f48656d46ffe9e09b63bbd6e8cd5d0a2", - "sha256:fcb4621042ac4b7865c179bb972ed0da0218a076dc1820ffc48b1d74c1e37fe9" - ], - "markers": "python_version >= '3.9'", - "version": "==11.0.0" - }, "protobuf": { "hashes": [ - "sha256:012ce28d862ff417fd629285aca5d9772807f15ceb1a0dbd15b88f58c776c98c", - "sha256:027fbcc48cea65a6b17028510fdd054147057fa78f4772eb547b9274e5219331", - "sha256:1fc55267f086dd4050d18ef839d7bd69300d0d08c2a53ca7df3920cc271a3c34", - "sha256:22c1f539024241ee545cbcb00ee160ad1877975690b16656ff87dde107b5f110", - "sha256:32600ddb9c2a53dedc25b8581ea0f1fd8ea04956373c0c07577ce58d312522e0", - "sha256:50879eb0eb1246e3a5eabbbe566b44b10348939b7cc1b267567e8c3d07213853", - "sha256:5a41deccfa5e745cef5c65a560c76ec0ed8e70908a67cc8f4da5fce588b50d57", - "sha256:683be02ca21a6ffe80db6dd02c0b5b2892322c59ca57fd6c872d652cb80549cb", - "sha256:8ee1461b3af56145aca2800e6a3e2f928108c749ba8feccc6f5dd0062c410c0d", - "sha256:b5ba1d0e4c8a40ae0496d0e2ecfdbb82e1776928a205106d14ad6985a09ec155", - "sha256:d473655e29c0c4bbf8b69e9a8fb54645bc289dead6d753b952e7aa660254ae18" + "sha256:13d6d617a2a9e0e82a88113d7191a1baa1e42c2cc6f5f1398d3b054c8e7e714a", + "sha256:2d2e674c58a06311c8e99e74be43e7f3a8d1e2b2fdf845eaa347fbd866f23355", + "sha256:36000f97ea1e76e8398a3f02936aac2a5d2b111aae9920ec1b769fc4a222c4d9", + "sha256:494229ecd8c9009dd71eda5fd57528395d1eacdf307dbece6c12ad0dd09e912e", + "sha256:842de6d9241134a973aab719ab42b008a18a90f9f07f06ba480df268f86432f9", + "sha256:a0c53d78383c851bfa97eb42e3703aefdc96d2036a41482ffd55dc5f529466eb", + "sha256:b2cc8e8bb7c9326996f0e160137b0861f1a82162502658df2951209d0cb0309e", + "sha256:b6b0d416bbbb9d4fbf9d0561dbfc4e324fd522f61f7af0fe0f282ab67b22477e", + "sha256:c12ba8249f5624300cf51c3d0bfe5be71a60c63e4dcf51ffe9a68771d958c851", + "sha256:e621a98c0201a7c8afe89d9646859859be97cb22b8bf1d8eacfd90d5bda2eb19", + "sha256:fde4554c0e578a5a0bcc9a276339594848d1e89f9ea47b4427c80e5d72f90181" ], "markers": "python_version >= '3.8'", - "version": "==5.29.1" + "version": "==5.29.2" }, "pyarrow": { "hashes": [ @@ -973,118 +778,118 @@ }, "pydantic": { "hashes": [ - "sha256:be04d85bbc7b65651c5f8e6b9976ed9c6f41782a55524cef079a34a0bb82144d", - "sha256:cb5ac360ce894ceacd69c403187900a02c4b20b693a9dd1d643e1effab9eadf9" + "sha256:597e135ea68be3a37552fb524bc7d0d66dcf93d395acd93a00682f1efcb8ee3d", + "sha256:82f12e9723da6de4fe2ba888b5971157b3be7ad914267dea8f05f82b28254f06" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==2.10.3" + "version": "==2.10.4" }, "pydantic-core": { "hashes": [ - "sha256:00e6424f4b26fe82d44577b4c842d7df97c20be6439e8e685d0d715feceb9fb9", - "sha256:029d9757eb621cc6e1848fa0b0310310de7301057f623985698ed7ebb014391b", - "sha256:02a3d637bd387c41d46b002f0e49c52642281edacd2740e5a42f7017feea3f2c", - "sha256:0325336f348dbee6550d129b1627cb8f5351a9dc91aad141ffb96d4937bd9529", - "sha256:062f60e512fc7fff8b8a9d680ff0ddaaef0193dba9fa83e679c0c5f5fbd018bc", - "sha256:0b3dfe500de26c52abe0477dde16192ac39c98f05bf2d80e76102d394bd13854", - "sha256:0e4216e64d203e39c62df627aa882f02a2438d18a5f21d7f721621f7a5d3611d", - "sha256:121ceb0e822f79163dd4699e4c54f5ad38b157084d97b34de8b232bcaad70278", - "sha256:159cac0a3d096f79ab6a44d77a961917219707e2a130739c64d4dd46281f5c2a", - "sha256:15aae984e46de8d376df515f00450d1522077254ef6b7ce189b38ecee7c9677c", - "sha256:15cc53a3179ba0fcefe1e3ae50beb2784dede4003ad2dfd24f81bba4b23a454f", - "sha256:161c27ccce13b6b0c8689418da3885d3220ed2eae2ea5e9b2f7f3d48f1d52c27", - "sha256:19910754e4cc9c63bc1c7f6d73aa1cfee82f42007e407c0f413695c2f7ed777f", - "sha256:1ba5e3963344ff25fc8c40da90f44b0afca8cfd89d12964feb79ac1411a260ac", - "sha256:1c00666a3bd2f84920a4e94434f5974d7bbc57e461318d6bb34ce9cdbbc1f6b2", - "sha256:1c39b07d90be6b48968ddc8c19e7585052088fd7ec8d568bb31ff64c70ae3c97", - "sha256:206b5cf6f0c513baffaeae7bd817717140770c74528f3e4c3e1cec7871ddd61a", - "sha256:258c57abf1188926c774a4c94dd29237e77eda19462e5bb901d88adcab6af919", - "sha256:2cdf7d86886bc6982354862204ae3b2f7f96f21a3eb0ba5ca0ac42c7b38598b9", - "sha256:2d4567c850905d5eaaed2f7a404e61012a51caf288292e016360aa2b96ff38d4", - "sha256:35c14ac45fcfdf7167ca76cc80b2001205a8d5d16d80524e13508371fb8cdd9c", - "sha256:38de0a70160dd97540335b7ad3a74571b24f1dc3ed33f815f0880682e6880131", - "sha256:3af385b0cee8df3746c3f406f38bcbfdc9041b5c2d5ce3e5fc6637256e60bbc5", - "sha256:3b748c44bb9f53031c8cbc99a8a061bc181c1000c60a30f55393b6e9c45cc5bd", - "sha256:3bbd5d8cc692616d5ef6fbbbd50dbec142c7e6ad9beb66b78a96e9c16729b089", - "sha256:3ccaa88b24eebc0f849ce0a4d09e8a408ec5a94afff395eb69baf868f5183107", - "sha256:3fa80ac2bd5856580e242dbc202db873c60a01b20309c8319b5c5986fbe53ce6", - "sha256:4228b5b646caa73f119b1ae756216b59cc6e2267201c27d3912b592c5e323b60", - "sha256:42b0e23f119b2b456d07ca91b307ae167cc3f6c846a7b169fca5326e32fdc6cf", - "sha256:45cf8588c066860b623cd11c4ba687f8d7175d5f7ef65f7129df8a394c502de5", - "sha256:45d9c5eb9273aa50999ad6adc6be5e0ecea7e09dbd0d31bd0c65a55a2592ca08", - "sha256:4603137322c18eaf2e06a4495f426aa8d8388940f3c457e7548145011bb68e05", - "sha256:46ccfe3032b3915586e469d4972973f893c0a2bb65669194a5bdea9bacc088c2", - "sha256:4fefee876e07a6e9aad7a8c8c9f85b0cdbe7df52b8a9552307b09050f7512c7e", - "sha256:5556470f1a2157031e676f776c2bc20acd34c1990ca5f7e56f1ebf938b9ab57c", - "sha256:57866a76e0b3823e0b56692d1a0bf722bffb324839bb5b7226a7dbd6c9a40b17", - "sha256:5897bec80a09b4084aee23f9b73a9477a46c3304ad1d2d07acca19723fb1de62", - "sha256:58ca98a950171f3151c603aeea9303ef6c235f692fe555e883591103da709b23", - "sha256:5ca038c7f6a0afd0b2448941b6ef9d5e1949e999f9e5517692eb6da58e9d44be", - "sha256:5f6c8a66741c5f5447e047ab0ba7a1c61d1e95580d64bce852e3df1f895c4067", - "sha256:5f8c4718cd44ec1580e180cb739713ecda2bdee1341084c1467802a417fe0f02", - "sha256:5fde892e6c697ce3e30c61b239330fc5d569a71fefd4eb6512fc6caec9dd9e2f", - "sha256:62a763352879b84aa31058fc931884055fd75089cccbd9d58bb6afd01141b235", - "sha256:62ba45e21cf6571d7f716d903b5b7b6d2617e2d5d67c0923dc47b9d41369f840", - "sha256:64c65f40b4cd8b0e049a8edde07e38b476da7e3aaebe63287c899d2cff253fa5", - "sha256:655d7dd86f26cb15ce8a431036f66ce0318648f8853d709b4167786ec2fa4807", - "sha256:66ff044fd0bb1768688aecbe28b6190f6e799349221fb0de0e6f4048eca14c16", - "sha256:672ebbe820bb37988c4d136eca2652ee114992d5d41c7e4858cdd90ea94ffe5c", - "sha256:6b9af86e1d8e4cfc82c2022bfaa6f459381a50b94a29e95dcdda8442d6d83864", - "sha256:6e0bd57539da59a3e4671b90a502da9a28c72322a4f17866ba3ac63a82c4498e", - "sha256:71a5e35c75c021aaf400ac048dacc855f000bdfed91614b4a726f7432f1f3d6a", - "sha256:7597c07fbd11515f654d6ece3d0e4e5093edc30a436c63142d9a4b8e22f19c35", - "sha256:764be71193f87d460a03f1f7385a82e226639732214b402f9aa61f0d025f0737", - "sha256:7699b1df36a48169cdebda7ab5a2bac265204003f153b4bd17276153d997670a", - "sha256:7ccebf51efc61634f6c2344da73e366c75e735960b5654b63d7e6f69a5885fa3", - "sha256:7f7059ca8d64fea7f238994c97d91f75965216bcbe5f695bb44f354893f11d52", - "sha256:8065914ff79f7eab1599bd80406681f0ad08f8e47c880f17b416c9f8f7a26d05", - "sha256:816f5aa087094099fff7edabb5e01cc370eb21aa1a1d44fe2d2aefdfb5599b31", - "sha256:81f2ec23ddc1b476ff96563f2e8d723830b06dceae348ce02914a37cb4e74b89", - "sha256:84286494f6c5d05243456e04223d5a9417d7f443c3b76065e75001beb26f88de", - "sha256:8bf7b66ce12a2ac52d16f776b31d16d91033150266eb796967a7e4621707e4f6", - "sha256:8f1edcea27918d748c7e5e4d917297b2a0ab80cad10f86631e488b7cddf76a36", - "sha256:981fb88516bd1ae8b0cbbd2034678a39dedc98752f264ac9bc5839d3923fa04c", - "sha256:98476c98b02c8e9b2eec76ac4156fd006628b1b2d0ef27e548ffa978393fd154", - "sha256:992cea5f4f3b29d6b4f7f1726ed8ee46c8331c6b4eed6db5b40134c6fe1768bb", - "sha256:9a3b0793b1bbfd4146304e23d90045f2a9b5fd5823aa682665fbdaf2a6c28f3e", - "sha256:9a42d6a8156ff78981f8aa56eb6394114e0dedb217cf8b729f438f643608cbcd", - "sha256:9c10c309e18e443ddb108f0ef64e8729363adbfd92d6d57beec680f6261556f3", - "sha256:9cbd94fc661d2bab2bc702cddd2d3370bbdcc4cd0f8f57488a81bcce90c7a54f", - "sha256:9fdcf339322a3fae5cbd504edcefddd5a50d9ee00d968696846f089b4432cf78", - "sha256:a0697803ed7d4af5e4c1adf1670af078f8fcab7a86350e969f454daf598c4960", - "sha256:a28af0695a45f7060e6f9b7092558a928a28553366519f64083c63a44f70e618", - "sha256:a2e02889071850bbfd36b56fd6bc98945e23670773bc7a76657e90e6b6603c08", - "sha256:a33cd6ad9017bbeaa9ed78a2e0752c5e250eafb9534f308e7a5f7849b0b1bfb4", - "sha256:a3cb37038123447cf0f3ea4c74751f6a9d7afef0eb71aa07bf5f652b5e6a132c", - "sha256:a57847b090d7892f123726202b7daa20df6694cbd583b67a592e856bff603d6c", - "sha256:a5a8e19d7c707c4cadb8c18f5f60c843052ae83c20fa7d44f41594c644a1d330", - "sha256:ac3b20653bdbe160febbea8aa6c079d3df19310d50ac314911ed8cc4eb7f8cb8", - "sha256:ac6c2c45c847bbf8f91930d88716a0fb924b51e0c6dad329b793d670ec5db792", - "sha256:acc07b2cfc5b835444b44a9956846b578d27beeacd4b52e45489e93276241025", - "sha256:aee66be87825cdf72ac64cb03ad4c15ffef4143dbf5c113f64a5ff4f81477bf9", - "sha256:af52d26579b308921b73b956153066481f064875140ccd1dfd4e77db89dbb12f", - "sha256:b94d4ba43739bbe8b0ce4262bcc3b7b9f31459ad120fb595627eaeb7f9b9ca01", - "sha256:ba630d5e3db74c79300d9a5bdaaf6200172b107f263c98a0539eeecb857b2337", - "sha256:bed0f8a0eeea9fb72937ba118f9db0cb7e90773462af7962d382445f3005e5a4", - "sha256:bf99c8404f008750c846cb4ac4667b798a9f7de673ff719d705d9b2d6de49c5f", - "sha256:c3027001c28434e7ca5a6e1e527487051136aa81803ac812be51802150d880dd", - "sha256:c65af9088ac534313e1963443d0ec360bb2b9cba6c2909478d22c2e363d98a51", - "sha256:d0165ab2914379bd56908c02294ed8405c252250668ebcb438a55494c69f44ab", - "sha256:d1b26e1dff225c31897696cab7d4f0a315d4c0d9e8666dbffdb28216f3b17fdc", - "sha256:d950caa237bb1954f1b8c9227b5065ba6875ac9771bb8ec790d956a699b78676", - "sha256:dc61505e73298a84a2f317255fcc72b710b72980f3a1f670447a21efc88f8381", - "sha256:e173486019cc283dc9778315fa29a363579372fe67045e971e89b6365cc035ed", - "sha256:e1f735dc43da318cad19b4173dd1ffce1d84aafd6c9b782b3abc04a0d5a6f5bb", - "sha256:e9386266798d64eeb19dd3677051f5705bf873e98e15897ddb7d76f477131967", - "sha256:f216dbce0e60e4d03e0c4353c7023b202d95cbaeff12e5fd2e82ea0a66905073", - "sha256:f4e5658dbffe8843a0f12366a4c2d1c316dbe09bb4dfbdc9d2d9cd6031de8aae", - "sha256:f5a823165e6d04ccea61a9f0576f345f8ce40ed533013580e087bd4d7442b52c", - "sha256:f69ed81ab24d5a3bd93861c8c4436f54afdf8e8cc421562b0c7504cf3be58206", - "sha256:f82d068a2d6ecfc6e054726080af69a6764a10015467d7d7b9f66d6ed5afa23b" + "sha256:00bad2484fa6bda1e216e7345a798bd37c68fb2d97558edd584942aa41b7d278", + "sha256:0296abcb83a797db256b773f45773da397da75a08f5fcaef41f2044adec05f50", + "sha256:03d0f86ea3184a12f41a2d23f7ccb79cdb5a18e06993f8a45baa8dfec746f0e9", + "sha256:044a50963a614ecfae59bb1eaf7ea7efc4bc62f49ed594e18fa1e5d953c40e9f", + "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6", + "sha256:08e125dbdc505fa69ca7d9c499639ab6407cfa909214d500897d02afb816e7cc", + "sha256:097830ed52fd9e427942ff3b9bc17fab52913b2f50f2880dc4a5611446606a54", + "sha256:0d1e85068e818c73e048fe28cfc769040bb1f475524f4745a5dc621f75ac7630", + "sha256:0d75070718e369e452075a6017fbf187f788e17ed67a3abd47fa934d001863d9", + "sha256:14d4a5c49d2f009d62a2a7140d3064f686d17a5d1a268bc641954ba181880236", + "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7", + "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee", + "sha256:1a4207639fb02ec2dbb76227d7c751a20b1a6b4bc52850568e52260cae64ca3b", + "sha256:1c1fd185014191700554795c99b347d64f2bb637966c4cfc16998a0ca700d048", + "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc", + "sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130", + "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4", + "sha256:251136cdad0cb722e93732cb45ca5299fb56e1344a833640bf93b2803f8d1bfd", + "sha256:26f0d68d4b235a2bae0c3fc585c585b4ecc51382db0e3ba402a22cbc440915e4", + "sha256:26f32e0adf166a84d0cb63be85c562ca8a6fa8de28e5f0d92250c6b7e9e2aff7", + "sha256:280d219beebb0752699480fe8f1dc61ab6615c2046d76b7ab7ee38858de0a4e7", + "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4", + "sha256:2bf14caea37e91198329b828eae1618c068dfb8ef17bb33287a7ad4b61ac314e", + "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa", + "sha256:30c5f68ded0c36466acede341551106821043e9afaad516adfb6e8fa80a4e6a6", + "sha256:337b443af21d488716f8d0b6164de833e788aa6bd7e3a39c005febc1284f4962", + "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b", + "sha256:3d591580c34f4d731592f0e9fe40f9cc1b430d297eecc70b962e93c5c668f15f", + "sha256:3de3ce3c9ddc8bbd88f6e0e304dea0e66d843ec9de1b0042b0911c1663ffd474", + "sha256:3de9961f2a346257caf0aa508a4da705467f53778e9ef6fe744c038119737ef5", + "sha256:40d02e7d45c9f8af700f3452f329ead92da4c5f4317ca9b896de7ce7199ea459", + "sha256:42c5f762659e47fdb7b16956c71598292f60a03aa92f8b6351504359dbdba6cf", + "sha256:47956ae78b6422cbd46f772f1746799cbb862de838fd8d1fbd34a82e05b0983a", + "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c", + "sha256:4c9775e339e42e79ec99c441d9730fccf07414af63eac2f0e48e08fd38a64d76", + "sha256:4e0b4220ba5b40d727c7f879eac379b822eee5d8fff418e9d3381ee45b3b0362", + "sha256:50a68f3e3819077be2c98110c1f9dcb3817e93f267ba80a2c05bb4f8799e2ff4", + "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934", + "sha256:521eb9b7f036c9b6187f0b47318ab0d7ca14bd87f776240b90b21c1f4f149320", + "sha256:57762139821c31847cfb2df63c12f725788bd9f04bc2fb392790959b8f70f118", + "sha256:5e4f4bb20d75e9325cc9696c6802657b58bc1dbbe3022f32cc2b2b632c3fbb96", + "sha256:5e68c4446fe0810e959cdff46ab0a41ce2f2c86d227d96dc3847af0ba7def306", + "sha256:669e193c1c576a58f132e3158f9dfa9662969edb1a250c54d8fa52590045f046", + "sha256:688d3fd9fcb71f41c4c015c023d12a79d1c4c0732ec9eb35d96e3388a120dcf3", + "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2", + "sha256:7041c36f5680c6e0f08d922aed302e98b3745d97fe1589db0a3eebf6624523af", + "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9", + "sha256:77d1bca19b0f7021b3a982e6f903dcd5b2b06076def36a652e3907f596e29f67", + "sha256:7969e133a6f183be60e9f6f56bfae753585680f3b7307a8e555a948d443cc05a", + "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27", + "sha256:7d0c8399fcc1848491f00e0314bd59fb34a9c008761bcb422a057670c3f65e35", + "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b", + "sha256:7e17b560be3c98a8e3aa66ce828bdebb9e9ac6ad5466fba92eb74c4c95cb1151", + "sha256:8083d4e875ebe0b864ffef72a4304827015cff328a1be6e22cc850753bfb122b", + "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154", + "sha256:82f986faf4e644ffc189a7f1aafc86e46ef70372bb153e7001e8afccc6e54133", + "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef", + "sha256:85210c4d99a0114f5a9481b44560d7d1e35e32cc5634c656bc48e590b669b145", + "sha256:8c19d1ea0673cd13cc2f872f6c9ab42acc4e4f492a7ca9d3795ce2b112dd7e15", + "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4", + "sha256:8e10c99ef58cfdf2a66fc15d66b16c4a04f62bca39db589ae8cba08bc55331bc", + "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee", + "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c", + "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0", + "sha256:9fdbe7629b996647b99c01b37f11170a57ae675375b14b8c13b8518b8320ced5", + "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57", + "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b", + "sha256:b0cb791f5b45307caae8810c2023a184c74605ec3bcbb67d13846c28ff731ff8", + "sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1", + "sha256:bca101c00bff0adb45a833f8451b9105d9df18accb8743b08107d7ada14bd7da", + "sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e", + "sha256:bec317a27290e2537f922639cafd54990551725fc844249e64c523301d0822fc", + "sha256:c10eb4f1659290b523af58fa7cffb452a61ad6ae5613404519aee4bfbf1df993", + "sha256:c33939a82924da9ed65dab5a65d427205a73181d8098e79b6b426bdf8ad4e656", + "sha256:c61709a844acc6bf0b7dce7daae75195a10aac96a596ea1b776996414791ede4", + "sha256:c70c26d2c99f78b125a3459f8afe1aed4d9687c24fd677c6a4436bc042e50d6c", + "sha256:c817e2b40aba42bac6f457498dacabc568c3b7a986fc9ba7c8d9d260b71485fb", + "sha256:cabb9bcb7e0d97f74df8646f34fc76fbf793b7f6dc2438517d7a9e50eee4f14d", + "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9", + "sha256:cca63613e90d001b9f2f9a9ceb276c308bfa2a43fafb75c8031c4f66039e8c6e", + "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1", + "sha256:d2088237af596f0a524d3afc39ab3b036e8adb054ee57cbb1dcf8e09da5b29cc", + "sha256:d262606bf386a5ba0b0af3b97f37c83d7011439e3dc1a9298f21efb292e42f1a", + "sha256:d2d63f1215638d28221f664596b1ccb3944f6e25dd18cd3b86b0a4c408d5ebb9", + "sha256:d3e8d504bdd3f10835468f29008d72fc8359d95c9c415ce6e767203db6127506", + "sha256:d4041c0b966a84b4ae7a09832eb691a35aec90910cd2dbe7a208de59be77965b", + "sha256:d716e2e30c6f140d7560ef1538953a5cd1a87264c737643d481f2779fc247fe1", + "sha256:d81d2068e1c1228a565af076598f9e7451712700b673de8f502f0334f281387d", + "sha256:d9640b0059ff4f14d1f37321b94061c6db164fbe49b334b31643e0528d100d99", + "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3", + "sha256:e0fd26b16394ead34a424eecf8a31a1f5137094cabe84a1bcb10fa6ba39d3d31", + "sha256:e2bb4d3e5873c37bb3dd58714d4cd0b0e6238cebc4177ac8fe878f8b3aa8e74c", + "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39", + "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a", + "sha256:ef592d4bad47296fb11f96cd7dc898b92e795032b4894dfb4076cfccd43a9308", + "sha256:f141ee28a0ad2123b6611b6ceff018039df17f32ada8b534e6aa039545a3efb2", + "sha256:f66d89ba397d92f840f8654756196d93804278457b5fbede59598a1f9f90b228", + "sha256:f6f8e111843bbb0dee4cb6594cdc73e79b3329b526037ec242a3e49012495b3b", + "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9", + "sha256:fd1aea04935a508f62e0d0ef1f5ae968774a32afc306fb8545e06f5ff5cdf3ad" ], "markers": "python_version >= '3.8'", - "version": "==2.27.1" + "version": "==2.27.2" }, "python-dateutil": { "hashes": [ @@ -1101,65 +906,6 @@ ], "version": "==2024.2" }, - "pyyaml": { - "hashes": [ - "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff", - "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", - "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", - "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e", - "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", - "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", - "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", - "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", - "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", - "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", - "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a", - "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", - "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", - "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8", - "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", - "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19", - "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", - "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a", - "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", - "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", - "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", - "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631", - "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d", - "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", - "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", - "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", - "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", - "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", - "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", - "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706", - "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", - "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", - "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", - "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083", - "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", - "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", - "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", - "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f", - "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725", - "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", - "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", - "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", - "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", - "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", - "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5", - "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d", - "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", - "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", - "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", - "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", - "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", - "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12", - "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4" - ], - "markers": "python_version >= '3.8'", - "version": "==6.0.2" - }, "regex": { "hashes": [ "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c", @@ -1278,206 +1024,6 @@ "markers": "python_version >= '3.8'", "version": "==0.10.4" }, - "safetensors": { - "hashes": [ - "sha256:01c8f00da537af711979e1b42a69a8ec9e1d7112f208e0e9b8a35d2c381085ef", - "sha256:023b6e5facda76989f4cba95a861b7e656b87e225f61811065d5c501f78cdb3f", - "sha256:09566792588d77b68abe53754c9f1308fadd35c9f87be939e22c623eaacbed6b", - "sha256:098923e2574ff237c517d6e840acada8e5b311cb1fa226019105ed82e9c3b62f", - "sha256:09dedf7c2fda934ee68143202acff6e9e8eb0ddeeb4cfc24182bef999efa9f42", - "sha256:133620f443450429322f238fda74d512c4008621227fccf2f8cf4a76206fea7c", - "sha256:139fbee92570ecea774e6344fee908907db79646d00b12c535f66bc78bd5ea2c", - "sha256:13ca0902d2648775089fa6a0c8fc9e6390c5f8ee576517d33f9261656f851e3f", - "sha256:1500418454529d0ed5c1564bda376c4ddff43f30fce9517d9bee7bcce5a8ef50", - "sha256:1524b54246e422ad6fb6aea1ac71edeeb77666efa67230e1faf6999df9b2e27f", - "sha256:21742b391b859e67b26c0b2ac37f52c9c0944a879a25ad2f9f9f3cd61e7fda8f", - "sha256:21f848d7aebd5954f92538552d6d75f7c1b4500f51664078b5b49720d180e47c", - "sha256:23fc9b4ec7b602915cbb4ec1a7c1ad96d2743c322f20ab709e2c35d1b66dad27", - "sha256:25e5f8e2e92a74f05b4ca55686234c32aac19927903792b30ee6d7bd5653d54e", - "sha256:2783956926303dcfeb1de91a4d1204cd4089ab441e622e7caee0642281109db3", - "sha256:309aaec9b66cbf07ad3a2e5cb8a03205663324fea024ba391594423d0f00d9fe", - "sha256:313514b0b9b73ff4ddfb4edd71860696dbe3c1c9dc4d5cc13dbd74da283d2cbf", - "sha256:31fa33ee326f750a2f2134a6174773c281d9a266ccd000bd4686d8021f1f3dac", - "sha256:3685ce7ed036f916316b567152482b7e959dc754fcc4a8342333d222e05f407c", - "sha256:39371fc551c1072976073ab258c3119395294cf49cdc1f8476794627de3130df", - "sha256:3a6ba28118636a130ccbb968bc33d4684c48678695dba2590169d5ab03a45646", - "sha256:4037676c86365a721a8c9510323a51861d703b399b78a6b4486a54a65a975fca", - "sha256:473300314e026bd1043cef391bb16a8689453363381561b8a3e443870937cc1e", - "sha256:4b99fbf72e3faf0b2f5f16e5e3458b93b7d0a83984fe8d5364c60aa169f2da89", - "sha256:4fb3e0609ec12d2a77e882f07cced530b8262027f64b75d399f1504ffec0ba56", - "sha256:500cac01d50b301ab7bb192353317035011c5ceeef0fca652f9f43c000bb7f8d", - "sha256:52452fa5999dc50c4decaf0c53aa28371f7f1e0fe5c2dd9129059fbe1e1599c7", - "sha256:53946c5813b8f9e26103c5efff4a931cc45d874f45229edd68557ffb35ffb9f8", - "sha256:540ce6c4bf6b58cb0fd93fa5f143bc0ee341c93bb4f9287ccd92cf898cc1b0dd", - "sha256:585f1703a518b437f5103aa9cf70e9bd437cb78eea9c51024329e4fb8a3e3679", - "sha256:59b77e4b7a708988d84f26de3ebead61ef1659c73dcbc9946c18f3b1786d2688", - "sha256:5a2d68a523a4cefd791156a4174189a4114cf0bf9c50ceb89f261600f3b2b81a", - "sha256:5d3bc83e14d67adc2e9387e511097f254bd1b43c3020440e708858c684cbac68", - "sha256:5f0032bedc869c56f8d26259fe39cd21c5199cd57f2228d817a0e23e8370af25", - "sha256:60c828a27e852ded2c85fc0f87bf1ec20e464c5cd4d56ff0e0711855cc2e17f8", - "sha256:63bfd425e25f5c733f572e2246e08a1c38bd6f2e027d3f7c87e2e43f228d1345", - "sha256:65573dc35be9059770808e276b017256fa30058802c29e1038eb1c00028502ea", - "sha256:670e95fe34e0d591d0529e5e59fd9d3d72bc77b1444fcaa14dccda4f36b5a38b", - "sha256:67e1e7cb8678bb1b37ac48ec0df04faf689e2f4e9e81e566b5c63d9f23748523", - "sha256:68814d599d25ed2fdd045ed54d370d1d03cf35e02dce56de44c651f828fb9b7b", - "sha256:6885016f34bef80ea1085b7e99b3c1f92cb1be78a49839203060f67b40aee761", - "sha256:6ac85d9a8c1af0e3132371d9f2d134695a06a96993c2e2f0bbe25debb9e3f67a", - "sha256:6d3de65718b86c3eeaa8b73a9c3d123f9307a96bbd7be9698e21e76a56443af5", - "sha256:7389129c03fadd1ccc37fd1ebbc773f2b031483b04700923c3511d2a939252cc", - "sha256:73e7d408e9012cd17511b382b43547850969c7979efc2bc353f317abaf23c84c", - "sha256:7469d70d3de970b1698d47c11ebbf296a308702cbaae7fcb993944751cf985f4", - "sha256:75331c0c746f03158ded32465b7d0b0e24c5a22121743662a2393439c43a45cf", - "sha256:76ded72f69209c9780fdb23ea89e56d35c54ae6abcdec67ccb22af8e696e449a", - "sha256:775409ce0fcc58b10773fdb4221ed1eb007de10fe7adbdf8f5e8a56096b6f0bc", - "sha256:77d9b228da8374c7262046a36c1f656ba32a93df6cc51cd4453af932011e77f1", - "sha256:788ee7d04cc0e0e7f944c52ff05f52a4415b312f5efd2ee66389fb7685ee030c", - "sha256:78dd8adfb48716233c45f676d6e48534d34b4bceb50162c13d1f0bdf6f78590a", - "sha256:801183a0f76dc647f51a2d9141ad341f9665602a7899a693207a82fb102cc53e", - "sha256:8158938cf3324172df024da511839d373c40fbfaa83e9abf467174b2910d7b4c", - "sha256:81efb124b58af39fcd684254c645e35692fea81c51627259cdf6d67ff4458916", - "sha256:834001bed193e4440c4a3950a31059523ee5090605c907c66808664c932b549c", - "sha256:83c4f13a9e687335c3928f615cd63a37e3f8ef072a3f2a0599fa09f863fb06a2", - "sha256:868f9df9e99ad1e7f38c52194063a982bc88fedc7d05096f4f8160403aaf4bd6", - "sha256:87bc42bd04fd9ca31396d3ca0433db0be1411b6b53ac5a32b7845a85d01ffc2e", - "sha256:8e8deb16c4321d61ae72533b8451ec4a9af8656d1c61ff81aa49f966406e4b68", - "sha256:9483f42be3b6bc8ff77dd67302de8ae411c4db39f7224dec66b0eb95822e4163", - "sha256:951d2fcf1817f4fb0ef0b48f6696688a4e852a95922a042b3f96aaa67eedc920", - "sha256:9633b663393d5796f0b60249549371e392b75a0b955c07e9c6f8708a87fc841f", - "sha256:96f1d038c827cdc552d97e71f522e1049fef0542be575421f7684756a748e457", - "sha256:9cc9449bd0b0bc538bd5e268221f0c5590bc5c14c1934a6ae359d44410dc68c4", - "sha256:9d1a94b9d793ed8fe35ab6d5cea28d540a46559bafc6aae98f30ee0867000cab", - "sha256:9e347d77e2c77eb7624400ccd09bed69d35c0332f417ce8c048d404a096c593b", - "sha256:9f556eea3aec1d3d955403159fe2123ddd68e880f83954ee9b4a3f2e15e716b6", - "sha256:a01e232e6d3d5cf8b1667bc3b657a77bdab73f0743c26c1d3c5dd7ce86bd3a92", - "sha256:a0dd565f83b30f2ca79b5d35748d0d99dd4b3454f80e03dfb41f0038e3bdf180", - "sha256:a3a315a6d0054bc6889a17f5668a73f94f7fe55121ff59e0a199e3519c08565f", - "sha256:a63eaccd22243c67e4f2b1c3e258b257effc4acd78f3b9d397edc8cf8f1298a7", - "sha256:a659467495de201e2f282063808a41170448c78bada1e62707b07a27b05e6943", - "sha256:a6c19feda32b931cae0acd42748a670bdf56bee6476a046af20181ad3fee4090", - "sha256:adaa9c6dead67e2dd90d634f89131e43162012479d86e25618e821a03d1eb1dc", - "sha256:b17b299ca9966ca983ecda1c0791a3f07f9ca6ab5ded8ef3d283fff45f6bcd5f", - "sha256:b3139098e3e8b2ad7afbca96d30ad29157b50c90861084e69fcb80dec7430461", - "sha256:b4db6a61d968de73722b858038c616a1bebd4a86abe2688e46ca0cc2d17558f2", - "sha256:b5a8810ad6a6f933fff6c276eae92c1da217b39b4d8b1bc1c0b8af2d270dc532", - "sha256:b75a616e02f21b6f1d5785b20cecbab5e2bd3f6358a90e8925b813d557666ec1", - "sha256:b98d40a2ffa560653f6274e15b27b3544e8e3713a44627ce268f419f35c49478", - "sha256:bad5e4b2476949bcd638a89f71b6916fa9a5cae5c1ae7eede337aca2100435c0", - "sha256:bb07000b19d41e35eecef9a454f31a8b4718a185293f0d0b1c4b61d6e4487971", - "sha256:bfeaa1a699c6b9ed514bd15e6a91e74738b71125a9292159e3d6b7f0a53d2cde", - "sha256:c36302c1c69eebb383775a89645a32b9d266878fab619819ce660309d6176c9b", - "sha256:c6d156bdb26732feada84f9388a9f135528c1ef5b05fae153da365ad4319c4c5", - "sha256:c7db3006a4915151ce1913652e907cdede299b974641a83fbc092102ac41b644", - "sha256:c859c7ed90b0047f58ee27751c8e56951452ed36a67afee1b0a87847d065eec6", - "sha256:cbd39cae1ad3e3ef6f63a6f07296b080c951f24cec60188378e43d3713000c04", - "sha256:cf727bb1281d66699bef5683b04d98c894a2803442c490a8d45cd365abfbdeb2", - "sha256:d0f1dd769f064adc33831f5e97ad07babbd728427f98e3e1db6902e369122737", - "sha256:d42ffd4c2259f31832cb17ff866c111684c87bd930892a1ba53fed28370c918c", - "sha256:d5f23198821e227cfc52d50fa989813513db381255c6d100927b012f0cfec63d", - "sha256:d641f5b8149ea98deb5ffcf604d764aad1de38a8285f86771ce1abf8e74c4891", - "sha256:d73de19682deabb02524b3d5d1f8b3aaba94c72f1bbfc7911b9b9d5d391c0310", - "sha256:d94581aab8c6b204def4d7320f07534d6ee34cd4855688004a4354e63b639a35", - "sha256:dbd280b07e6054ea68b0cb4b16ad9703e7d63cd6890f577cb98acc5354780142", - "sha256:dd8a1f6d2063a92cd04145c7fd9e31a1c7d85fbec20113a14b487563fdbc0597", - "sha256:dde2bf390d25f67908278d6f5d59e46211ef98e44108727084d4637ee70ab4f1", - "sha256:e3cec4a29eb7fe8da0b1c7988bc3828183080439dd559f720414450de076fcab", - "sha256:e7a97058f96340850da0601a3309f3d29d6191b0702b2da201e54c6e3e44ccf0", - "sha256:e98ef5524f8b6620c8cdef97220c0b6a5c1cef69852fcd2f174bb96c2bb316b1", - "sha256:f0b6453c54c57c1781292c46593f8a37254b8b99004c68d6c3ce229688931a22", - "sha256:f3664ac565d0e809b0b929dae7ccd74e4d3273cd0c6d1220c6430035befb678e", - "sha256:f4b15f51b4f8f2a512341d9ce3475cacc19c5fdfc5db1f0e19449e75f95c7dc8", - "sha256:f4beb84b6073b1247a773141a6331117e35d07134b3bb0383003f39971d414bb", - "sha256:f6594d130d0ad933d885c6a7b75c5183cb0e8450f799b80a39eae2b8508955eb", - "sha256:f68bf99ea970960a237f416ea394e266e0361895753df06e3e06e6ea7907d98b", - "sha256:fd33da8e9407559f8779c82a0448e2133737f922d71f884da27184549416bfed", - "sha256:fdadf66b5a22ceb645d5435a0be7a0292ce59648ca1d46b352f13cff3ea80410" - ], - "markers": "python_version >= '3.7'", - "version": "==0.4.5" - }, - "scikit-learn": { - "hashes": [ - "sha256:04a5ba45c12a5ff81518aa4f1604e826a45d20e53da47b15871526cda4ff5174", - "sha256:0baa91eeb8c32632628874a5c91885eaedd23b71504d24227925080da075837a", - "sha256:1dad624cffe3062276a0881d4e441bc9e3b19d02d17757cd6ae79a9d192a0027", - "sha256:1f50b4f24cf12a81c3c09958ae3b864d7534934ca66ded3822de4996d25d7285", - "sha256:21fadfc2ad7a1ce8bd1d90f23d17875b84ec765eecbbfc924ff11fb73db582ce", - "sha256:2fce7950a3fad85e0a61dc403df0f9345b53432ac0e47c50da210d22c60b6d85", - "sha256:30f34bb5fde90e020653bb84dcb38b6c83f90c70680dbd8c38bd9becbad7a127", - "sha256:34e20bfac8ff0ebe0ff20fb16a4d6df5dc4cc9ce383e00c2ab67a526a3c67b18", - "sha256:366fb3fa47dce90afed3d6106183f4978d6f24cfd595c2373424171b915ee718", - "sha256:3c716d13ba0a2f8762d96ff78d3e0cde90bc9c9b5c13d6ab6bb9b2d6ca6705fd", - "sha256:59cd96a8d9f8dfd546f5d6e9787e1b989e981388d7803abbc9efdcde61e47460", - "sha256:5be4577769c5dde6e1b53de8e6520f9b664ab5861dd57acee47ad119fd7405d6", - "sha256:5c3fa7d3dd5a0ec2d0baba0d644916fa2ab180ee37850c5d536245df916946bd", - "sha256:5fe11794236fb83bead2af26a87ced5d26e3370b8487430818b915dafab1724e", - "sha256:61fe3dcec0d82ae280877a818ab652f4988371e32dd5451e75251bece79668b1", - "sha256:66b1cf721a9f07f518eb545098226796c399c64abdcbf91c2b95d625068363da", - "sha256:7b35b60cf4cd6564b636e4a40516b3c61a4fa7a8b1f7a3ce80c38ebe04750bc3", - "sha256:98717d3c152f6842d36a70f21e1468fb2f1a2f8f2624d9a3f382211798516426", - "sha256:9aafd94bafc841b626681e626be27bf1233d5a0f20f0a6fdb4bee1a1963c6643", - "sha256:9d58481f9f7499dff4196927aedd4285a0baec8caa3790efbe205f13de37dd6e", - "sha256:a17860a562bac54384454d40b3f6155200c1c737c9399e6a97962c63fce503ac", - "sha256:a46d3ca0f11a540b8eaddaf5e38172d8cd65a86cb3e3632161ec96c0cffb774c", - "sha256:a73b1c2038c93bc7f4bf21f6c9828d5116c5d2268f7a20cfbbd41d3074d52083", - "sha256:b44e3a51e181933bdf9a4953cc69c6025b40d2b49e238233f149b98849beb4bf", - "sha256:b6916d1cec1ff163c7d281e699d7a6a709da2f2c5ec7b10547e08cc788ddd3ae", - "sha256:df778486a32518cda33818b7e3ce48c78cef1d5f640a6bc9d97c6d2e71449a51", - "sha256:e5453b2e87ef8accedc5a8a4e6709f887ca01896cd7cc8a174fe39bd4bb00aef", - "sha256:eb9ae21f387826da14b0b9cb1034f5048ddb9182da429c689f5f4a87dc96930b", - "sha256:eba06d75815406091419e06dd650b91ebd1c5f836392a0d833ff36447c2b1bfa", - "sha256:efa7a579606c73a0b3d210e33ea410ea9e1af7933fe324cb7e6fbafae4ea5948" - ], - "markers": "python_version >= '3.9'", - "version": "==1.6.0" - }, - "scipy": { - "hashes": [ - "sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e", - "sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79", - "sha256:278266012eb69f4a720827bdd2dc54b2271c97d84255b2faaa8f161a158c3b37", - "sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5", - "sha256:2da0469a4ef0ecd3693761acbdc20f2fdeafb69e6819cc081308cc978153c675", - "sha256:2ff0a7e01e422c15739ecd64432743cf7aae2b03f3084288f399affcefe5222d", - "sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f", - "sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310", - "sha256:3a1b111fac6baec1c1d92f27e76511c9e7218f1695d61b59e05e0fe04dc59617", - "sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e", - "sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e", - "sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417", - "sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d", - "sha256:716e389b694c4bb564b4fc0c51bc84d381735e0d39d3f26ec1af2556ec6aad94", - "sha256:8426251ad1e4ad903a4514712d2fa8fdd5382c978010d1c6f5f37ef286a713ad", - "sha256:8475230e55549ab3f207bff11ebfc91c805dc3463ef62eda3ccf593254524ce8", - "sha256:8bddf15838ba768bb5f5083c1ea012d64c9a444e16192762bd858f1e126196d0", - "sha256:8e32dced201274bf96899e6491d9ba3e9a5f6b336708656466ad0522d8528f69", - "sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066", - "sha256:97c5dddd5932bd2a1a31c927ba5e1463a53b87ca96b5c9bdf5dfd6096e27efc3", - "sha256:a49f6ed96f83966f576b33a44257d869756df6cf1ef4934f59dd58b25e0327e5", - "sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07", - "sha256:b05d43735bb2f07d689f56f7b474788a13ed8adc484a85aa65c0fd931cf9ccd2", - "sha256:b28d2ca4add7ac16ae8bb6632a3c86e4b9e4d52d3e34267f6e1b0c1f8d87e389", - "sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d", - "sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84", - "sha256:c0ee987efa6737242745f347835da2cc5bb9f1b42996a4d97d5c7ff7928cb6f2", - "sha256:d0d2821003174de06b69e58cef2316a6622b60ee613121199cb2852a873f8cf3", - "sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73", - "sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06", - "sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc", - "sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1", - "sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2" - ], - "markers": "python_version >= '3.10'", - "version": "==1.14.1" - }, - "sentence-transformers": { - "hashes": [ - "sha256:9635dbfb11c6b01d036b9cfcee29f7716ab64cf2407ad9f403a2e607da2ac48b", - "sha256:abffcc79dab37b7d18d21a26d5914223dd42239cfe18cb5e111c66c54b658ae7" - ], - "index": "pypi", - "markers": "python_version >= '3.9'", - "version": "==3.3.1" - }, "setuptools": { "hashes": [ "sha256:8199222558df7c86216af4f84c30e9b34a61d8ba19366cc914424cdbd28252f6", @@ -1502,66 +1048,6 @@ "markers": "python_version >= '3.7'", "version": "==1.3.1" }, - "sympy": { - "hashes": [ - "sha256:9cebf7e04ff162015ce31c9c6c9144daa34a93bd082f54fd8f12deca4f47515f", - "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8" - ], - "markers": "python_version >= '3.8'", - "version": "==1.13.1" - }, - "threadpoolctl": { - "hashes": [ - "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107", - "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467" - ], - "markers": "python_version >= '3.8'", - "version": "==3.5.0" - }, - "tokenizers": { - "hashes": [ - "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b", - "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2", - "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273", - "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff", - "sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193", - "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e", - "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c", - "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e", - "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74", - "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba", - "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04", - "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a", - "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e", - "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4", - "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e" - ], - "markers": "python_version >= '3.7'", - "version": "==0.21.0" - }, - "torch": { - "hashes": [ - "sha256:1f3b7fb3cf7ab97fae52161423f81be8c6b8afac8d9760823fd623994581e1a3", - "sha256:23d062bf70776a3d04dbe74db950db2a5245e1ba4f27208a87f0d743b0d06e86", - "sha256:31f8c39660962f9ae4eeec995e3049b5492eb7360dd4f07377658ef4d728fa4c", - "sha256:32a037bd98a241df6c93e4c789b683335da76a2ac142c0973675b715102dc5fa", - "sha256:340ce0432cad0d37f5a31be666896e16788f1adf8ad7be481196b503dad675b9", - "sha256:34bfa1a852e5714cbfa17f27c49d8ce35e1b7af5608c4bc6e81392c352dbc601", - "sha256:3f4b7f10a247e0dcd7ea97dc2d3bfbfc90302ed36d7f3952b0008d0df264e697", - "sha256:46c817d3ea33696ad3b9df5e774dba2257e9a4cd3c4a3afbf92f6bb13ac5ce2d", - "sha256:603c52d2fe06433c18b747d25f5c333f9c1d58615620578c326d66f258686f9a", - "sha256:71328e1bbe39d213b8721678f9dcac30dfc452a46d586f1d514a6aa0a99d4744", - "sha256:73e58e78f7d220917c5dbfad1a40e09df9929d3b95d25e57d9f8558f84c9a11c", - "sha256:7974e3dce28b5a21fb554b73e1bc9072c25dde873fa00d54280861e7a009d7dc", - "sha256:8046768b7f6d35b85d101b4b38cba8aa2f3cd51952bc4c06a49580f2ce682291", - "sha256:8c712df61101964eb11910a846514011f0b6f5920c55dbf567bff8a34163d5b1", - "sha256:9b61edf3b4f6e3b0e0adda8b3960266b9009d02b37555971f4d1c8f7a05afed7", - "sha256:de5b7d6740c4b636ef4db92be922f0edc425b65ed78c5076c43c42d362a45457", - "sha256:ed231a4b3a5952177fafb661213d690a72caaad97d5824dd4fc17ab9e15cec03" - ], - "markers": "python_full_version >= '3.8.0'", - "version": "==2.5.1" - }, "tqdm": { "hashes": [ "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", @@ -1570,14 +1056,6 @@ "markers": "python_version >= '3.7'", "version": "==4.67.1" }, - "transformers": { - "hashes": [ - "sha256:a8e1bafdaae69abdda3cad638fe392e37c86d2ce0ecfcae11d60abb8f949ff4d", - "sha256:f8ead7a5a4f6937bb507e66508e5e002dc5930f7b6122a9259c37b099d0f3b19" - ], - "markers": "python_full_version >= '3.9.0'", - "version": "==4.47.0" - }, "typing-extensions": { "hashes": [ "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", @@ -1872,26 +1350,24 @@ }, "psutil": { "hashes": [ - "sha256:000d1d1ebd634b4efb383f4034437384e44a6d455260aaee2eca1e9c1b55f047", - "sha256:045f00a43c737f960d273a83973b2511430d61f283a44c96bf13a6e829ba8fdc", - "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e", - "sha256:1209036fbd0421afde505a4879dee3b2fd7b1e14fee81c0069807adcbbcca747", - "sha256:1ad45a1f5d0b608253b11508f80940985d1d0c8f6111b5cb637533a0e6ddc13e", - "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a", - "sha256:498c6979f9c6637ebc3a73b3f87f9eb1ec24e1ce53a7c5173b8508981614a90b", - "sha256:5cd2bcdc75b452ba2e10f0e8ecc0b57b827dd5d7aaffbc6821b2a9a242823a76", - "sha256:6d3fbbc8d23fcdcb500d2c9f94e07b1342df8ed71b948a2649b5cb060a7c94ca", - "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688", - "sha256:9118f27452b70bb1d9ab3198c1f626c2499384935aaf55388211ad982611407e", - "sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38", - "sha256:a8506f6119cff7015678e2bce904a4da21025cc70ad283a53b099e7620061d85", - "sha256:a8fb3752b491d246034fa4d279ff076501588ce8cbcdbb62c32fd7a377d996be", - "sha256:c0e0c00aa18ca2d3b2b991643b799a15fc8f0563d2ebb6040f64ce8dc027b942", - "sha256:d905186d647b16755a800e7263d43df08b790d709d575105d419f8b6ef65423a", - "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0" + "sha256:018aeae2af92d943fdf1da6b58665124897cfc94faa2ca92098838f83e1b1bca", + "sha256:0bdd4eab935276290ad3cb718e9809412895ca6b5b334f5a9111ee6d9aff9377", + "sha256:1924e659d6c19c647e763e78670a05dbb7feaf44a0e9c94bf9e14dfc6ba50468", + "sha256:33431e84fee02bc84ea36d9e2c4a6d395d479c9dd9bba2376c1f6ee8f3a4e0b3", + "sha256:384636b1a64b47814437d1173be1427a7c83681b17a450bfc309a1953e329603", + "sha256:8be07491f6ebe1a693f17d4f11e69d0dc1811fa082736500f649f79df7735303", + "sha256:8df0178ba8a9e5bc84fed9cfa61d54601b371fbec5c8eebad27575f1e105c0d4", + "sha256:97f7cb9921fbec4904f522d972f0c0e1f4fabbdd4e0287813b21215074a0f160", + "sha256:9ccc4316f24409159897799b83004cb1e24f9819b0dcf9c0b68bdcb6cefee6a8", + "sha256:b6e06c20c05fe95a3d7302d74e7097756d4ba1247975ad6905441ae1b5b66003", + "sha256:ca9609c77ea3b8481ab005da74ed894035936223422dc591d6772b147421f777", + "sha256:cf8496728c18f2d0b45198f06895be52f36611711746b7f30c464b422b50e2f5", + "sha256:eaa912e0b11848c4d9279a93d7e2783df352b082f40111e078388701fd479e53", + "sha256:f35cfccb065fff93529d2afb4a2e89e363fe63ca1e4a5da22b603a85833c2649", + "sha256:fc0ed7fe2231a444fc219b9c42d0376e0a9a1a72f16c5cfa0f68d19f1a0663e8" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", - "version": "==6.1.0" + "version": "==6.1.1" }, "ptyprocess": { "hashes": [ diff --git a/modules/rag_description_generation/main.py b/modules/rag_description_generation/main.py index ac6a63d..300a60f 100644 --- a/modules/rag_description_generation/main.py +++ b/modules/rag_description_generation/main.py @@ -6,7 +6,8 @@ import pandas as pd from config import CONFIGS -from modules.rag_description_generation.ec2_weaviate import Ec2 + +# from modules.rag_description_generation.ec2_weaviate import Ec2 from modules.rag_description_generation.rag_dynamodb import DynamoDB from modules.rag_description_generation.rag_functions import get_single_game_entries from modules.rag_description_generation.rag_weaviate import WeaviateClient @@ -36,19 +37,19 @@ def model_post_init(self, __context): self.end_block = int(self.end_block) self.num_completed_games = self.start_block - def confirm_running_ec2_host(self): - ec2_instance = Ec2() - ec2_instance.validate_ready_weaviate_instance() - self.ip_address = ec2_instance.get_ip_address() - ec2_instance.copy_docker_compose_to_instance() - ec2_instance.start_docker() + # def confirm_running_ec2_host(self): + # ec2_instance = Ec2() + # ec2_instance.validate_ready_weaviate_instance() + # self.ip_address = ec2_instance.get_ip_address() + # ec2_instance.copy_docker_compose_to_instance() + # ec2_instance.start_docker() - print(f"\nWeaviate instance running at {self.ip_address}") + # print(f"\nWeaviate instance running at {self.ip_address}") - def stop_ec2_instance(self): - ec2_instance = Ec2() - self.ip_address = ec2_instance.get_ip_address() - ec2_instance.stop_instance() + # def stop_ec2_instance(self): + # ec2_instance = Ec2() + # self.ip_address = ec2_instance.get_ip_address() + # ec2_instance.stop_instance() def compute_game_overall_stats(self, game_df): overall_mean = round(game_df["AvgRating"].describe()["mean"], 2) @@ -151,13 +152,13 @@ def process_single_game( print(f"Game {game_id} already processed") def rag_description_generation_chain(self): - self.confirm_running_ec2_host() + # self.confirm_running_ec2_host() game_df_reduced = self.load_reduced_game_df() all_games_df = self.merge_game_df_with_ratings_df(game_df_reduced) generate_prompt = self.load_prompt() weaviate_client = WeaviateClient( - ip_address=self.ip_address, + # ip_address=self.ip_address, collection_name=f"reviews_{self.start_block}_{self.end_block}", ) weaviate_client.create_weaviate_collection() diff --git a/modules/rag_description_generation/rag_encoder_compare.ipynb b/modules/rag_description_generation/rag_encoder_compare.ipynb deleted file mode 100644 index a1b99c7..0000000 --- a/modules/rag_description_generation/rag_encoder_compare.ipynb +++ /dev/null @@ -1,247 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from sentence_transformers import SentenceTransformer\n", - "from dotenv import load_dotenv\n", - "import warnings\n", - "warnings.filterwarnings('ignore')\n", - "load_dotenv(\"../.env\")\n", - "\n", - "pd.set_option(\"display.max_columns\", 30)\n", - "pd.set_option(\"display.max_rows\", 30)\n", - "\n", - "ai_generator = \"gpt-4o-mini\"\n", - "word_vec = \"mpnet\"\n", - "collection_name = \"Reviews_MPNet\"\n", - "sample_pct=.05" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "games = pd.read_pickle(\"../data/prod/games/game_dfs_clean/games_clean.pkl\")\n", - "games = games[['BGGId', 'Name']]\n", - "\n", - "summaries = pd.read_csv('./ai_summaries_comparison.csv')\n", - "summaries = summaries.merge(games, on='BGGId', how='left')\n", - "names = summaries['Name'].tolist()\n", - "summaries = summaries.loc[summaries['Name'].isin(names)][['BGGId','mini_mpnet', 'Name']]\n", - "summaries.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "full_descriptions = summaries['mini_mpnet'].tolist()\n", - "full_descriptions = [x.split(\"\\n\\n### Pros\")[0].replace(\"### What is this game about?\\n\", \"\").replace('\"', '').replace(\"'\", \"\").replace(f\"{names[i]} is \", \"\") for x,i in zip(full_descriptions, range(len(names)))]\n", - "sentences1 = [x.split(\". \")[0] for x in full_descriptions]\n", - "sentences2 = [x.split(\". \")[1] for x in full_descriptions]\n", - "sentences3 = [\". \".join(x.split(\". \")[0:2]) for x in full_descriptions]\n", - "sentences3" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_comparison = {}\n", - "model_name = \"BAAI/bge-m3\"\n", - "# model_name = \"all-mpnet-base-v2\"\n", - "model = SentenceTransformer(model_name)\n", - "\n", - "print(f\"\\n\\nRunning with model: {model_name}\")\n", - "\n", - "print(\"Model loaded\")\n", - "embeddings = model.encode(sentences3)\n", - "print(\"Embeddings generated\")\n", - "similarities = model.similarity(embeddings, embeddings)\n", - "print(\"Similarities generated\")\n", - "\n", - "\n", - "similarity_df = pd.DataFrame(similarities, columns=names, index=names)\n", - "print(\"Similarity dataframe created\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "similarity_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# make a list of all the pairs of games and their similarity scores\n", - "\n", - "similarity_list = []\n", - "for i in range(len(similarity_df)):\n", - " for j in range(i+1, len(similarity_df)):\n", - " similarity_list.append([names[i], names[j], similarity_df.iloc[i,j]])\n", - "\n", - "\n", - "sim_df = pd.DataFrame(similarity_list, columns=['Game1', 'Game2', 'Similarity']).sort_values('Similarity', ascending=False)\n", - "\n", - "sim_df.head(30)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sim_df.tail(30)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Test multiple embedding models" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_names = [\"all-mpnet-base-v2\", \"multi-qa-mpnet-base-cos-v1\",\"all-MiniLM-L6-v2\",\"BAAI/bge-m3\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_comparison = {}\n", - "model_name = \"BAAI/bge-m3\"\n", - "model = SentenceTransformer(model_name)\n", - "\n", - "# for model_name in model_names:\n", - "\n", - "for i, sentences in zip(range(3), [sentences1, sentences2, sentences3]):\n", - "\n", - " print(f\"\\n\\nRunning with model: {model_name} on {sentences[i]}\")\n", - "\n", - " # Load https://huggingface.co/sentence-transformers/all-mpnet-base-v2\n", - " # model = SentenceTransformer(model_name)\n", - " print(\"Model loaded\")\n", - " embeddings = model.encode(sentences)\n", - " print(\"Embeddings generated\")\n", - " similarities = model.similarity(embeddings, embeddings)\n", - " print(\"Similarities generated\")\n", - "\n", - " \n", - " similarity_df = pd.DataFrame(similarities, columns=names, index=names)\n", - " print(\"Similarity dataframe created\")\n", - "\n", - " gh_sim = similarity_df.iloc[1,4]\n", - " burg_sim = similarity_df.iloc[2,9]\n", - " brass_sim = similarity_df.iloc[0,8]\n", - " space_sim = similarity_df.iloc[3,6]\n", - "\n", - " print(f\"Scores: GH: {gh_sim}, Burg: {burg_sim}, Brass: {brass_sim}, Space: {space_sim}\")\n", - "\n", - " model_comparison = model_comparison | {f\"gh_{model_name}_{i}\":gh_sim, f\"burg_{model_name}_{i}\":burg_sim, f\"brass_{model_name}_{i}\":brass_sim, f\"space_{model_name}_{i}\":space_sim}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_comparison = {}\n", - "model_name = \"BAAI/bge-m3\"\n", - "model = SentenceTransformer(model_name)\n", - "\n", - "# for model_name in model_names:\n", - "\n", - "for i, sentences in zip(range(3), [sentences1, sentences2, sentences3]):\n", - "\n", - " print(f\"\\n\\nRunning with model: {model_name} on {sentences[i]}\")\n", - "\n", - " # Load https://huggingface.co/sentence-transformers/all-mpnet-base-v2\n", - " # model = SentenceTransformer(model_name)\n", - " print(\"Model loaded\")\n", - " embeddings = model.encode(sentences)\n", - " print(\"Embeddings generated\")\n", - " similarities = model.similarity(embeddings, embeddings)\n", - " print(\"Similarities generated\")\n", - "\n", - " \n", - " similarity_df = pd.DataFrame(similarities, columns=names, index=names)\n", - " print(\"Similarity dataframe created\")\n", - "\n", - " gh_sim = similarity_df.iloc[1,4]\n", - " burg_sim = similarity_df.iloc[2,9]\n", - " brass_sim = similarity_df.iloc[0,8]\n", - " space_sim = similarity_df.iloc[3,6]\n", - "\n", - " print(f\"Scores: GH: {gh_sim}, Burg: {burg_sim}, Brass: {brass_sim}, Space: {space_sim}\")\n", - "\n", - " model_comparison = model_comparison | {f\"gh_{model_name}_{i}\":gh_sim, f\"burg_{model_name}_{i}\":burg_sim, f\"brass_{model_name}_{i}\":brass_sim, f\"space_{model_name}_{i}\":space_sim}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scores = pd.DataFrame.from_dict(model_comparison, orient='index').reset_index().rename(columns={'index':'game', 0:'Similarity Score'}).sort_values(\"game\")\n", - "scores" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "boardgamegeek-ZH0FNRKg", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/modules/rag_description_generation/rag_review_gen.ipynb b/modules/rag_description_generation/rag_review_gen.ipynb deleted file mode 100644 index 09760a3..0000000 --- a/modules/rag_description_generation/rag_review_gen.ipynb +++ /dev/null @@ -1,237 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import weaviate.classes as wvc\n", - "from weaviate.classes.config import Configure\n", - "from dotenv import load_dotenv\n", - "import warnings\n", - "import gc\n", - "import time\n", - "import json\n", - "import weaviate.classes as wvc\n", - "import boto3\n", - "\n", - "from modules.rag_description_generation.rag_functions import connect_weaviate_client_docker, add_collection_batch, generate_aggregated_review, get_single_game_row, divide_and_process_generated_summary, remove_collection_items\n", - "\n", - "warnings.filterwarnings('ignore')\n", - "load_dotenv(\"../.env\")\n", - "\n", - "pd.set_option(\"display.max_columns\", 30)\n", - "pd.set_option(\"display.max_rows\", 30)\n", - "\n", - "ai_generator = \"gpt-4o-mini\"\n", - "collection_name = \"Reviews\"\n", - "sample_pct=.05" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "client = connect_weaviate_client_docker()\n", - "\n", - "meta_info = client.get_meta()\n", - "meta_info['modules']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if client.collections.exists(collection_name):\n", - " client.collections.delete(collection_name)\n", - " pass\n", - "\n", - "client.collections.create( \n", - " name=collection_name,\n", - " vectorizer_config=[\n", - " Configure.NamedVectors.text2vec_transformers(\n", - " name=\"title_vector\",\n", - " source_properties=[\"title\"],\n", - " )\n", - " ],\n", - " generative_config=wvc.config.Configure.Generative.openai(model=ai_generator),\n", - " properties=[\n", - " wvc.config.Property(\n", - " name=\"review_text\",\n", - " data_type=wvc.config.DataType.TEXT,\n", - " ),\n", - " wvc.config.Property(\n", - " name=\"product_id\",\n", - " data_type=wvc.config.DataType.TEXT,\n", - " skip_vectorization=True,\n", - " vectorize_property_name=False\n", - " )\n", - " ]\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "game_df = pd.read_pickle(\"../data/prod/games/game_dfs_clean/games_clean.pkl\")\n", - "\n", - "overall_stats: dict = {}\n", - "\n", - "game_mean = game_df[\"AvgRating\"].describe()[\"mean\"]\n", - "game_std = game_df[\"AvgRating\"].describe()[\"std\"]\n", - "\n", - "overall_stats[\"overall_mean\"] = game_mean\n", - "overall_stats[\"overall_std\"] = game_std\n", - "overall_stats[\"two_under\"] = round(game_mean - 2 * game_std, 2)\n", - "overall_stats[\"one_under\"] = round(game_mean - game_std, 2)\n", - "overall_stats[\"half_over\"] = round(game_mean + 0.5 * game_std, 2)\n", - "overall_stats[\"one_over\"] = round(game_mean + game_std, 2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_25_df = game_df.sort_values(\"BayesAvgRating\", ascending=False)[:25]\n", - "\n", - "game_ids = top_25_df[\"BGGId\"].astype(str).tolist()\n", - "top_25_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "user_df = pd.read_pickle(\"../data/prod/users/user_dfs_clean/complete_user_ratings.pkl\")\n", - "\n", - "all_games_df = user_df.merge(top_25_df[['BGGId','Name','Description','AvgRating', 'BayesAvgRating']], on=\"BGGId\", how=\"inner\")\n", - "all_games_df[\"BGGId\"] = all_games_df[\"BGGId\"].astype(\"string\")\n", - "\n", - "del game_df\n", - "del user_df\n", - "gc.collect()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Produce synopses" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "overall_summary = {}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_prompts = json.loads(open('prompt.json').read())\n", - "generate_prompt = all_prompts['gpt4o_mini_generate_prompt_structured']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def prompt_replacement(generate_prompt, overall_stats, game_name, game_mean):\n", - "\n", - " # turn all stats to strings\n", - " overall_stats = {k: str(v) for k, v in overall_stats.items()}\n", - "\n", - " current_prompt = generate_prompt.replace(\"{GAME_NAME_HERE}\", game_name)\n", - " current_prompt = current_prompt.replace(\"{GAME_AVERAGE_HERE}\", game_mean)\n", - " current_prompt = current_prompt.replace(\"{TWO_UNDER}\", overall_stats['two_under'])\n", - " current_prompt = current_prompt.replace(\"{ONE_UNDER}\", overall_stats['one_under'])\n", - " current_prompt = current_prompt.replace(\"{ONE_OVER}\", overall_stats['one_over'])\n", - " current_prompt = current_prompt.replace(\"{HALF_OVER}\", overall_stats['half_over'])\n", - " current_prompt = current_prompt.replace(\"{OVERALL_MEAN}\", overall_stats['overall_mean'])\n", - " return current_prompt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for game_id in game_ids[:1]:\n", - "\n", - " if not check_dynamo_db_key(game_id):\n", - " df, game_name, avg_rating = get_single_game_row(all_games_df, game_id, sample_pct=sample_pct)\n", - " game_id = df['BGGId'].iloc[0]\n", - " reviews = df['combined_review'].to_list()\n", - " add_collection_batch(client, collection_name, game_id, reviews)\n", - " current_prompt = prompt_replacement(generate_prompt, overall_stats, game_name, game_mean)\n", - " summary = generate_aggregated_review(client, collection_name, game_id, current_prompt)\n", - " divide_and_process_generated_summary(game_id, summary=summary.generated)\n", - " print(f\"\\n\\n{summary.generated}\")\n", - " remove_collection_batch(client, collection_name, game_id, reviews)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# get item from dynamodb\n", - "dynamodb_client = boto3.client('dynamodb')\n", - "table_name = 'game_generated_descriptions'\n", - "response = dynamodb_client.get_item(TableName=table_name, Key={'game_id': {'S': game_id}})['Item']\n", - "response\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "boardgamegeek-ZH0FNRKg", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/modules/rag_description_generation/rag_weaviate.py b/modules/rag_description_generation/rag_weaviate.py index a349607..3820f00 100644 --- a/modules/rag_description_generation/rag_weaviate.py +++ b/modules/rag_description_generation/rag_weaviate.py @@ -10,28 +10,37 @@ class WeaviateClient(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) - ip_address: str - collection_name: str + # ip_address: str + collection_name: str = "None" weaviate_client: weaviate.client = None collection: weaviate.collections.Collection = None def model_post_init(self, __context): - self.weaviate_client = self.connect_weaviate_client_ec2() + # self.weaviate_client = self.connect_weaviate_client_ec2() + self.weaviate_client = self.connect_weaviate_client_docker() self.collection = self.weaviate_client.collections.get(self.collection_name) - def connect_weaviate_client_ec2(self) -> weaviate.client: - return weaviate.connect_to_custom( - http_host=self.ip_address, - http_port=8080, - http_secure=False, - grpc_host=self.ip_address, - grpc_port=50051, - grpc_secure=False, - skip_init_checks=False, + def connect_weaviate_client_docker(self) -> weaviate.client: + client = weaviate.connect_to_local( headers={ "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"], - }, + } ) + return client + + # def connect_weaviate_client_ec2(self) -> weaviate.client: + # return weaviate.connect_to_custom( + # http_host=self.ip_address, + # http_port=8080, + # http_secure=False, + # grpc_host=self.ip_address, + # grpc_port=50051, + # grpc_secure=False, + # skip_init_checks=False, + # headers={ + # "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"], + # }, + # ) def prompt_replacement( self, @@ -150,13 +159,3 @@ def create_weaviate_collection(self): def close_client(self): self.weaviate_client.close() - - -def connect_weaviate_client_docker() -> weaviate.client: - client = weaviate.connect_to_local( - headers={ - # "X-HuggingFace-Api-Key": os.environ["HUGGINGFACE_APIKEY"], - "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"], - } - ) - return client From b8ace2f9c7e8dc44982b5140841eccf7f6002fb3 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 11:19:36 -0800 Subject: [PATCH 09/26] update weaviate deployment file for PR --- .github/workflows/weaviate.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/weaviate.yml b/.github/workflows/weaviate.yml index e21d561..eaba7e9 100644 --- a/.github/workflows/weaviate.yml +++ b/.github/workflows/weaviate.yml @@ -4,6 +4,9 @@ on: push: branches: - main + pull_request: + branches: + - main permissions: id-token: write From 125ff378b751dabebd53ac49c74b4c0a535ceb12 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 11:22:18 -0800 Subject: [PATCH 10/26] update weaviate yml --- .github/workflows/weaviate.yml | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/.github/workflows/weaviate.yml b/.github/workflows/weaviate.yml index eaba7e9..eeb4c07 100644 --- a/.github/workflows/weaviate.yml +++ b/.github/workflows/weaviate.yml @@ -35,19 +35,13 @@ jobs: ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} ECR_REPOSITORY: weaviate_rag_server run: | - DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.weaviate -t $ECR_REGISTRY/$ECR_REPOSITORY:latest - < Date: Thu, 19 Dec 2024 11:28:48 -0800 Subject: [PATCH 11/26] organize ymls and make weaviate and sentence transformer single-push --- .../t2v-transformers.yml} | 10 +---- .github/one_off_deployments/weaviate.yml | 39 +++++++++++++++++++ .../deploy_file_to_s3.yml | 0 3 files changed, 40 insertions(+), 9 deletions(-) rename .github/{workflows/weaviate.yml => one_off_deployments/t2v-transformers.yml} (69%) create mode 100644 .github/one_off_deployments/weaviate.yml rename .github/{workflows => temp_off}/deploy_file_to_s3.yml (100%) diff --git a/.github/workflows/weaviate.yml b/.github/one_off_deployments/t2v-transformers.yml similarity index 69% rename from .github/workflows/weaviate.yml rename to .github/one_off_deployments/t2v-transformers.yml index eeb4c07..d62d1ec 100644 --- a/.github/workflows/weaviate.yml +++ b/.github/one_off_deployments/t2v-transformers.yml @@ -1,4 +1,4 @@ -name: Build and Push Weaviate and Transformers Images +name: Build and Push all-mpnet-base-v2 Transformers Image on: push: @@ -30,14 +30,6 @@ jobs: id: login-ecr uses: aws-actions/amazon-ecr-login@v2 - - name: Build, tag, and push Weaviate image to AWS ECR - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: weaviate_rag_server - run: | - DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.weaviate -t $ECR_REGISTRY/$ECR_REPOSITORY . - docker push $ECR_REGISTRY/$ECR_REPOSITORY - - name: Build, tag, and push Transformers image to AWS ECR env: ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} diff --git a/.github/one_off_deployments/weaviate.yml b/.github/one_off_deployments/weaviate.yml new file mode 100644 index 0000000..9b45bce --- /dev/null +++ b/.github/one_off_deployments/weaviate.yml @@ -0,0 +1,39 @@ +name: Build and Push Weaviate VectorDB Image + +on: + push: + branches: + - main + pull_request: + branches: + - main + +permissions: + id-token: write + contents: read + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v2 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }} + aws-region: ${{ secrets.AWS_REGION }} + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Build, tag, and push Weaviate image to AWS ECR + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: weaviate_rag_server + run: | + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.weaviate -t $ECR_REGISTRY/$ECR_REPOSITORY . + docker push $ECR_REGISTRY/$ECR_REPOSITORY diff --git a/.github/workflows/deploy_file_to_s3.yml b/.github/temp_off/deploy_file_to_s3.yml similarity index 100% rename from .github/workflows/deploy_file_to_s3.yml rename to .github/temp_off/deploy_file_to_s3.yml From 34efee93d8e874f0e44d54d354f80e5c20611ee4 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 11:44:41 -0800 Subject: [PATCH 12/26] updates to task startup for rag --- aws_terraform_bgg/fargate_task_defs_rag.tf | 6 ++---- .../rag_description_generation_fargate_trigger.py | 5 ++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/aws_terraform_bgg/fargate_task_defs_rag.tf b/aws_terraform_bgg/fargate_task_defs_rag.tf index 3585f78..af2a766 100644 --- a/aws_terraform_bgg/fargate_task_defs_rag.tf +++ b/aws_terraform_bgg/fargate_task_defs_rag.tf @@ -6,7 +6,6 @@ resource "aws_ecs_task_definition" "weaviate_rag_generation" { name = var.rag_description_generation, image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.rag_description_generation}:latest" cpu = 0, - memory=2048, essential = true, environment = [ { @@ -47,7 +46,6 @@ resource "aws_ecs_task_definition" "weaviate_rag_generation" { name = var.weaviate_rag_server, image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.weaviate_rag_server}:latest" cpu = 0, - memory = 4096, portMappings = [ { containerPort = 8080, @@ -141,8 +139,8 @@ resource "aws_ecs_task_definition" "weaviate_rag_generation" { network_mode = "awsvpc" requires_compatibilities = ["FARGATE"] - cpu = "1024" - memory = "8092" + cpu = "2048" + memory = "8192" runtime_platform { cpu_architecture = "X86_64" diff --git a/modules/lambda_functions/rag_description_generation_fargate_trigger.py b/modules/lambda_functions/rag_description_generation_fargate_trigger.py index 217b977..037826a 100644 --- a/modules/lambda_functions/rag_description_generation_fargate_trigger.py +++ b/modules/lambda_functions/rag_description_generation_fargate_trigger.py @@ -45,9 +45,8 @@ def lambda_handler(event, context): terraform_state_file = get_terraform_state_file() - task_definition = ( - f"dev_{TASK_DEFINITION}" if ENVIRONMENT != "prod" else TASK_DEFINITION - ) + task_definition = TASK_DEFINITION + print(task_definition) ecs_client = boto3.client("ecs") From b19aba54673f3f530bb365a1764d9da3feeab688 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 11:58:24 -0800 Subject: [PATCH 13/26] updates --- aws_terraform_bgg/fargate_task_defs_rag.tf | 156 ++++++++++++++++++ aws_terraform_bgg/lambda_iam_policies.tf | 3 - ..._description_generation_fargate_trigger.py | 5 +- 3 files changed, 159 insertions(+), 5 deletions(-) diff --git a/aws_terraform_bgg/fargate_task_defs_rag.tf b/aws_terraform_bgg/fargate_task_defs_rag.tf index af2a766..2e8ca4c 100644 --- a/aws_terraform_bgg/fargate_task_defs_rag.tf +++ b/aws_terraform_bgg/fargate_task_defs_rag.tf @@ -148,12 +148,168 @@ resource "aws_ecs_task_definition" "weaviate_rag_generation" { } } +resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { + family = "dev_${var.rag_description_generation}" + + container_definitions = jsonencode([ + { + name = var.rag_description_generation, + image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/dev_${var.rag_description_generation}:latest" + cpu = 0, + essential = true, + environment = [ + { + name = "ENVIRONMENT", + value = "prod" + }, + { + name = "IS_LOCAL", + value = "false" + } + ], + environmentFiles = [ + { + value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env", + type = "s3" + }, + { + value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/weaviate.env", + type = "s3" + } + ], + mountPoints = [], + volumesFrom = [], + ulimits = [], + logConfiguration = { + logDriver = "awslogs", + options = { + "awslogs-group" = "/ecs/dev_${var.rag_description_generation}", + "awslogs-create-group" = "true", + "awslogs-region" = var.REGION, + "awslogs-stream-prefix" = "ecs" + }, + secretOptions = [] + }, + systemControls = [] + }, + { + name = var.weaviate_rag_server, + image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.weaviate_rag_server}:latest" + cpu = 0, + portMappings = [ + { + containerPort = 8080, + hostPort = 8080 + }, + { + containerPort = 50051, + hostPort = 50051 + }, + ], + essential = true, + environment = [ + { + name = "ENVIRONMENT", + value = "prod" + }, + { + name = "IS_LOCAL", + value = "false" + } + ], + environmentFiles = [ + { + value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env", + type = "s3" + }, + { + value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/weaviate.env", + type = "s3" + } + ], + command=[ + "--host", "0.0.0.0", + "--port", "8080", + "--scheme", "http" + ], + mountPoints = [], + volumesFrom = [], + ulimits = [], + logConfiguration = { + logDriver = "awslogs", + options = { + "awslogs-group" = "/ecs/${var.weaviate_rag_server}", + "awslogs-create-group" = "true", + "awslogs-region" = var.REGION, + "awslogs-stream-prefix" = "ecs" + }, + secretOptions = [] + }, + systemControls = [] + }, + { + name = var.t2v-transformers, + image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.t2v-transformers}:latest" + cpu = 0, + essential = true, + environment = [ + { + name = "ENVIRONMENT", + value = "prod" + }, + { + name = "IS_LOCAL", + value = "false" + }, + { + name = "ENABLE_CUDA", + value = "0" + }], + mountPoints = [], + volumesFrom = [], + ulimits = [], + logConfiguration = { + logDriver = "awslogs", + options = { + "awslogs-group" = "/ecs/${var.t2v-transformers}", + "awslogs-create-group" = "true", + "awslogs-region" = var.REGION, + "awslogs-stream-prefix" = "ecs" + }, + secretOptions = [] + }, + systemControls = [] + }, + + ]) + + task_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateTaskRole" + execution_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateExecutionRole" + + network_mode = "awsvpc" + requires_compatibilities = ["FARGATE"] + + cpu = "2048" + memory = "8192" + + runtime_platform { + cpu_architecture = "X86_64" + operating_system_family = "LINUX" + } +} + resource "aws_cloudwatch_log_group" "weaviate_rag_generation_log_group" { name = "/ecs/${var.rag_description_generation}" retention_in_days = 3 } +resource "aws_cloudwatch_log_group" "weaviate_rag_generation_dev_log_group" { + name = "/ecs/dev_${var.rag_description_generation}" + + retention_in_days = 3 +} + resource "aws_cloudwatch_log_group" "t2v-transformers_log_group" { name = "/ecs/${var.t2v-transformers}" diff --git a/aws_terraform_bgg/lambda_iam_policies.tf b/aws_terraform_bgg/lambda_iam_policies.tf index 9003336..ff568e8 100644 --- a/aws_terraform_bgg/lambda_iam_policies.tf +++ b/aws_terraform_bgg/lambda_iam_policies.tf @@ -176,9 +176,6 @@ module "rag_description_generation_describe_task_def_policy" { account_id = data.aws_caller_identity.current.account_id } - - - module "trigger_bgg_generate_game_urls_lambda" { source = "./modules/iam_lambda_run_permissions" function_name = module.bgg_generate_game_urls.function_name diff --git a/modules/lambda_functions/rag_description_generation_fargate_trigger.py b/modules/lambda_functions/rag_description_generation_fargate_trigger.py index 037826a..217b977 100644 --- a/modules/lambda_functions/rag_description_generation_fargate_trigger.py +++ b/modules/lambda_functions/rag_description_generation_fargate_trigger.py @@ -45,8 +45,9 @@ def lambda_handler(event, context): terraform_state_file = get_terraform_state_file() - task_definition = TASK_DEFINITION - + task_definition = ( + f"dev_{TASK_DEFINITION}" if ENVIRONMENT != "prod" else TASK_DEFINITION + ) print(task_definition) ecs_client = boto3.client("ecs") From 435882cc51ce753ccbf59f1dad8145ee94b0a330 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 11:58:46 -0800 Subject: [PATCH 14/26] deploy lambda updates --- .github/{temp_off => workflows}/lambda_deployments_dev.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{temp_off => workflows}/lambda_deployments_dev.yml (100%) diff --git a/.github/temp_off/lambda_deployments_dev.yml b/.github/workflows/lambda_deployments_dev.yml similarity index 100% rename from .github/temp_off/lambda_deployments_dev.yml rename to .github/workflows/lambda_deployments_dev.yml From aca8000b62e54a081a13d9a68bd4a9953f8e0966 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 12:03:53 -0800 Subject: [PATCH 15/26] repair to task def in terraform --- aws_terraform_bgg/fargate_task_defs_rag.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws_terraform_bgg/fargate_task_defs_rag.tf b/aws_terraform_bgg/fargate_task_defs_rag.tf index 2e8ca4c..bd77408 100644 --- a/aws_terraform_bgg/fargate_task_defs_rag.tf +++ b/aws_terraform_bgg/fargate_task_defs_rag.tf @@ -153,7 +153,7 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { container_definitions = jsonencode([ { - name = var.rag_description_generation, + name = "dev_${var.rag_description_generation}", image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/dev_${var.rag_description_generation}:latest" cpu = 0, essential = true, From db9f8dd4e1d3e412561124caaa94b0b773d59352 Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 12:19:18 -0800 Subject: [PATCH 16/26] add container startup dependencies to task definition --- aws_terraform_bgg/fargate_task_defs_rag.tf | 32 +++++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/aws_terraform_bgg/fargate_task_defs_rag.tf b/aws_terraform_bgg/fargate_task_defs_rag.tf index bd77408..f22fdb0 100644 --- a/aws_terraform_bgg/fargate_task_defs_rag.tf +++ b/aws_terraform_bgg/fargate_task_defs_rag.tf @@ -40,7 +40,13 @@ resource "aws_ecs_task_definition" "weaviate_rag_generation" { }, secretOptions = [] }, - systemControls = [] + systemControls = [], + dependsOn = [ + { + containerName = var.weaviate_rag_server, + condition = "START" + } + ] }, { name = var.weaviate_rag_server, @@ -95,7 +101,13 @@ resource "aws_ecs_task_definition" "weaviate_rag_generation" { }, secretOptions = [] }, - systemControls = [] + systemControls = [], + dependsOn = [ + { + containerName = var.t2v-transformers, + condition = "START" + } + ] }, { name = var.t2v-transformers, @@ -190,7 +202,13 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { }, secretOptions = [] }, - systemControls = [] + systemControls = [], + dependsOn = [ + { + containerName = var.weaviate_rag_server, + condition = "START" + } + ] }, { name = var.weaviate_rag_server, @@ -245,7 +263,13 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { }, secretOptions = [] }, - systemControls = [] + systemControls = [], + dependsOn = [ + { + containerName = var.t2v-transformers, + condition = "START" + } + ] }, { name = var.t2v-transformers, From c030979be72524c5500e678198580e96b80decad Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 12:19:25 -0800 Subject: [PATCH 17/26] update for pydantic validation error --- modules/rag_description_generation/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/rag_description_generation/main.py b/modules/rag_description_generation/main.py index 300a60f..d40039b 100644 --- a/modules/rag_description_generation/main.py +++ b/modules/rag_description_generation/main.py @@ -25,6 +25,7 @@ class RagDescription(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) start_block: str end_block: str + num_completed_games: int = 0 ip_address: str = None overall_stats: dict = {} game_ids: list = [] From dec700a3f934587d69df37c0a43472526a6cb12b Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 12:21:27 -0800 Subject: [PATCH 18/26] temp turn off lambda deployment --- .../dev_deployment_rag_description_generation.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{workflows => temp_off}/dev_deployment_rag_description_generation.yml (100%) diff --git a/.github/workflows/dev_deployment_rag_description_generation.yml b/.github/temp_off/dev_deployment_rag_description_generation.yml similarity index 100% rename from .github/workflows/dev_deployment_rag_description_generation.yml rename to .github/temp_off/dev_deployment_rag_description_generation.yml From a2361a6b50660f182d03b3950412d3273cbc9aec Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 15:01:35 -0800 Subject: [PATCH 19/26] update naming for weaviate dockerfile and deployment --- .../one_off_deployments/{weaviate.yml => weaviate_rag.yml} | 2 +- Dockerfiles/{Dockerfile.weaviate => Dockerfile.weaviate_rag} | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename .github/one_off_deployments/{weaviate.yml => weaviate_rag.yml} (94%) rename Dockerfiles/{Dockerfile.weaviate => Dockerfile.weaviate_rag} (84%) diff --git a/.github/one_off_deployments/weaviate.yml b/.github/one_off_deployments/weaviate_rag.yml similarity index 94% rename from .github/one_off_deployments/weaviate.yml rename to .github/one_off_deployments/weaviate_rag.yml index 9b45bce..973b983 100644 --- a/.github/one_off_deployments/weaviate.yml +++ b/.github/one_off_deployments/weaviate_rag.yml @@ -35,5 +35,5 @@ jobs: ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} ECR_REPOSITORY: weaviate_rag_server run: | - DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.weaviate -t $ECR_REGISTRY/$ECR_REPOSITORY . + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.weaviate_rag -t $ECR_REGISTRY/$ECR_REPOSITORY . docker push $ECR_REGISTRY/$ECR_REPOSITORY diff --git a/Dockerfiles/Dockerfile.weaviate b/Dockerfiles/Dockerfile.weaviate_rag similarity index 84% rename from Dockerfiles/Dockerfile.weaviate rename to Dockerfiles/Dockerfile.weaviate_rag index 45582c6..bd55c9e 100644 --- a/Dockerfiles/Dockerfile.weaviate +++ b/Dockerfiles/Dockerfile.weaviate_rag @@ -1,4 +1,4 @@ -# Dockerfile.weaviate +# Dockerfile.weaviate_rag FROM cr.weaviate.io/semitechnologies/weaviate:1.27.7 # Set environment variables @@ -6,7 +6,7 @@ ENV QUERY_DEFAULTS_LIMIT=25 ENV AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true ENV PERSISTENCE_DATA_PATH=/var/lib/weaviate ENV DEFAULT_VECTORIZER_MODULE=text2vec-transformers -ENV ENABLE_MODULES=text2vec-transformers,generative-openai +ENV ENABLE_MODULES=text2vec-transformers ENV CLUSTER_HOSTNAME=node1 ENV TRANSFORMERS_INFERENCE_API=http://t2v-transformers:8080 From ba577703f8456cc9ad91410abf1b4577034f82ff Mon Sep 17 00:00:00 2001 From: threnjen Date: Thu, 19 Dec 2024 15:02:58 -0800 Subject: [PATCH 20/26] updates to rag task info --- aws_terraform_bgg/fargate_task_defs_rag.tf | 275 ++++++--------------- aws_terraform_bgg/outputs.tf | 7 +- aws_terraform_bgg/vpc.tf | 11 + 3 files changed, 99 insertions(+), 194 deletions(-) diff --git a/aws_terraform_bgg/fargate_task_defs_rag.tf b/aws_terraform_bgg/fargate_task_defs_rag.tf index f22fdb0..6e827a2 100644 --- a/aws_terraform_bgg/fargate_task_defs_rag.tf +++ b/aws_terraform_bgg/fargate_task_defs_rag.tf @@ -1,173 +1,11 @@ -resource "aws_ecs_task_definition" "weaviate_rag_generation" { - family = var.rag_description_generation - - container_definitions = jsonencode([ - { - name = var.rag_description_generation, - image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.rag_description_generation}:latest" - cpu = 0, - essential = true, - environment = [ - { - name = "ENVIRONMENT", - value = "prod" - }, - { - name = "IS_LOCAL", - value = "false" - } - ], - environmentFiles = [ - { - value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env", - type = "s3" - }, - { - value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/weaviate.env", - type = "s3" - } - ], - mountPoints = [], - volumesFrom = [], - ulimits = [], - logConfiguration = { - logDriver = "awslogs", - options = { - "awslogs-group" = "/ecs/${var.rag_description_generation}", - "awslogs-create-group" = "true", - "awslogs-region" = var.REGION, - "awslogs-stream-prefix" = "ecs" - }, - secretOptions = [] - }, - systemControls = [], - dependsOn = [ - { - containerName = var.weaviate_rag_server, - condition = "START" - } - ] - }, - { - name = var.weaviate_rag_server, - image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.weaviate_rag_server}:latest" - cpu = 0, - portMappings = [ - { - containerPort = 8080, - hostPort = 8080 - }, - { - containerPort = 50051, - hostPort = 50051 - }, - ], - essential = true, - environment = [ - { - name = "ENVIRONMENT", - value = "prod" - }, - { - name = "IS_LOCAL", - value = "false" - } - ], - environmentFiles = [ - { - value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env", - type = "s3" - }, - { - value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/weaviate.env", - type = "s3" - } - ], - command=[ - "--host", "0.0.0.0", - "--port", "8080", - "--scheme", "http" - ], - mountPoints = [], - volumesFrom = [], - ulimits = [], - logConfiguration = { - logDriver = "awslogs", - options = { - "awslogs-group" = "/ecs/${var.weaviate_rag_server}", - "awslogs-create-group" = "true", - "awslogs-region" = var.REGION, - "awslogs-stream-prefix" = "ecs" - }, - secretOptions = [] - }, - systemControls = [], - dependsOn = [ - { - containerName = var.t2v-transformers, - condition = "START" - } - ] - }, - { - name = var.t2v-transformers, - image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.t2v-transformers}:latest" - cpu = 0, - essential = true, - environment = [ - { - name = "ENVIRONMENT", - value = "prod" - }, - { - name = "IS_LOCAL", - value = "false" - }, - { - name = "ENABLE_CUDA", - value = "0" - }], - mountPoints = [], - volumesFrom = [], - ulimits = [], - logConfiguration = { - logDriver = "awslogs", - options = { - "awslogs-group" = "/ecs/${var.t2v-transformers}", - "awslogs-create-group" = "true", - "awslogs-region" = var.REGION, - "awslogs-stream-prefix" = "ecs" - }, - secretOptions = [] - }, - systemControls = [] - }, - - ]) - - task_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateTaskRole" - execution_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateExecutionRole" - - network_mode = "awsvpc" - requires_compatibilities = ["FARGATE"] - - cpu = "2048" - memory = "8192" - - runtime_platform { - cpu_architecture = "X86_64" - operating_system_family = "LINUX" - } -} - resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { family = "dev_${var.rag_description_generation}" - + container_definitions = jsonencode([ { - name = "dev_${var.rag_description_generation}", - image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/dev_${var.rag_description_generation}:latest" - cpu = 0, + name = "dev_${var.rag_description_generation}", + image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/dev_${var.rag_description_generation}:latest" + cpu = 0, essential = true, environment = [ { @@ -175,7 +13,7 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { value = "prod" }, { - name = "IS_LOCAL", + name = "IS_LOCAL", value = "false" } ], @@ -189,9 +27,9 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { type = "s3" } ], - mountPoints = [], - volumesFrom = [], - ulimits = [], + mountPoints = [], + volumesFrom = [], + ulimits = [], logConfiguration = { logDriver = "awslogs", options = { @@ -231,8 +69,12 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { value = "prod" }, { - name = "IS_LOCAL", + name = "IS_LOCAL", value = "false" + }, + { + name = "TRANSFORMERS_INFERENCE_API" + value = "http://localhost:8080" } ], environmentFiles = [ @@ -245,14 +87,14 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { type = "s3" } ], - command=[ + command = [ "--host", "0.0.0.0", "--port", "8080", "--scheme", "http" ], - mountPoints = [], - volumesFrom = [], - ulimits = [], + mountPoints = [], + volumesFrom = [], + ulimits = [], logConfiguration = { logDriver = "awslogs", options = { @@ -272,9 +114,9 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { ] }, { - name = var.t2v-transformers, - image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.t2v-transformers}:latest" - cpu = 0, + name = var.t2v-transformers, + image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.t2v-transformers}:latest" + cpu = 0, essential = true, environment = [ { @@ -282,16 +124,16 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { value = "prod" }, { - name = "IS_LOCAL", + name = "IS_LOCAL", value = "false" }, { - name = "ENABLE_CUDA", + name = "ENABLE_CUDA", value = "0" - }], - mountPoints = [], - volumesFrom = [], - ulimits = [], + }], + mountPoints = [], + volumesFrom = [], + ulimits = [], logConfiguration = { logDriver = "awslogs", options = { @@ -302,20 +144,29 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { }, secretOptions = [] }, - systemControls = [] + systemControls = [], + healthCheck = { + command = ["CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"], + interval = 30, + retries = 3, + startPeriod = 60, + timeout = 5 + } }, + ] + ) - ]) - - task_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateTaskRole" + + + task_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateTaskRole" execution_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateExecutionRole" - - network_mode = "awsvpc" + + network_mode = "awsvpc" requires_compatibilities = ["FARGATE"] - + cpu = "2048" memory = "8192" - + runtime_platform { cpu_architecture = "X86_64" operating_system_family = "LINUX" @@ -344,4 +195,42 @@ resource "aws_cloudwatch_log_group" "weaviate_rag_server_log_group" { name = "/ecs/${var.weaviate_rag_server}" retention_in_days = 3 -} \ No newline at end of file +} + + +# resource "aws_service_discovery_private_dns_namespace" "t2v-transformers-dev" { +# name = "t2v-transformers-dev" +# vpc = module.vpc.vpc_id +# description = "Private DNS namespace for ECS service discovery" +# } + +# resource "aws_service_discovery_service" "transformers_service_dev" { +# name = "t2v-transformers-dev" +# namespace_id = aws_service_discovery_private_dns_namespace.t2v-transformers-dev.id + +# dns_config { +# namespace_id = aws_service_discovery_private_dns_namespace.t2v-transformers-dev.id +# dns_records { +# type = "A" +# ttl = 60 +# } +# } +# } + +# resource "aws_ecs_service" "transformers_service_dev" { +# name = "dev-bgg-weaviate-service" +# cluster = aws_ecs_cluster.boardgamegeek.id +# task_definition = aws_ecs_task_definition.dev_weaviate_rag_generation.arn +# desired_count = 0 +# launch_type = "FARGATE" + +# network_configuration { +# subnets = module.vpc.private_subnets +# security_groups = [aws_security_group.shared_resources_sg.id, aws_security_group.ec2_weaviate_port_access.id] +# assign_public_ip = false +# } + +# service_registries { +# registry_arn = aws_service_discovery_service.transformers_service_dev.arn +# } +# } \ No newline at end of file diff --git a/aws_terraform_bgg/outputs.tf b/aws_terraform_bgg/outputs.tf index 02ce662..1351194 100644 --- a/aws_terraform_bgg/outputs.tf +++ b/aws_terraform_bgg/outputs.tf @@ -24,6 +24,11 @@ output "shared_resources_sg" { } output "public_subnets" { - description = "Subnets in the VPC" + description = "Public subnets in the VPC" value = module.vpc.public_subnets } + +output "private_subnets" { + description = "Private subnets in the VPC" + value = module.vpc.private_subnets +} diff --git a/aws_terraform_bgg/vpc.tf b/aws_terraform_bgg/vpc.tf index 1d7800d..c288e44 100644 --- a/aws_terraform_bgg/vpc.tf +++ b/aws_terraform_bgg/vpc.tf @@ -9,7 +9,18 @@ data "aws_availability_zones" "available" { values = ["availability-zone"] } } +# resource "aws_eip" "nat-gateway" { +# domain = "vpc" +# } +# resource "aws_nat_gateway" "example" { +# allocation_id = aws_eip.nat-gateway.id +# subnet_id = module.vpc.private_subnets[0] +# tags = { +# Name = "t2v-transformers" +# } + +# } module "vpc" { source = "terraform-aws-modules/vpc/aws" name = "meeplemasters" From 3d0c43030191021953c05a89443b2e72443409e7 Mon Sep 17 00:00:00 2001 From: threnjen Date: Fri, 20 Dec 2024 09:14:14 -0800 Subject: [PATCH 21/26] update weaviate port from 8080 to 8081 --- ..._deployment_rag_description_generation.yml | 0 .../weaviate_rag.yml | 0 Dockerfiles/Dockerfile.weaviate_rag | 2 +- aws_terraform_bgg/fargate_task_defs_rag.tf | 107 +++++++++--------- ..._description_generation_fargate_trigger.py | 7 +- .../rag_weaviate.py | 5 +- 6 files changed, 64 insertions(+), 57 deletions(-) rename .github/{temp_off => workflows}/dev_deployment_rag_description_generation.yml (100%) rename .github/{one_off_deployments => workflows}/weaviate_rag.yml (100%) diff --git a/.github/temp_off/dev_deployment_rag_description_generation.yml b/.github/workflows/dev_deployment_rag_description_generation.yml similarity index 100% rename from .github/temp_off/dev_deployment_rag_description_generation.yml rename to .github/workflows/dev_deployment_rag_description_generation.yml diff --git a/.github/one_off_deployments/weaviate_rag.yml b/.github/workflows/weaviate_rag.yml similarity index 100% rename from .github/one_off_deployments/weaviate_rag.yml rename to .github/workflows/weaviate_rag.yml diff --git a/Dockerfiles/Dockerfile.weaviate_rag b/Dockerfiles/Dockerfile.weaviate_rag index bd55c9e..1f60bd0 100644 --- a/Dockerfiles/Dockerfile.weaviate_rag +++ b/Dockerfiles/Dockerfile.weaviate_rag @@ -11,4 +11,4 @@ ENV CLUSTER_HOSTNAME=node1 ENV TRANSFORMERS_INFERENCE_API=http://t2v-transformers:8080 # Define entrypoint command -CMD ["--host", "0.0.0.0", "--port", "8080", "--scheme", "http"] \ No newline at end of file +CMD ["--host", "0.0.0.0", "--port", "8081", "--scheme", "http"] \ No newline at end of file diff --git a/aws_terraform_bgg/fargate_task_defs_rag.tf b/aws_terraform_bgg/fargate_task_defs_rag.tf index 6e827a2..0541941 100644 --- a/aws_terraform_bgg/fargate_task_defs_rag.tf +++ b/aws_terraform_bgg/fargate_task_defs_rag.tf @@ -1,61 +1,62 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { family = "dev_${var.rag_description_generation}" + container_definitions = jsonencode([ - { - name = "dev_${var.rag_description_generation}", - image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/dev_${var.rag_description_generation}:latest" - cpu = 0, - essential = true, - environment = [ - { - name = "ENVIRONMENT", - value = "prod" - }, - { - name = "IS_LOCAL", - value = "false" - } - ], - environmentFiles = [ - { - value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env", - type = "s3" - }, - { - value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/weaviate.env", - type = "s3" - } - ], - mountPoints = [], - volumesFrom = [], - ulimits = [], - logConfiguration = { - logDriver = "awslogs", - options = { - "awslogs-group" = "/ecs/dev_${var.rag_description_generation}", - "awslogs-create-group" = "true", - "awslogs-region" = var.REGION, - "awslogs-stream-prefix" = "ecs" - }, - secretOptions = [] - }, - systemControls = [], - dependsOn = [ - { - containerName = var.weaviate_rag_server, - condition = "START" - } - ] - }, + # { + # name = "dev_${var.rag_description_generation}", + # image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/dev_${var.rag_description_generation}:latest" + # cpu = 0, + # essential = true, + # environment = [ + # { + # name = "ENVIRONMENT", + # value = "prod" + # }, + # { + # name = "IS_LOCAL", + # value = "false" + # } + # ], + # environmentFiles = [ + # { + # value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env", + # type = "s3" + # }, + # { + # value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/weaviate.env", + # type = "s3" + # } + # ], + # mountPoints = [], + # volumesFrom = [], + # ulimits = [], + # logConfiguration = { + # logDriver = "awslogs", + # options = { + # "awslogs-group" = "/ecs/dev_${var.rag_description_generation}", + # "awslogs-create-group" = "true", + # "awslogs-region" = var.REGION, + # "awslogs-stream-prefix" = "ecs" + # }, + # secretOptions = [] + # }, + # systemControls = [], + # dependsOn = [ + # { + # containerName = var.weaviate_rag_server, + # condition = "START" + # } + # ] + # }, { name = var.weaviate_rag_server, image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.weaviate_rag_server}:latest" cpu = 0, portMappings = [ { - containerPort = 8080, - hostPort = 8080 + containerPort = 8081, + hostPort = 8081 }, { containerPort = 50051, @@ -74,7 +75,7 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { }, { name = "TRANSFORMERS_INFERENCE_API" - value = "http://localhost:8080" + value = "http://127.0.0.1:8080" } ], environmentFiles = [ @@ -146,18 +147,18 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { }, systemControls = [], healthCheck = { - command = ["CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"], + command = ["CMD-SHELL", "curl -f http://127.0.0.1:8080/health || exit 1"], interval = 30, retries = 3, startPeriod = 60, timeout = 5 } }, - ] + + ], + ) - - task_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateTaskRole" execution_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateExecutionRole" diff --git a/modules/lambda_functions/rag_description_generation_fargate_trigger.py b/modules/lambda_functions/rag_description_generation_fargate_trigger.py index 217b977..4859cf1 100644 --- a/modules/lambda_functions/rag_description_generation_fargate_trigger.py +++ b/modules/lambda_functions/rag_description_generation_fargate_trigger.py @@ -75,7 +75,10 @@ def lambda_handler(event, context): if ENVIRONMENT != "prod": blocks = [(10, 20)] - security_groups = terraform_state_file["outputs"]["shared_resources_sg"]["value"] + security_groups = [ + terraform_state_file["outputs"]["shared_resources_sg"]["value"], + # terraform_state_file["outputs"]["sg_ec2_weaviate_port_access"]["value"], + ] print(security_groups) @@ -97,7 +100,7 @@ def lambda_handler(event, context): networkConfiguration={ "awsvpcConfiguration": { "subnets": [subnets], - "securityGroups": [security_groups], + "securityGroups": security_groups, "assignPublicIp": "ENABLED", }, }, diff --git a/modules/rag_description_generation/rag_weaviate.py b/modules/rag_description_generation/rag_weaviate.py index 3820f00..e81a160 100644 --- a/modules/rag_description_generation/rag_weaviate.py +++ b/modules/rag_description_generation/rag_weaviate.py @@ -22,9 +22,12 @@ def model_post_init(self, __context): def connect_weaviate_client_docker(self) -> weaviate.client: client = weaviate.connect_to_local( + host="127.0.0.1", + port=8081, + grpc_port=50051, headers={ "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"], - } + }, ) return client From 5e6b7c41b13bf81efffcf194489affc379ffb48c Mon Sep 17 00:00:00 2001 From: threnjen Date: Fri, 20 Dec 2024 09:16:31 -0800 Subject: [PATCH 22/26] don't delete collection items and hope it doesn't oom --- modules/rag_description_generation/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/rag_description_generation/main.py b/modules/rag_description_generation/main.py index d40039b..5519810 100644 --- a/modules/rag_description_generation/main.py +++ b/modules/rag_description_generation/main.py @@ -147,7 +147,7 @@ def process_single_game( game_id, summary=summary.generated ) # print(f"\n{summary.generated}") - weaviate_client.remove_collection_items(game_id=game_id, reviews=reviews) + # weaviate_client.remove_collection_items(game_id=game_id, reviews=reviews) return print(f"Game {game_id} already processed") From 64f8cbd7b8fe8e56385eefab22212ba798993244 Mon Sep 17 00:00:00 2001 From: threnjen Date: Fri, 20 Dec 2024 09:45:48 -0800 Subject: [PATCH 23/26] update deployments for additional container in task def --- .../weaviate_rag.yml | 0 ..._deployment_rag_description_generation.yml | 0 aws_terraform_bgg/fargate_task_defs_rag.tf | 94 +++++++++---------- 3 files changed, 47 insertions(+), 47 deletions(-) rename .github/{workflows => one_off_deployments}/weaviate_rag.yml (100%) rename .github/{workflows => temp_off}/dev_deployment_rag_description_generation.yml (100%) diff --git a/.github/workflows/weaviate_rag.yml b/.github/one_off_deployments/weaviate_rag.yml similarity index 100% rename from .github/workflows/weaviate_rag.yml rename to .github/one_off_deployments/weaviate_rag.yml diff --git a/.github/workflows/dev_deployment_rag_description_generation.yml b/.github/temp_off/dev_deployment_rag_description_generation.yml similarity index 100% rename from .github/workflows/dev_deployment_rag_description_generation.yml rename to .github/temp_off/dev_deployment_rag_description_generation.yml diff --git a/aws_terraform_bgg/fargate_task_defs_rag.tf b/aws_terraform_bgg/fargate_task_defs_rag.tf index 0541941..b212bef 100644 --- a/aws_terraform_bgg/fargate_task_defs_rag.tf +++ b/aws_terraform_bgg/fargate_task_defs_rag.tf @@ -3,52 +3,52 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { container_definitions = jsonencode([ - # { - # name = "dev_${var.rag_description_generation}", - # image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/dev_${var.rag_description_generation}:latest" - # cpu = 0, - # essential = true, - # environment = [ - # { - # name = "ENVIRONMENT", - # value = "prod" - # }, - # { - # name = "IS_LOCAL", - # value = "false" - # } - # ], - # environmentFiles = [ - # { - # value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env", - # type = "s3" - # }, - # { - # value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/weaviate.env", - # type = "s3" - # } - # ], - # mountPoints = [], - # volumesFrom = [], - # ulimits = [], - # logConfiguration = { - # logDriver = "awslogs", - # options = { - # "awslogs-group" = "/ecs/dev_${var.rag_description_generation}", - # "awslogs-create-group" = "true", - # "awslogs-region" = var.REGION, - # "awslogs-stream-prefix" = "ecs" - # }, - # secretOptions = [] - # }, - # systemControls = [], - # dependsOn = [ - # { - # containerName = var.weaviate_rag_server, - # condition = "START" - # } - # ] - # }, + { + name = "dev_${var.rag_description_generation}", + image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/dev_${var.rag_description_generation}:latest" + cpu = 0, + essential = true, + environment = [ + { + name = "ENVIRONMENT", + value = "prod" + }, + { + name = "IS_LOCAL", + value = "false" + } + ], + environmentFiles = [ + { + value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env", + type = "s3" + }, + { + value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/weaviate.env", + type = "s3" + } + ], + mountPoints = [], + volumesFrom = [], + ulimits = [], + logConfiguration = { + logDriver = "awslogs", + options = { + "awslogs-group" = "/ecs/dev_${var.rag_description_generation}", + "awslogs-create-group" = "true", + "awslogs-region" = var.REGION, + "awslogs-stream-prefix" = "ecs" + }, + secretOptions = [] + }, + systemControls = [], + dependsOn = [ + { + containerName = var.weaviate_rag_server, + condition = "START" + } + ] + }, { name = var.weaviate_rag_server, image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.weaviate_rag_server}:latest" @@ -90,7 +90,7 @@ resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { ], command = [ "--host", "0.0.0.0", - "--port", "8080", + "--port", "8081", "--scheme", "http" ], mountPoints = [], From d683d1d6c0cc8e1e2b9f9f2205bb3817479bb372 Mon Sep 17 00:00:00 2001 From: threnjen Date: Fri, 20 Dec 2024 10:31:01 -0800 Subject: [PATCH 24/26] reinstate all deployment yml --- .github/{temp_off => workflows}/deploy_file_to_s3.yml | 0 .../{temp_off => workflows}/dev_deployment_ecs_game_cleaner.yml | 0 .../{temp_off => workflows}/dev_deployment_ecs_orchestrator.yml | 0 .../dev_deployment_ecs_ratings_cleaner.yml | 0 .github/{temp_off => workflows}/dev_deployment_ecs_scraper.yml | 0 .../{temp_off => workflows}/dev_deployment_ecs_users_cleaner.yml | 0 .../dev_deployment_rag_description_generation.yml | 0 .github/{temp_off => workflows}/lambda_deployments_prod.yml | 0 .../prod_deployment_ecs_bgg_file_retrieval.yml | 0 .../{temp_off => workflows}/prod_deployment_ecs_game_cleaner.yml | 0 .../{temp_off => workflows}/prod_deployment_ecs_orchestrator.yml | 0 .../prod_deployment_ecs_ratings_cleaner.yml | 0 .github/{temp_off => workflows}/prod_deployment_ecs_scraper.yml | 0 .../{temp_off => workflows}/prod_deployment_ecs_users_cleaner.yml | 0 .../prod_deployment_rag_description_generation.yml | 0 15 files changed, 0 insertions(+), 0 deletions(-) rename .github/{temp_off => workflows}/deploy_file_to_s3.yml (100%) rename .github/{temp_off => workflows}/dev_deployment_ecs_game_cleaner.yml (100%) rename .github/{temp_off => workflows}/dev_deployment_ecs_orchestrator.yml (100%) rename .github/{temp_off => workflows}/dev_deployment_ecs_ratings_cleaner.yml (100%) rename .github/{temp_off => workflows}/dev_deployment_ecs_scraper.yml (100%) rename .github/{temp_off => workflows}/dev_deployment_ecs_users_cleaner.yml (100%) rename .github/{temp_off => workflows}/dev_deployment_rag_description_generation.yml (100%) rename .github/{temp_off => workflows}/lambda_deployments_prod.yml (100%) rename .github/{temp_off => workflows}/prod_deployment_ecs_bgg_file_retrieval.yml (100%) rename .github/{temp_off => workflows}/prod_deployment_ecs_game_cleaner.yml (100%) rename .github/{temp_off => workflows}/prod_deployment_ecs_orchestrator.yml (100%) rename .github/{temp_off => workflows}/prod_deployment_ecs_ratings_cleaner.yml (100%) rename .github/{temp_off => workflows}/prod_deployment_ecs_scraper.yml (100%) rename .github/{temp_off => workflows}/prod_deployment_ecs_users_cleaner.yml (100%) rename .github/{temp_off => workflows}/prod_deployment_rag_description_generation.yml (100%) diff --git a/.github/temp_off/deploy_file_to_s3.yml b/.github/workflows/deploy_file_to_s3.yml similarity index 100% rename from .github/temp_off/deploy_file_to_s3.yml rename to .github/workflows/deploy_file_to_s3.yml diff --git a/.github/temp_off/dev_deployment_ecs_game_cleaner.yml b/.github/workflows/dev_deployment_ecs_game_cleaner.yml similarity index 100% rename from .github/temp_off/dev_deployment_ecs_game_cleaner.yml rename to .github/workflows/dev_deployment_ecs_game_cleaner.yml diff --git a/.github/temp_off/dev_deployment_ecs_orchestrator.yml b/.github/workflows/dev_deployment_ecs_orchestrator.yml similarity index 100% rename from .github/temp_off/dev_deployment_ecs_orchestrator.yml rename to .github/workflows/dev_deployment_ecs_orchestrator.yml diff --git a/.github/temp_off/dev_deployment_ecs_ratings_cleaner.yml b/.github/workflows/dev_deployment_ecs_ratings_cleaner.yml similarity index 100% rename from .github/temp_off/dev_deployment_ecs_ratings_cleaner.yml rename to .github/workflows/dev_deployment_ecs_ratings_cleaner.yml diff --git a/.github/temp_off/dev_deployment_ecs_scraper.yml b/.github/workflows/dev_deployment_ecs_scraper.yml similarity index 100% rename from .github/temp_off/dev_deployment_ecs_scraper.yml rename to .github/workflows/dev_deployment_ecs_scraper.yml diff --git a/.github/temp_off/dev_deployment_ecs_users_cleaner.yml b/.github/workflows/dev_deployment_ecs_users_cleaner.yml similarity index 100% rename from .github/temp_off/dev_deployment_ecs_users_cleaner.yml rename to .github/workflows/dev_deployment_ecs_users_cleaner.yml diff --git a/.github/temp_off/dev_deployment_rag_description_generation.yml b/.github/workflows/dev_deployment_rag_description_generation.yml similarity index 100% rename from .github/temp_off/dev_deployment_rag_description_generation.yml rename to .github/workflows/dev_deployment_rag_description_generation.yml diff --git a/.github/temp_off/lambda_deployments_prod.yml b/.github/workflows/lambda_deployments_prod.yml similarity index 100% rename from .github/temp_off/lambda_deployments_prod.yml rename to .github/workflows/lambda_deployments_prod.yml diff --git a/.github/temp_off/prod_deployment_ecs_bgg_file_retrieval.yml b/.github/workflows/prod_deployment_ecs_bgg_file_retrieval.yml similarity index 100% rename from .github/temp_off/prod_deployment_ecs_bgg_file_retrieval.yml rename to .github/workflows/prod_deployment_ecs_bgg_file_retrieval.yml diff --git a/.github/temp_off/prod_deployment_ecs_game_cleaner.yml b/.github/workflows/prod_deployment_ecs_game_cleaner.yml similarity index 100% rename from .github/temp_off/prod_deployment_ecs_game_cleaner.yml rename to .github/workflows/prod_deployment_ecs_game_cleaner.yml diff --git a/.github/temp_off/prod_deployment_ecs_orchestrator.yml b/.github/workflows/prod_deployment_ecs_orchestrator.yml similarity index 100% rename from .github/temp_off/prod_deployment_ecs_orchestrator.yml rename to .github/workflows/prod_deployment_ecs_orchestrator.yml diff --git a/.github/temp_off/prod_deployment_ecs_ratings_cleaner.yml b/.github/workflows/prod_deployment_ecs_ratings_cleaner.yml similarity index 100% rename from .github/temp_off/prod_deployment_ecs_ratings_cleaner.yml rename to .github/workflows/prod_deployment_ecs_ratings_cleaner.yml diff --git a/.github/temp_off/prod_deployment_ecs_scraper.yml b/.github/workflows/prod_deployment_ecs_scraper.yml similarity index 100% rename from .github/temp_off/prod_deployment_ecs_scraper.yml rename to .github/workflows/prod_deployment_ecs_scraper.yml diff --git a/.github/temp_off/prod_deployment_ecs_users_cleaner.yml b/.github/workflows/prod_deployment_ecs_users_cleaner.yml similarity index 100% rename from .github/temp_off/prod_deployment_ecs_users_cleaner.yml rename to .github/workflows/prod_deployment_ecs_users_cleaner.yml diff --git a/.github/temp_off/prod_deployment_rag_description_generation.yml b/.github/workflows/prod_deployment_rag_description_generation.yml similarity index 100% rename from .github/temp_off/prod_deployment_rag_description_generation.yml rename to .github/workflows/prod_deployment_rag_description_generation.yml From 59d2da4bd3302446eacde5d332f737d8583254cc Mon Sep 17 00:00:00 2001 From: threnjen Date: Fri, 20 Dec 2024 11:34:18 -0800 Subject: [PATCH 25/26] update to add non dev rag generation task def --- aws_terraform_bgg/fargate_task_defs_rag.tf | 176 +++++++++++++++++++++ 1 file changed, 176 insertions(+) diff --git a/aws_terraform_bgg/fargate_task_defs_rag.tf b/aws_terraform_bgg/fargate_task_defs_rag.tf index b212bef..7ea7dfe 100644 --- a/aws_terraform_bgg/fargate_task_defs_rag.tf +++ b/aws_terraform_bgg/fargate_task_defs_rag.tf @@ -1,3 +1,179 @@ +resource "aws_ecs_task_definition" "weaviate_rag_generation" { + family = var.rag_description_generation + + + container_definitions = jsonencode([ + { + name = var.rag_description_generation, + image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.rag_description_generation}:latest" + cpu = 0, + essential = true, + environment = [ + { + name = "ENVIRONMENT", + value = "prod" + }, + { + name = "IS_LOCAL", + value = "false" + } + ], + environmentFiles = [ + { + value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env", + type = "s3" + }, + { + value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/weaviate.env", + type = "s3" + } + ], + mountPoints = [], + volumesFrom = [], + ulimits = [], + logConfiguration = { + logDriver = "awslogs", + options = { + "awslogs-group" = "/ecs/${var.rag_description_generation}", + "awslogs-create-group" = "true", + "awslogs-region" = var.REGION, + "awslogs-stream-prefix" = "ecs" + }, + secretOptions = [] + }, + systemControls = [], + dependsOn = [ + { + containerName = var.weaviate_rag_server, + condition = "START" + } + ] + }, + { + name = var.weaviate_rag_server, + image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.weaviate_rag_server}:latest" + cpu = 0, + portMappings = [ + { + containerPort = 8081, + hostPort = 8081 + }, + { + containerPort = 50051, + hostPort = 50051 + }, + ], + essential = true, + environment = [ + { + name = "ENVIRONMENT", + value = "prod" + }, + { + name = "IS_LOCAL", + value = "false" + }, + { + name = "TRANSFORMERS_INFERENCE_API" + value = "http://127.0.0.1:8080" + } + ], + environmentFiles = [ + { + value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env", + type = "s3" + }, + { + value = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/weaviate.env", + type = "s3" + } + ], + command = [ + "--host", "0.0.0.0", + "--port", "8081", + "--scheme", "http" + ], + mountPoints = [], + volumesFrom = [], + ulimits = [], + logConfiguration = { + logDriver = "awslogs", + options = { + "awslogs-group" = "/ecs/${var.weaviate_rag_server}", + "awslogs-create-group" = "true", + "awslogs-region" = var.REGION, + "awslogs-stream-prefix" = "ecs" + }, + secretOptions = [] + }, + systemControls = [], + dependsOn = [ + { + containerName = var.t2v-transformers, + condition = "START" + } + ] + }, + { + name = var.t2v-transformers, + image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.t2v-transformers}:latest" + cpu = 0, + essential = true, + environment = [ + { + name = "ENVIRONMENT", + value = "prod" + }, + { + name = "IS_LOCAL", + value = "false" + }, + { + name = "ENABLE_CUDA", + value = "0" + }], + mountPoints = [], + volumesFrom = [], + ulimits = [], + logConfiguration = { + logDriver = "awslogs", + options = { + "awslogs-group" = "/ecs/${var.t2v-transformers}", + "awslogs-create-group" = "true", + "awslogs-region" = var.REGION, + "awslogs-stream-prefix" = "ecs" + }, + secretOptions = [] + }, + systemControls = [], + healthCheck = { + command = ["CMD-SHELL", "curl -f http://127.0.0.1:8080/health || exit 1"], + interval = 30, + retries = 3, + startPeriod = 60, + timeout = 5 + } + }, + + ], + + ) + + task_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateTaskRole" + execution_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateExecutionRole" + + network_mode = "awsvpc" + requires_compatibilities = ["FARGATE"] + + cpu = "2048" + memory = "8192" + + runtime_platform { + cpu_architecture = "X86_64" + operating_system_family = "LINUX" + } +} + resource "aws_ecs_task_definition" "dev_weaviate_rag_generation" { family = "dev_${var.rag_description_generation}" From b0e7a21f26325b4c1b2208048aeb02b34a71ee96 Mon Sep 17 00:00:00 2001 From: threnjen Date: Fri, 20 Dec 2024 11:53:13 -0800 Subject: [PATCH 26/26] combine all cleaners into single deployment yml --- ...er.yml => dev_deployment_ecs_cleaners.yml} | 15 ++++++- .../dev_deployment_ecs_ratings_cleaner.yml | 41 ------------------- .../dev_deployment_ecs_users_cleaner.yml | 41 ------------------- ...r.yml => prod_deployment_ecs_cleaners.yml} | 14 +++++++ .../prod_deployment_ecs_ratings_cleaner.yml | 32 --------------- .../prod_deployment_ecs_users_cleaner.yml | 32 --------------- 6 files changed, 28 insertions(+), 147 deletions(-) rename .github/workflows/{dev_deployment_ecs_game_cleaner.yml => dev_deployment_ecs_cleaners.yml} (58%) delete mode 100644 .github/workflows/dev_deployment_ecs_ratings_cleaner.yml delete mode 100644 .github/workflows/dev_deployment_ecs_users_cleaner.yml rename .github/workflows/{prod_deployment_ecs_game_cleaner.yml => prod_deployment_ecs_cleaners.yml} (56%) delete mode 100644 .github/workflows/prod_deployment_ecs_ratings_cleaner.yml delete mode 100644 .github/workflows/prod_deployment_ecs_users_cleaner.yml diff --git a/.github/workflows/dev_deployment_ecs_game_cleaner.yml b/.github/workflows/dev_deployment_ecs_cleaners.yml similarity index 58% rename from .github/workflows/dev_deployment_ecs_game_cleaner.yml rename to .github/workflows/dev_deployment_ecs_cleaners.yml index 05e3358..cd9ed07 100644 --- a/.github/workflows/dev_deployment_ecs_game_cleaner.yml +++ b/.github/workflows/dev_deployment_ecs_cleaners.yml @@ -38,4 +38,17 @@ jobs: run: | DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . docker push $ECR_REGISTRY/$ECR_REPOSITORY - \ No newline at end of file + - name: Build, tag, push image to AWS ECR dev_bgg_users_data_cleaner + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: dev_bgg_users_data_cleaner + run: | + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.users-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . + docker push $ECR_REGISTRY/$ECR_REPOSITORY + - name: Build, tag, push image to AWS ECR dev_bgg_ratings_data_cleaner + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: dev_bgg_ratings_data_cleaner + run: | + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.ratings-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . + docker push $ECR_REGISTRY/$ECR_REPOSITORY \ No newline at end of file diff --git a/.github/workflows/dev_deployment_ecs_ratings_cleaner.yml b/.github/workflows/dev_deployment_ecs_ratings_cleaner.yml deleted file mode 100644 index 4c691d4..0000000 --- a/.github/workflows/dev_deployment_ecs_ratings_cleaner.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: DEV deployment to boardgamegeek ratings data cleaner - -on: - push: - branches: - - main - pull_request: - branches: - - main - -permissions: - id-token: write # This is required for requesting the JWT - contents: read # This is required for actions/checkout - -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@v2 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }} - aws-region: ${{ secrets.AWS_REGION }} - - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v2 - - - name: Build, tag, push image to AWS ECR dev_bgg_ratings_data_cleaner - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: dev_bgg_ratings_data_cleaner - run: | - DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.ratings-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . - docker push $ECR_REGISTRY/$ECR_REPOSITORY - \ No newline at end of file diff --git a/.github/workflows/dev_deployment_ecs_users_cleaner.yml b/.github/workflows/dev_deployment_ecs_users_cleaner.yml deleted file mode 100644 index 9534ea3..0000000 --- a/.github/workflows/dev_deployment_ecs_users_cleaner.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: DEV deployment to boardgamegeek users data cleaner - -on: - push: - branches: - - main - pull_request: - branches: - - main - -permissions: - id-token: write # This is required for requesting the JWT - contents: read # This is required for actions/checkout - -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@v2 - with: - ref: ${{ github.event.pull_request.head.sha }} - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }} - aws-region: ${{ secrets.AWS_REGION }} - - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v2 - - - name: Build, tag, push image to AWS ECR dev_bgg_users_data_cleaner - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: dev_bgg_users_data_cleaner - run: | - DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.users-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . - docker push $ECR_REGISTRY/$ECR_REPOSITORY - \ No newline at end of file diff --git a/.github/workflows/prod_deployment_ecs_game_cleaner.yml b/.github/workflows/prod_deployment_ecs_cleaners.yml similarity index 56% rename from .github/workflows/prod_deployment_ecs_game_cleaner.yml rename to .github/workflows/prod_deployment_ecs_cleaners.yml index a6fbfe0..25fb79b 100644 --- a/.github/workflows/prod_deployment_ecs_game_cleaner.yml +++ b/.github/workflows/prod_deployment_ecs_cleaners.yml @@ -30,3 +30,17 @@ jobs: run: | DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . docker push $ECR_REGISTRY/$ECR_REPOSITORY + - name: Build, tag, push image to AWS ECR bgg_users_data_cleaner + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: bgg_users_data_cleaner + run: | + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.users-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . + docker push $ECR_REGISTRY/$ECR_REPOSITORY + - name: Build, tag, push image to AWS ECR bgg_ratings_data_cleaner + env: + ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} + ECR_REPOSITORY: bgg_ratings_data_cleaner + run: | + DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.ratings-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . + docker push $ECR_REGISTRY/$ECR_REPOSITORY diff --git a/.github/workflows/prod_deployment_ecs_ratings_cleaner.yml b/.github/workflows/prod_deployment_ecs_ratings_cleaner.yml deleted file mode 100644 index df1d69b..0000000 --- a/.github/workflows/prod_deployment_ecs_ratings_cleaner.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: PROD deployment to boardgamegeek ratings data cleaner - -on: - push: - branches: - - main - -permissions: - id-token: write # This is required for requesting the JWT - contents: read # This is required for actions/checkout - -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@v2 - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }} - aws-region: ${{ secrets.AWS_REGION }} - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v2 - - name: Build, tag, push image to AWS ECR bgg_ratings_data_cleaner - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: bgg_ratings_data_cleaner - run: | - DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.ratings-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . - docker push $ECR_REGISTRY/$ECR_REPOSITORY diff --git a/.github/workflows/prod_deployment_ecs_users_cleaner.yml b/.github/workflows/prod_deployment_ecs_users_cleaner.yml deleted file mode 100644 index d6e0fe6..0000000 --- a/.github/workflows/prod_deployment_ecs_users_cleaner.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: PROD deployment to boardgamegeek users data cleaner - -on: - push: - branches: - - main - -permissions: - id-token: write # This is required for requesting the JWT - contents: read # This is required for actions/checkout - -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: Check out code - uses: actions/checkout@v2 - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }} - aws-region: ${{ secrets.AWS_REGION }} - - name: Login to Amazon ECR - id: login-ecr - uses: aws-actions/amazon-ecr-login@v2 - - name: Build, tag, push image to AWS ECR bgg_users_data_cleaner - env: - ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }} - ECR_REPOSITORY: bgg_users_data_cleaner - run: | - DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.users-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY . - docker push $ECR_REGISTRY/$ECR_REPOSITORY