Skip to content

Commit

Permalink
Merge pull request #76 from threnjen/updates_to_rag_generation
Browse files Browse the repository at this point in the history
Initial draft for self-contained Weaviate containers
  • Loading branch information
threnjen authored Dec 20, 2024
2 parents fc224b9 + b0e7a21 commit 736723e
Show file tree
Hide file tree
Showing 27 changed files with 773 additions and 1,356 deletions.
Original file line number Diff line number Diff line change
@@ -1,32 +1,39 @@
name: PROD deployment to boardgamegeek users data cleaner
name: Build and Push all-mpnet-base-v2 Transformers Image

on:
push:
branches:
- main
pull_request:
branches:
- main

permissions:
id-token: write # This is required for requesting the JWT
contents: read # This is required for actions/checkout
id-token: write
contents: read

jobs:
build:
runs-on: ubuntu-latest

steps:
# Check out the repository so the later `docker build -f Dockerfiles/... .`
# step has the Dockerfile and build context available on the runner.
# Pinned to v4: v2 targets a Node.js runtime that GitHub has retired.
- name: Check out code
  uses: actions/checkout@v4

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }}
aws-region: ${{ secrets.AWS_REGION }}

- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- name: Build, tag, push image to AWS ECR bgg_users_data_cleaner

- name: Build, tag, and push Transformers image to AWS ECR
env:
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
ECR_REPOSITORY: bgg_users_data_cleaner
ECR_REPOSITORY: t2v-transformers
run: |
DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.users-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY .
docker push $ECR_REGISTRY/$ECR_REPOSITORY
DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.t2v-transformers -t $ECR_REGISTRY/$ECR_REPOSITORY .
docker push $ECR_REGISTRY/$ECR_REPOSITORY
Original file line number Diff line number Diff line change
@@ -1,32 +1,39 @@
name: PROD deployment to boardgamegeek game data cleaner
name: Build and Push Weaviate VectorDB Image

on:
push:
branches:
- main
pull_request:
branches:
- main

permissions:
id-token: write # This is required for requesting the JWT
contents: read # This is required for actions/checkout
id-token: write
contents: read

jobs:
build:
runs-on: ubuntu-latest

steps:
# Check out the repository so the later `docker build -f Dockerfiles/... .`
# step has the Dockerfile and build context available on the runner.
# Pinned to v4: v2 targets a Node.js runtime that GitHub has retired.
- name: Check out code
  uses: actions/checkout@v4

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: ${{ secrets.AWS_GITHUB_ROLE }}
aws-region: ${{ secrets.AWS_REGION }}

- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- name: Build, tag, push image to AWS ECR bgg_game_data_cleaner

- name: Build, tag, and push Weaviate image to AWS ECR
env:
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
ECR_REPOSITORY: bgg_game_data_cleaner
ECR_REPOSITORY: weaviate_rag_server
run: |
DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY .
DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.weaviate_rag -t $ECR_REGISTRY/$ECR_REPOSITORY .
docker push $ECR_REGISTRY/$ECR_REPOSITORY
1 change: 1 addition & 0 deletions .github/workflows/deploy_file_to_s3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ jobs:
- name: Sync files to S3 bucket
run: |
aws s3 cp config.json s3://${{ secrets.AWS_BUCKET_NAME }}
aws s3 cp modules/rag_description_generation/weaviate.env s3://${{ secrets.AWS_BUCKET_NAME }}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,17 @@ jobs:
run: |
DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY .
docker push $ECR_REGISTRY/$ECR_REPOSITORY
- name: Build, tag, push image to AWS ECR dev_bgg_users_data_cleaner
env:
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
ECR_REPOSITORY: dev_bgg_users_data_cleaner
run: |
DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.users-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY .
docker push $ECR_REGISTRY/$ECR_REPOSITORY
- name: Build, tag, push image to AWS ECR dev_bgg_ratings_data_cleaner
env:
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
ECR_REPOSITORY: dev_bgg_ratings_data_cleaner
run: |
DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.ratings-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY .
docker push $ECR_REGISTRY/$ECR_REPOSITORY
41 changes: 0 additions & 41 deletions .github/workflows/dev_deployment_ecs_ratings_cleaner.yml

This file was deleted.

41 changes: 0 additions & 41 deletions .github/workflows/dev_deployment_ecs_users_cleaner.yml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: PROD deployment to boardgamegeek ratings data cleaner
name: PROD deployment to boardgamegeek game data cleaner

on:
push:
Expand All @@ -23,6 +23,20 @@ jobs:
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- name: Build, tag, push image to AWS ECR bgg_game_data_cleaner
env:
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
ECR_REPOSITORY: bgg_game_data_cleaner
run: |
DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.game-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY .
docker push $ECR_REGISTRY/$ECR_REPOSITORY
- name: Build, tag, push image to AWS ECR bgg_users_data_cleaner
env:
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
ECR_REPOSITORY: bgg_users_data_cleaner
run: |
DOCKER_BUILDKIT=1 docker build -f Dockerfiles/Dockerfile.users-data-cleaner --build-arg GROUP="group1" -t $ECR_REGISTRY/$ECR_REPOSITORY .
docker push $ECR_REGISTRY/$ECR_REPOSITORY
- name: Build, tag, push image to AWS ECR bgg_ratings_data_cleaner
env:
ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
Expand Down
5 changes: 5 additions & 0 deletions Dockerfiles/Dockerfile.t2v-transformers
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Dockerfile.t2v-transformers
# Thin wrapper around the transformers-inference image (all-mpnet-base-v2
# sentence-transformers model) pulled from cr.weaviate.io. It copies no files
# and runs no build commands; it only sets one environment default.
FROM cr.weaviate.io/semitechnologies/transformers-inference:sentence-transformers-all-mpnet-base-v2

# Set environment variables
# NOTE(review): ENABLE_CUDA=0 presumably keeps inference on CPU — confirm
# against the base image's documentation.
ENV ENABLE_CUDA=0
14 changes: 14 additions & 0 deletions Dockerfiles/Dockerfile.weaviate_rag
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Dockerfile.weaviate_rag
# Weaviate 1.27.7 with this project's runtime configuration baked into the image.
FROM cr.weaviate.io/semitechnologies/weaviate:1.27.7

# Runtime configuration, declared in one ENV instruction (none of the values
# reference each other, so grouping them does not change what is set).
#   - text2vec-transformers is both the only enabled module and the default
#     vectorizer.
#   - The inference service is addressed by the hostname "t2v-transformers" on
#     port 8080; that name must resolve wherever this container runs.
#   - NOTE(review): anonymous access is switched on — confirm the service is
#     only reachable from trusted networks.
ENV QUERY_DEFAULTS_LIMIT=25 \
    AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true \
    PERSISTENCE_DATA_PATH=/var/lib/weaviate \
    DEFAULT_VECTORIZER_MODULE=text2vec-transformers \
    ENABLE_MODULES=text2vec-transformers \
    CLUSTER_HOSTNAME=node1 \
    TRANSFORMERS_INFERENCE_API=http://t2v-transformers:8080

# Default arguments: bind to all interfaces on port 8081 over plain HTTP.
# NOTE(review): an exec-form CMD that starts with flags relies on the base
# image defining an ENTRYPOINT that receives them — confirm.
CMD ["--host", "0.0.0.0", "--port", "8081", "--scheme", "http"]
46 changes: 23 additions & 23 deletions aws_terraform_bgg/ec2_instances.tf
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
# EC2 instance tagged "weaviate_embedder" (Environment "dev").
# It is placed in the first public subnet of module.vpc with a public IP
# attached, and is bootstrapped from the rendered cloud-init document passed
# as user_data.
resource "aws_instance" "weaviate_ec2_instance" {

instance_type = "t3.medium"
# NOTE(review): the AMI ID is hard-coded — confirm it is valid for the region
# this configuration is applied in.
ami = "ami-055e3d4f0bbeb5878"
key_name = "weaviate-ec2"
monitoring = true
# Three security groups: SSH access, Weaviate port access, and the shared-resources group.
vpc_security_group_ids = [aws_security_group.ec2_ssh_access.id, aws_security_group.ec2_weaviate_port_access.id, aws_security_group.shared_resources_sg.id]
subnet_id = module.vpc.public_subnets[0]
associate_public_ip_address = true
iam_instance_profile = aws_iam_instance_profile.weaviate_ec2_instance_role.name

# Root volume: size 30, encrypted at rest.
root_block_device {
volume_size = 30
encrypted = true
}
tags = {
Name = "weaviate_embedder"
Terraform = "true"
Environment = "dev"
}

# Instance bootstrap script comes from the cloudinit_config data source.
user_data = data.cloudinit_config.weaviate_ec2_instance.rendered
}
# resource "aws_instance" "weaviate_ec2_instance" {

# instance_type = "t3.large"
# ami = "ami-055e3d4f0bbeb5878"
# key_name = "weaviate-ec2"
# monitoring = true
# vpc_security_group_ids = [aws_security_group.ec2_ssh_access.id, aws_security_group.ec2_weaviate_port_access.id, aws_security_group.shared_resources_sg.id]
# subnet_id = module.vpc.public_subnets[0]
# associate_public_ip_address = true
# iam_instance_profile = aws_iam_instance_profile.weaviate_ec2_instance_role.name

# root_block_device {
# volume_size = 30
# encrypted = true
# }
# tags = {
# Name = "weaviate_embedder"
# Terraform = "true"
# Environment = "dev"
# }

# user_data = data.cloudinit_config.weaviate_ec2_instance.rendered
# }

resource "aws_iam_instance_profile" "weaviate_ec2_instance_role" {
name = "test_profile"
Expand Down
14 changes: 13 additions & 1 deletion aws_terraform_bgg/fargate_ecr.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ locals {
module.bgg_users_data_cleaner_ecr.ecr_repository_name,
module.dev_bgg_users_data_cleaner_ecr.ecr_repository_name,
module.rag_description_generation_ecr.ecr_repository_name,
module.dev_rag_description_generation_ecr.ecr_repository_name
module.dev_rag_description_generation_ecr.ecr_repository_name,
module.weaviate_rag_server_ecr.ecr_repository_name,
module.t2v-transformers_ecr.ecr_repository_name
]
}

Expand All @@ -24,6 +26,16 @@ module "bgg_boardgame_file_retrieval_ecr" {
ecr_repository_name = "bgg_boardgame_file_retrieval"
}

# Instantiates the local ./modules/ecr module for the Weaviate server image.
# The repository name is the same string the image-build workflow uses as
# ECR_REPOSITORY, so the two must be changed together.
module "weaviate_rag_server_ecr" {
source = "./modules/ecr"
ecr_repository_name = "weaviate_rag_server"
}

# Instantiates the local ./modules/ecr module for the transformers inference
# image. The repository name is the same string the image-build workflow uses
# as ECR_REPOSITORY, so the two must be changed together.
module "t2v-transformers_ecr" {
source = "./modules/ecr"
ecr_repository_name = "t2v-transformers"
}

module "rag_description_generation_ecr" {
source = "./modules/ecr"
ecr_repository_name = "rag_description_generation"
Expand Down
2 changes: 0 additions & 2 deletions aws_terraform_bgg/fargate_ecs_roles.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ resource "aws_iam_role_policy_attachment" "S3_Access_rag_description_generation_
policy_arn = aws_iam_policy.S3_Access_bgg_scraper_policy.arn
}



module "rag_description_generation_FargateTaskRole_role" {
source = "./modules/iam_ecs_roles"
task_definition = "rag_description_generation_FargateTaskRole"
Expand Down
30 changes: 0 additions & 30 deletions aws_terraform_bgg/fargate_task_defs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,6 @@ resource "aws_ecs_cluster" "boardgamegeek" {
}
}

# Task definition for RAG description generation, environment "prod".
# Family and name both come from var.rag_description_generation. The image,
# registry name, and both role ARNs are built from the current account ID,
# var.REGION, and that same variable.
module "rag_description_generation_ecs" {
source = "./modules/ecs_task_definition"
task_definition_family = var.rag_description_generation
task_definition_name = var.rag_description_generation
registry_name = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.rag_description_generation}:latest"
environment = "prod"
# Environment file is read from the scraper S3 bucket.
env_file = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env"
task_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateTaskRole"
execution_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateExecutionRole"
# Always resolves to the :latest tag of the repository.
image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.rag_description_generation}:latest"
cpu = "1024"
memory = "8192"
region = var.REGION
}

# Task definition for RAG description generation, environment "dev".
# Family and name carry a "dev_" prefix on var.rag_description_generation.
# NOTE(review): `image` points at the dev_-prefixed repository, but
# `registry_name` and both role ARNs use the un-prefixed name — confirm that
# sharing the non-dev registry name and roles is intentional.
module "dev_rag_description_generation_ecs" {
source = "./modules/ecs_task_definition"
task_definition_family = "dev_${var.rag_description_generation}"
task_definition_name = "dev_${var.rag_description_generation}"
registry_name = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/${var.rag_description_generation}:latest"
environment = "dev"
# Same environment file as the prod definition.
env_file = "arn:aws:s3:::${var.S3_SCRAPER_BUCKET}/boardgamegeek.env"
task_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateTaskRole"
execution_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${var.rag_description_generation}_FargateExecutionRole"
image = "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.REGION}.amazonaws.com/dev_${var.rag_description_generation}:latest"
cpu = "1024"
memory = "8192"
region = var.REGION
}

module "boardgamegeek_orchestrator_ecs" {
source = "./modules/ecs_task_definition"
task_definition_family = var.boardgamegeek_orchestrator
Expand Down
Loading

0 comments on commit 736723e

Please sign in to comment.