VectorInstitute · rjavadi · Jun 3, 2024 · Jun 4, 2024 · Jun 4, 2024 · Jun 4, 2024
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,15 @@
 *.terraform/
 *terraform.tfstate*
 *.terraform.tfstate.lock.info
+*terraform.tfstate.backup
 
 *bart-large-mnli/
 *ml-api.zip
+# IDE settings
+/.idea
+
+# Local environment and model directories
+*venv/
+*paraphrase-bert/
+
+# Python bytecode caches and zip archives
+__pycache__/
+*.zip
diff --git a/reference_implementations/aws/offline/01_provider.tf b/reference_implementations/aws/offline/01_provider.tf
@@ -0,0 +1,4 @@
+# AWS provider configuration: both the target region and the credentials
+# profile are supplied through input variables.
+provider "aws" {
+  profile = var.default_profile
+  region  = var.region
+}
diff --git a/reference_implementations/aws/offline/02_sagemaker_execution_roles.tf b/reference_implementations/aws/offline/02_sagemaker_execution_roles.tf
@@ -0,0 +1,140 @@
+# IAM role whose trust policy lets the SageMaker service principal assume it.
+# The role itself carries no permissions; they are granted via a policy attachment.
+resource "aws_iam_role" "sagemaker_execution_role" {
+  name = "${local.prefix}-SagemakerModelExecutionRole"
+
+  # Trust policy: sagemaker.amazonaws.com may call sts:AssumeRole on this role.
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect    = "Allow"
+      Action    = "sts:AssumeRole"
+      Principal = { Service = "sagemaker.amazonaws.com" }
+    }]
+  })
+}
+
+# Managed policy holding the permissions for the SageMaker execution role:
+# SageMaker model/endpoint management, S3 object access, CloudWatch Logs and
+# metrics, read-only ECR image access, and Glue tables under the
+# sagemaker_featurestore database.
+resource "aws_iam_policy" "sagemaker_execution_role_policy" {
+  name        = "${local.prefix}-sagemaker-execution-role-policy"
+  description = "Policy for SageMaker model"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      { # models and endpoint access
+        Effect = "Allow"
+        Action = [
+          "sagemaker:CreateModel",
+          "sagemaker:CreateEndpointConfig",
+          "sagemaker:CreateEndpoint",
+          "sagemaker:DeleteEndpoint",
+          "sagemaker:InvokeEndpoint",
+          "sagemaker:UpdateEndpoint",
+          # NOTE(review): "sagemaker:StopEndpoint" does not look like a published
+          # SageMaker action - confirm against the IAM action list before relying on it.
+          "sagemaker:StopEndpoint",
+          "sagemaker:DeleteEndpointConfig",
+          "sagemaker:DeleteModel",
+          "sagemaker:DescribeEndpoint",
+          "sagemaker:DescribeEndpointConfig",
+          "sagemaker:DescribeModel",
+          "sagemaker:AddTags"
+        ]
+        # Scoped to SageMaker resources of this account in the configured region.
+        Resource = [
+          "arn:aws:sagemaker:${var.region}:${local.aws_account_id}:endpoint-config/*",
+          "arn:aws:sagemaker:${var.region}:${local.aws_account_id}:model/*",
+          "arn:aws:sagemaker:${var.region}:${local.aws_account_id}:endpoint/*",
+          "arn:aws:sagemaker:${var.region}:${local.aws_account_id}:app/*"
+        ]
+      },
+      { # S3 buckets (bucket ARN for ListBucket, "/*" ARN for object actions)
+        Effect = "Allow"
+        Action = [
+          "s3:GetObject",
+          "s3:PutObject",
+          "s3:ListBucket",
+          "s3:DeleteObject"
+        ]
+        # TODO: replace the Terraform state bucket below with your own bucket name.
+        # The second bucket follows the sagemaker-<region>-<account-id> naming
+        # pattern and is derived from the deployment's region and account instead
+        # of pinning us-east-1 and a single account ID.
+        Resource = [
+          "arn:aws:s3:::sagemaker-endpoint-deploy-tf-state-vector",
+          "arn:aws:s3:::sagemaker-endpoint-deploy-tf-state-vector/*",
+          "arn:aws:s3:::sagemaker-${var.region}-${local.aws_account_id}",
+          "arn:aws:s3:::sagemaker-${var.region}-${local.aws_account_id}/*"
+        ]
+      },
+      { # reading/writing logs
+        Effect = "Allow"
+        Action = [
+          "logs:CreateLogDelivery",
+          "logs:CreateLogGroup",
+          "logs:CreateLogStream",
+          "logs:DeleteLogDelivery",
+          "logs:Describe*",
+          "logs:GetLogDelivery",
+          "logs:GetLogEvents",
+          "logs:ListLogDeliveries",
+          "logs:PutLogEvents",
+          "logs:DescribeLogStreams",
+          "logs:DescribeLogGroups",
+          "logs:PutResourcePolicy",
+          "logs:UpdateLogDelivery",
+          "logs:FilterLogEvents"
+        ]
+        Resource = "*"
+      },
+      { # cloud watch
+        Effect = "Allow"
+        Action = [
+          "cloudwatch:DeleteAlarms",
+          "cloudwatch:DescribeAlarms",
+          "cloudwatch:GetMetricData",
+          "cloudwatch:GetMetricStatistics",
+          "cloudwatch:ListMetrics",
+          "cloudwatch:PutMetricAlarm",
+          "cloudwatch:PutMetricData"
+        ]
+        Resource = "*"
+      },
+      { # ECR login token (granted on all resources)
+        Effect   = "Allow"
+        Action   = "ecr:GetAuthorizationToken"
+        Resource = "*"
+      },
+      { # read-only access to container image repositories
+        Effect = "Allow"
+        Action = [
+          "ecr:BatchCheckLayerAvailability",
+          "ecr:GetDownloadUrlForLayer",
+          "ecr:GetRepositoryPolicy",
+          "ecr:DescribeRepositories",
+          "ecr:ListImages",
+          "ecr:DescribeImages",
+          "ecr:BatchGetImage",
+          "ecr:GetLifecyclePolicy",
+          "ecr:GetLifecyclePolicyPreview",
+          "ecr:ListTagsForResource",
+          "ecr:DescribeImageScanFindings"
+        ]
+        # NOTE(review): 763104351884 is presumably the AWS-owned registry that
+        # hosts the prebuilt framework containers - confirm for your region.
+        Resource = [
+          "arn:aws:ecr:${var.region}:763104351884:repository/*",
+        ]
+      },
+      { # feature store
+        Effect = "Allow"
+        Action = [
+          "glue:GetTable",
+          "glue:UpdateTable"
+        ]
+        Resource = [
+          "arn:aws:glue:*:*:catalog",
+          "arn:aws:glue:*:*:database/sagemaker_featurestore",
+          "arn:aws:glue:*:*:table/sagemaker_featurestore/*"
+        ]
+      }
+    ]
+  })
+}
+
+# Binds the SageMaker execution policy to the SageMaker execution role.
+resource "aws_iam_role_policy_attachment" "sagemaker_execution_role_policy_attachment" {
+  role       = aws_iam_role.sagemaker_execution_role.name
+  policy_arn = aws_iam_policy.sagemaker_execution_role_policy.arn
+}
diff --git a/reference_implementations/aws/offline/03_endpoint.tf b/reference_implementations/aws/offline/03_endpoint.tf
@@ -0,0 +1,42 @@
+# SageMaker model definition. The container image, model artifact URL and
+# container mode all come from input variables; the model runs under the
+# SageMaker execution role.
+resource "aws_sagemaker_model" "paraphrase_model" {
+  name               = "${local.prefix}-${var.sagemaker_model_name}"
+  execution_role_arn = aws_iam_role.sagemaker_execution_role.arn
+
+  container {
+    image          = var.sagemaker_container_repo_url
+    model_data_url = var.sagemaker_model_data_s3_url
+    mode           = var.sagemaker_model_mode
+
+    # Environment variables handed to the serving container.
+    environment = {
+      "SAGEMAKER_CONTAINER_LOG_LEVEL" = "20"
+      "SAGEMAKER_PROGRAM"             = "inference.py"
+      # Direct reference; an interpolation-only "${var.region}" is redundant.
+      "SAGEMAKER_REGION"              = var.region
+      "SAGEMAKER_SUBMIT_DIRECTORY"    = "/opt/ml/model"
+    }
+  }
+}
+
+# Endpoint configuration with a single production variant serving the
+# paraphrase model; variant name, instance type and count are input variables.
+resource "aws_sagemaker_endpoint_configuration" "ec" {
+  name = "${local.prefix}-${var.sagemaker_endpoint_conf_name}"
+
+  production_variants {
+    model_name             = aws_sagemaker_model.paraphrase_model.name
+    variant_name           = var.sagemaker_endpoint_conf_variant_name
+    instance_type          = var.sagemaker_model_instance_type
+    initial_instance_count = var.sagemaker_model_instance_count
+  }
+
+  # Common tags plus a Name tag that mirrors the resource name.
+  tags = merge(local.common_tags, {
+    Name = "${local.prefix}-${var.sagemaker_endpoint_conf_name}"
+  })
+}
+
+# SageMaker endpoint created from the endpoint configuration "ec".
+resource "aws_sagemaker_endpoint" "paraphrase_endpoint" {
+  endpoint_config_name = aws_sagemaker_endpoint_configuration.ec.name
+  name                 = "${local.prefix}-${var.sagemaker_endpoint_name}"
+
+  # Common tags plus a Name tag that mirrors the endpoint name.
+  tags = merge(local.common_tags, {
+    Name = "${local.prefix}-${var.sagemaker_endpoint_name}"
+  })
+}
diff --git a/reference_implementations/aws/offline/04_lambda.tf b/reference_implementations/aws/offline/04_lambda.tf
@@ -0,0 +1,145 @@
+# IAM role for the Lambda function; the trust policy lets the Lambda service
+# principal assume it. Permissions are added by the inline policies on this role.
+resource "aws_iam_role" "lambda_role" {
+  name = "${local.prefix}-BertParaphraseModelLambdaRoleTF"
+
+  # Trust policy: lambda.amazonaws.com may call sts:AssumeRole on this role.
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect    = "Allow"
+      Action    = "sts:AssumeRole"
+      Principal = { Service = "lambda.amazonaws.com" }
+    }]
+  })
+}
+
+# aws_iam_role_policy creates an inline policy that belongs to a single role.
+# This one lets the Lambda role create, write and read CloudWatch Logs groups,
+# streams and events on all resources.
+resource "aws_iam_role_policy" "lambda_logs_policy" {
+  role = aws_iam_role.lambda_role.id
+  name = "${local.prefix}-lambda_role_logs_policy"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect   = "Allow"
+      Resource = "*"
+      Action = [
+        "logs:CreateLogGroup",
+        "logs:CreateLogStream",
+        "logs:DescribeLogGroups",
+        "logs:DescribeLogStreams",
+        "logs:PutLogEvents",
+        "logs:GetLogEvents",
+        "logs:FilterLogEvents"
+      ]
+    }]
+  })
+}
+
+# Inline policy: lets the Lambda role invoke any SageMaker endpoint of this
+# account in the configured region.
+resource "aws_iam_role_policy" "lambda_sagemaker_policy" {
+  role = aws_iam_role.lambda_role.id
+  name = "${local.prefix}-lambda_role_sagemaker_policy"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Action   = "sagemaker:InvokeEndpoint"
+      Effect   = "Allow"
+      Resource = "arn:aws:sagemaker:${var.region}:${local.aws_account_id}:endpoint/*"
+    }]
+  })
+}
+
+# Inline policy: record-level read/write access to the SageMaker feature groups
+# of this account in the configured region.
+resource "aws_iam_role_policy" "lambda_sagemaker_featurestore_policy" {
+  role = aws_iam_role.lambda_role.id
+  name = "${local.prefix}-lambda_role_sagemaker_featurestore_policy"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect   = "Allow"
+      Resource = "arn:aws:sagemaker:${var.region}:${local.aws_account_id}:feature-group/*"
+      # NOTE(review): sagemaker:ListFeatureGroups may not support resource-level
+      # scoping - confirm it takes effect with the Resource above.
+      Action = [
+        "sagemaker:GetRecord",
+        "sagemaker:PutRecord",
+        "sagemaker:ListFeatureGroups",
+        "sagemaker:BatchGetRecord"
+      ]
+    }]
+  })
+}
+
+# Attach the AWS-managed AmazonRedshiftFullAccess policy to the Lambda role.
+# aws_iam_role_policy_attachment is used rather than aws_iam_policy_attachment:
+# the latter manages a policy's attachments exclusively, so applying it would
+# detach this managed policy from every other role, user or group in the
+# account that has it attached.
+resource "aws_iam_role_policy_attachment" "redshift_data_access" {
+  role       = aws_iam_role.lambda_role.name
+  policy_arn = "arn:aws:iam::aws:policy/AmazonRedshiftFullAccess"
+}
+
+# Inline policy: lets the Lambda role send, receive and delete messages on the
+# inference queue and read the queue's attributes.
+resource "aws_iam_role_policy" "lambda_sqs_policy" {
+  # Explicit name, following the "<prefix>-lambda_role_*_policy" pattern of the
+  # other inline policies on this role, instead of a Terraform-generated one.
+  name = "${local.prefix}-lambda_role_sqs_policy"
+  role = aws_iam_role.lambda_role.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect = "Allow"
+        Action = [
+          "sqs:SendMessage",
+          "sqs:ReceiveMessage",
+          "sqs:DeleteMessage",
+          "sqs:GetQueueAttributes"
+        ]
+        # Scoped to the single inference queue.
+        Resource = aws_sqs_queue.inference_queue.arn
+      }
+    ]
+  })
+}
+
+# Inline policy: lets the Lambda role publish CloudWatch metric data.
+resource "aws_iam_role_policy" "cloudwatch_put_metric_data_policy" {
+  # Explicit name, following the "<prefix>-lambda_role_*_policy" pattern of the
+  # other inline policies on this role, instead of a Terraform-generated one.
+  name = "${local.prefix}-lambda_role_cloudwatch_put_metric_data_policy"
+  role = aws_iam_role.lambda_role.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect   = "Allow"
+        Action   = "cloudwatch:PutMetricData"
+        Resource = "*"
+      }
+    ]
+  })
+}
+
+# Lambda function deployed from lambda.zip and run under the Lambda role.
+# Its environment variables expose the SageMaker endpoint name, the feature
+# group name and the Redshift cluster connection details.
+resource "aws_lambda_function" "inference_lambda_function" {
+  function_name = "${local.prefix}-bert-paraphrase-tf"
+  role          = aws_iam_role.lambda_role.arn
+  handler       = "lambda_function.lambda_handler"
+  # NOTE(review): python3.8 is an old Lambda runtime - confirm it is still
+  # accepted for new functions before deploying.
+  runtime = "python3.8"
+  timeout = 300
+
+  # Resolve the archive relative to this module rather than the current working
+  # directory, and use the same path for the package and its change-detection hash.
+  filename         = "${path.module}/lambda.zip"
+  source_code_hash = filebase64sha256("${path.module}/lambda.zip")
+
+  # A layer must live in the same region as the function, so the region comes
+  # from var.region instead of being pinned to us-east-1.
+  layers = [
+    "arn:aws:lambda:${var.region}:017000801446:layer:AWSLambdaPowertoolsPythonV2:38"
+  ]
+
+  environment {
+    variables = {
+      ENDPOINT_NAME      = aws_sagemaker_endpoint.paraphrase_endpoint.name
+      FEATURE_GROUP_NAME = aws_sagemaker_feature_group.paraphrase_fg.feature_group_name
+      # Built from dns_name + port: the cluster's `endpoint` attribute may
+      # already include the port, which would yield "host:port:port".
+      REDSHIFT_URL  = "${aws_redshift_cluster.redshift_feature_store.dns_name}:${aws_redshift_cluster.redshift_feature_store.port}"
+      REDSHIFT_USER = aws_redshift_cluster.redshift_feature_store.master_username
+      CLUSTER_ID    = aws_redshift_cluster.redshift_feature_store.id
+      DB_NAME       = var.db_name
+    }
+  }
+}