Merge branch 'troubleshooting-module-all-scenarios' of https://github.com/arcegacardenas/eks-workshop-v2 into troubleshooting-module-pagilla

latest changes
Raghuveerareddy Pagilla committed Nov 18, 2024
2 parents 334793b + 59c73f0 commit 3b650c9
Showing 59 changed files with 4,864 additions and 14 deletions.
8 changes: 7 additions & 1 deletion manifests/modules/troubleshooting/alb/.workshop/cleanup.sh
@@ -1,5 +1,7 @@
#!/bin/bash

set -e

logmessage "Restoring public subnet tags..."

# Function to create tags for subnet IDs
@@ -16,4 +18,8 @@ remove_tags_from_subnets() {
return 0
}

remove_tags_from_subnets

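# Remove the resources created for this scenario: the ui ingress and the
# AWS Load Balancer Controller chart installed in kube-system.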
kubectl delete ingress -n ui ui --ignore-not-found

uninstall-helm-chart aws-load-balancer-controller kube-system
55 changes: 55 additions & 0 deletions manifests/modules/troubleshooting/cni/.workshop/cleanup.sh
@@ -0,0 +1,55 @@
# VPC_CNI_IAM_ROLE_NAME="eksctl-eks-workshop-addon-vpc-cni-Role1-n85u3l0IhDSv"

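# Cleanup for the CNI troubleshooting scenario: remove the lab namespace,
# restore the AmazonEKS_CNI_Policy on the VPC CNI addon role, delete any
# extra managed nodegroups created during the module, and revert the vpc-cni
# addon configuration to its defaults.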
kubectl delete namespace cni-tshoot --ignore-not-found
attached_policies=$(aws iam list-attached-role-policies --role-name $VPC_CNI_IAM_ROLE_NAME --query 'AttachedPolicies[*].PolicyArn' --output text)

is_policy_exist=0

for policy in ${attached_policies[@]}; do
  if [ "$policy" == "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" ]; then
    is_policy_exist=1
  else
    aws iam detach-role-policy --role-name $VPC_CNI_IAM_ROLE_NAME --policy-arn $policy
  fi
done

if [ $is_policy_exist -eq 0 ]; then
  logmessage "Attaching back AmazonEKS_CNI_Policy policy into VPC CNI addon role"

  aws iam attach-role-policy --role-name $VPC_CNI_IAM_ROLE_NAME --policy-arn arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy
fi

nodes=$(aws eks list-nodegroups --cluster-name $EKS_CLUSTER_NAME --query 'nodegroups' --output text)
deleted_nodes=()

logmessage "Reverting EKS managed nodegroup configuration"
for node in ${nodes[@]}; do
  if [[ "$node" != "default" && "$node" != "cni_troubleshooting_nodes" ]]; then
    logmessage "Deleting nodegroup $node"
    aws eks delete-nodegroup --cluster-name $EKS_CLUSTER_NAME --nodegroup-name $node
    deleted_nodes+=("$node")
  fi
done

logmessage "Waiting for EKS managed nodegroup to be deleted"
for deleted_node in ${deleted_nodes[@]}; do
logmessage "waiting for deletion of $deleted_node"
aws eks wait nodegroup-deleted --cluster-name $EKS_CLUSTER_NAME --nodegroup-name $deleted_node
done

DEFAULT_CONFIG='{"enableNetworkPolicy":"true","env":{"ENABLE_POD_ENI":"true","ENABLE_PREFIX_DELEGATION":"true","POD_SECURITY_GROUP_ENFORCING_MODE":"standard"},"nodeAgent":{"enablePolicyEventLogs":"true"}}'
CURRENT_CONFIG=$(aws eks describe-addon --addon-name vpc-cni --cluster-name $EKS_CLUSTER_NAME --query addon.configurationValues --output text | jq --sort-keys -c .)

if [ "$DEFAULT_CONFIG" != "$CURRENT_CONFIG" ]; then
  logmessage "Reverting VPC CNI config to default"
  addons_status=$(aws eks describe-addon --addon-name vpc-cni --cluster-name $EKS_CLUSTER_NAME --query addon.status --output text)
  while [ "$addons_status" == "UPDATING" ]; do
    logmessage "Waiting for VPC CNI addons status to not be in UPDATING"
    sleep 60
    addons_status=$(aws eks describe-addon --addon-name vpc-cni --cluster-name $EKS_CLUSTER_NAME --query addon.status --output text)
  done

  aws eks update-addon --addon-name vpc-cni --cluster-name $EKS_CLUSTER_NAME --service-account-role-arn $VPC_CNI_IAM_ROLE_ARN --configuration-values "$DEFAULT_CONFIG"
fi


23 changes: 23 additions & 0 deletions manifests/modules/troubleshooting/cni/.workshop/ssm.sh
@@ -0,0 +1,23 @@
#!/bin/bash
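# Tails a VPC CNI log file on a worker node via SSM Run Command and prints
# matching lines. Arguments: $1 = EC2 instance ID, $2 = log file name under
# /var/log/aws-routed-eni (e.g. ipamd), $3 = number of lines to tail,
# $4 = grep pattern.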
COMMAND_ID=$(aws ssm send-command \
  --instance-ids "$1" \
  --document-name "AWS-RunShellScript" \
  --comment "Demo run shell script on Linux Instances" \
  --parameters '{"commands":["sudo -Hiu root bash << END","tail -n '$3' /var/log/aws-routed-eni/'$2'.log | grep '$4'", "END"]}' \
  --output text \
  --query "Command.CommandId")

STATUS=InProgress
while [ "$STATUS" == "InProgress" ]; do
  # Brief pause between polls to avoid hammering the SSM API
  sleep 2
  STATUS=$(aws ssm get-command-invocation \
    --command-id "$COMMAND_ID" \
    --instance-id "$1" \
    --output text \
    --query "Status")
done

aws ssm list-command-invocations \
  --command-id "$COMMAND_ID" \
  --details \
  --output text \
  --query "CommandInvocations[].CommandPlugins[].Output"
197 changes: 197 additions & 0 deletions manifests/modules/troubleshooting/cni/.workshop/terraform/main.tf
@@ -0,0 +1,197 @@
locals {
tags = {
module = "troubleshooting"
}
secondary_cidr = "100.64.0.0/22"
}

data "aws_vpc" "selected" {
tags = {
created-by = "eks-workshop-v2"
env = var.addon_context.eks_cluster_id
}
}

data "aws_subnets" "private" {
tags = {
created-by = "eks-workshop-v2"
env = var.addon_context.eks_cluster_id
}

filter {
name = "tag:Name"
values = ["*Private*"]
}
}

resource "aws_vpc_ipv4_cidr_block_association" "secondary_cidr" {
vpc_id = data.aws_vpc.selected.id
cidr_block = local.secondary_cidr
}

data "aws_subnet" "selected" {
count = length(data.aws_subnets.private.ids)

id = data.aws_subnets.private.ids[count.index]
}

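# Carve the secondary CIDR into additional per-AZ subnets: one /24 "large"
# subnet and one /28 "small" subnet for each existing private subnet's
# availability zone.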
resource "aws_subnet" "large_subnet" {
count = length(data.aws_subnets.private.ids)

vpc_id = aws_vpc_ipv4_cidr_block_association.secondary_cidr.vpc_id
cidr_block = cidrsubnet(local.secondary_cidr, 2, count.index)
availability_zone = data.aws_subnet.selected[count.index].availability_zone

tags = merge(local.tags, var.tags, {
AdditionalSubnet = "true"
Size = "large"
})

depends_on = [
aws_vpc_ipv4_cidr_block_association.secondary_cidr
]
}

resource "aws_subnet" "small_subnet" {
count = length(data.aws_subnets.private.ids)

vpc_id = aws_vpc_ipv4_cidr_block_association.secondary_cidr.vpc_id
cidr_block = cidrsubnet(local.secondary_cidr, 6, count.index + 48)
availability_zone = data.aws_subnet.selected[count.index].availability_zone

tags = merge(local.tags, {
AdditionalSubnet = "true"
Size = "small"
})

depends_on = [
aws_vpc_ipv4_cidr_block_association.secondary_cidr
]
}

data "aws_route_table" "private" {
count = length(data.aws_subnets.private.ids)

vpc_id = data.aws_vpc.selected.id
subnet_id = data.aws_subnets.private.ids[count.index]
}

resource "aws_route_table_association" "small_subnet" {
count = length(data.aws_subnets.private.ids)

subnet_id = aws_subnet.small_subnet[count.index].id
route_table_id = data.aws_route_table.private[count.index].route_table_id
}

resource "aws_route_table_association" "large_subnet" {
count = length(data.aws_subnets.private.ids)

subnet_id = aws_subnet.large_subnet[count.index].id
route_table_id = data.aws_route_table.private[count.index].route_table_id
}

resource "aws_iam_role" "node_role" {
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Principal = {
Service = [
"ec2.amazonaws.com"
]
}
Action = "sts:AssumeRole"
}
]
})
managed_policy_arns = [
"arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore",
"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy",
"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
]
}

resource "aws_eks_access_entry" "cni_troubleshooting_nodes" {
cluster_name = var.eks_cluster_id
principal_arn = aws_iam_role.node_role.arn
type = "EC2_LINUX"
}

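# Dedicated managed nodegroup for the CNI troubleshooting lab: placed in the
# small /28 subnets, starts at zero nodes, and only accepts pods that
# tolerate the purpose=cni_troubleshooting taint.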
resource "aws_eks_node_group" "cni_troubleshooting_nodes" {

cluster_name = var.eks_cluster_id
node_group_name = "cni_troubleshooting_nodes"
node_role_arn = aws_iam_role.node_role.arn
subnet_ids = aws_subnet.small_subnet[*].id
instance_types = ["m5.large"]

scaling_config {
desired_size = 0
max_size = 6
min_size = 0
}

labels = {
app = "cni_troubleshooting"
}

taint {
key = "purpose"
value = "cni_troubleshooting"
effect = "NO_SCHEDULE"
}

update_config {
max_unavailable = 1
}

tags = merge(local.tags, var.tags)

}


data "aws_eks_addon" "vpc_cni" {
addon_name = "vpc-cni"
cluster_name = var.addon_context.eks_cluster_id
}

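# Sets up the troubleshooting scenario: raises the vpc-cni addon memory
# request to 2G, waits for the addon update to complete, detaches the
# AmazonEKS_CNI_Policy from the addon role, and scales the lab nodegroup
# to one node.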
resource "null_resource" "change_config" {
triggers = {
config = data.aws_eks_addon.vpc_cni.configuration_values,
cluster_name = var.addon_context.eks_cluster_id,
role_arn = data.aws_eks_addon.vpc_cni.service_account_role_arn,
node_group_name = aws_eks_node_group.cni_troubleshooting_nodes.node_group_name,
role_name = split("/", data.aws_eks_addon.vpc_cni.service_account_role_arn)[1],
timestamp = timestamp()
}

provisioner "local-exec" {
command = <<EOF
mkdir -p /eks-workshop/temp
CURRENT_CONFIG='${jsonencode(self.triggers.config)}'
NEW_CONFIG=$(echo "$CURRENT_CONFIG" | jq -r . | jq -c '. += {"resources":{"requests":{"memory":"2G"}}}')
aws eks update-addon --addon-name vpc-cni --cluster-name ${self.triggers.cluster_name} --service-account-role-arn ${self.triggers.role_arn} --configuration-values "$NEW_CONFIG"
addons_status="UPDATING"
while [ "$addons_status" == "UPDATING" ]; do
  sleep 60
  addons_status=$(aws eks describe-addon --addon-name vpc-cni --cluster-name ${self.triggers.cluster_name} --query addon.status --output text)
done
aws iam detach-role-policy --role-name ${self.triggers.role_name} --policy-arn arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy
aws eks update-nodegroup-config --cluster-name ${self.triggers.cluster_name} --nodegroup-name ${self.triggers.node_group_name} --scaling-config minSize=0,maxSize=6,desiredSize=1
EOF
}

}

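# Applies the sample workload manifests with kustomize on every run
# (forced by the always_run timestamp trigger).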
resource "null_resource" "kustomize_app" {
triggers = {
always_run = timestamp()
}

provisioner "local-exec" {
command = "kubectl apply -k ~/environment/eks-workshop/modules/troubleshooting/cni/workload"
when = create
}

}
17 changes: 17 additions & 0 deletions manifests/modules/troubleshooting/cni/.workshop/terraform/outputs.tf
@@ -0,0 +1,17 @@
output "environment_variables" {
description = "Environment variables to be added to the IDE shell"
value = {
# VPC_ID = data.aws_vpc.selected.id,
# LOAD_BALANCER_CONTROLLER_ROLE_NAME = module.eks_blueprints_addons.aws_load_balancer_controller.iam_role_name,
# LOAD_BALANCER_CONTROLLER_POLICY_ARN_FIX = module.eks_blueprints_addons.aws_load_balancer_controller.iam_policy_arn,
# LOAD_BALANCER_CONTROLLER_POLICY_ARN_ISSUE = aws_iam_policy.issue.arn,
# LOAD_BALANCER_CONTROLLER_ROLE_ARN = module.eks_blueprints_addons.aws_load_balancer_controller.iam_role_arn
VPC_CNI_IAM_ROLE_NAME = split("/", data.aws_eks_addon.vpc_cni.service_account_role_arn)[1],
VPC_CNI_IAM_ROLE_ARN = data.aws_eks_addon.vpc_cni.service_account_role_arn,
ADDITIONAL_SUBNET_1 = aws_subnet.large_subnet[0].id,
ADDITIONAL_SUBNET_2 = aws_subnet.large_subnet[1].id,
ADDITIONAL_SUBNET_3 = aws_subnet.large_subnet[2].id,
NODEGROUP_IAM_ROLE = aws_iam_role.node_role.arn,
AWS_NODE_ADDON_CONFIG = jsonencode(data.aws_eks_addon.vpc_cni.configuration_values)
}
}
35 changes: 35 additions & 0 deletions manifests/modules/troubleshooting/cni/.workshop/terraform/vars.tf
@@ -0,0 +1,35 @@
# tflint-ignore: terraform_unused_declarations
variable "eks_cluster_id" {
description = "EKS cluster name"
type = string
}

# tflint-ignore: terraform_unused_declarations
variable "eks_cluster_version" {
description = "EKS cluster version"
type = string
}

# tflint-ignore: terraform_unused_declarations
variable "cluster_security_group_id" {
description = "EKS cluster security group ID"
type = any
}

# tflint-ignore: terraform_unused_declarations
variable "addon_context" {
description = "Addon context that can be passed directly to blueprints addon modules"
type = any
}

# tflint-ignore: terraform_unused_declarations
variable "tags" {
description = "Tags to apply to AWS resources"
type = any
}

# tflint-ignore: terraform_unused_declarations
variable "resources_precreated" {
description = "Have expensive resources been created already"
type = bool
}
42 changes: 42 additions & 0 deletions manifests/modules/troubleshooting/cni/workload/deployment.yaml
@@ -0,0 +1,42 @@
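# 15 nginx replicas that schedule only onto the cni_troubleshooting nodes:
# node affinity requires the app=cni_troubleshooting label and the toleration
# matches the purpose taint applied to that nodegroup.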
---
apiVersion: v1
kind: Namespace
metadata:
  name: cni-tshoot

---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-app
  namespace: cni-tshoot
  labels:
    app: nginx
spec:
  replicas: 15
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: app
                    operator: In
                    values:
                      - cni_troubleshooting
      containers:
        - name: nginx
          image: nginx:1.14.2
          ports:
            - containerPort: 80
      tolerations:
        - key: "purpose"
          operator: "Exists"
          effect: "NoSchedule"