Skip to content

Commit

Permalink
Merge pull request #12 from smartnews/v0.36.0-sn
Browse files Browse the repository at this point in the history
V0.36.0 sn
  • Loading branch information
Luke-Smartnews authored May 29, 2024
2 parents 490ef94 + 0aea29f commit 269babf
Show file tree
Hide file tree
Showing 9 changed files with 53 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ coverage.html
*.test
*.cpuprofile
*.heapprofile
*.swp
go.work
go.work.sum

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ licenses: download ## Verifies dependency licenses
! go-licenses csv ./... | grep -v -e 'MIT' -e 'Apache-2.0' -e 'BSD-3-Clause' -e 'BSD-2-Clause' -e 'ISC' -e 'MPL-2.0' -e 'github.com/awslabs/amazon-eks-ami/nodeadm'

image: ## Build the Karpenter controller images using ko build
$(eval CONTROLLER_IMG=$(shell $(WITH_GOFLAGS) KOCACHE=$(KOCACHE) KO_DOCKER_REPO="$(KO_DOCKER_REPO)" ko build --bare github.com/aws/karpenter-provider-aws/cmd/controller))
$(eval CONTROLLER_IMG=$(shell $(WITH_GOFLAGS) KOCACHE=$(KOCACHE) KO_DOCKER_REPO="$(KO_DOCKER_REPO)" ko build --platform linux/amd64 --bare github.com/aws/karpenter-provider-aws/cmd/controller))
$(eval IMG_REPOSITORY=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 1 | cut -d ":" -f 1))
$(eval IMG_TAG=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 1 | cut -d ":" -f 2 -s))
$(eval IMG_DIGEST=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 2))
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,5 @@ require (
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
)

replace sigs.k8s.io/karpenter v0.37.0 => github.com/smartnews/karpenter v0.37.0-sn-1
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXn
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/smartnews/karpenter v0.37.0-sn-1 h1:thaABSIfSnrPhqaYHAMfNIcvIc6ihtyYwuVrQ1Q6DVk=
github.com/smartnews/karpenter v0.37.0-sn-1/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8=
github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
Expand Down Expand Up @@ -761,8 +763,6 @@ sigs.k8s.io/controller-runtime v0.18.2 h1:RqVW6Kpeaji67CY5nPEfRz6ZfFMk0lWQlNrLql
sigs.k8s.io/controller-runtime v0.18.2/go.mod h1:tuAt1+wbVsXIT8lPtk5RURxqAnq7xkpv2Mhttslg7Hw=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/karpenter v0.37.0 h1:eUFD9hJ2mpZrw31OUYhpbxLWEDmbXT05wX27dZB2E5o=
sigs.k8s.io/karpenter v0.37.0/go.mod h1:5XYrIz9Bi7HgQyaUsx7O08ft+TJjrH+htlnPq8Sz9J8=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
Expand Down
9 changes: 7 additions & 2 deletions pkg/apis/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,15 @@ spec:
memory leak protection, and disruption testing.
pattern: ^(([0-9]+(s|m|h))+)|(Never)$
type: string
utilizationThreshold:
description: |-
UtilizationThreshold is defined as sum of requested resources divided by capacity
below which a node can be considered for disruption.
maximum: 100
minimum: 1
type: integer
type: object
x-kubernetes-validations:
- message: consolidateAfter cannot be combined with consolidationPolicy=WhenUnderutilized
rule: 'has(self.consolidateAfter) ? self.consolidationPolicy != ''WhenUnderutilized'' || self.consolidateAfter == ''Never'' : true'
- message: consolidateAfter must be specified with consolidationPolicy=WhenEmpty
rule: 'self.consolidationPolicy == ''WhenEmpty'' ? has(self.consolidateAfter) : true'
limits:
Expand Down
2 changes: 1 addition & 1 deletion pkg/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const (
// resources. Cache hits enable faster provisioning and reduced API load on
// AWS APIs, which can have a serious impact on performance and scalability.
// DO NOT CHANGE THIS VALUE WITHOUT DUE CONSIDERATION
DefaultTTL = time.Minute
DefaultTTL = 5 * time.Minute
// UnavailableOfferingsTTL is the time before offerings that were marked as unavailable
// are removed from the cache and are available for launch again
UnavailableOfferingsTTL = 3 * time.Minute
Expand Down
24 changes: 24 additions & 0 deletions pkg/controllers/interruption/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/samber/lo"
"go.uber.org/multierr"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
Expand Down Expand Up @@ -201,13 +202,36 @@ func (c *Controller) handleNodeClaim(ctx context.Context, msg messages.Message,
if zone != "" && instanceType != "" {
c.unavailableOfferingsCache.MarkUnavailable(ctx, string(msg.Kind()), instanceType, zone, v1beta1.CapacityTypeSpot)
}
spotTotal.WithLabelValues(instanceType, zone, nodeClaim.Status.NodeName, nodeClaim.Labels["karpenter.sh/nodepool"]).Inc()
// try to create a new nodeclaim immediately but ignore error if it fails
if err := c.createNodeClaim(ctx, nodeClaim); err != nil {
log.FromContext(ctx).Error(err, "[interruption handling]failed to create a new nodeclaim")
} else {
log.FromContext(ctx).Info("Created new nodeclaim due to spot interruption")
// wait for the node provisioning before draining
time.Sleep(60 * time.Second)
}
}
if action != NoAction {
return c.deleteNodeClaim(ctx, nodeClaim, node)
}
return nil
}

// createNodeClaim creates a replacement NodeClaim that carries over the
// generateName, annotations, labels, owner references and spec of the
// interrupted one.
//
// The interrupted NodeClaim is deep-copied first so that the replacement does
// not share map or slice storage with oldNodeClaim; the object handed to
// Create can then be modified without affecting the caller's copy, which is
// still used after this call returns.
//
// It returns whatever error kubeClient.Create reports.
func (c *Controller) createNodeClaim(ctx context.Context, oldNodeClaim *v1beta1.NodeClaim) error {
	// Work from a private copy: Labels/Annotations are maps and OwnerReferences
	// and parts of Spec are slices/pointers, so plain assignment would alias them.
	src := oldNodeClaim.DeepCopy()
	newNodeClaim := &v1beta1.NodeClaim{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName:    src.GenerateName,
			Annotations:     src.Annotations,
			Labels:          src.Labels,
			OwnerReferences: src.OwnerReferences,
		},
		Spec: src.Spec,
	}
	return c.kubeClient.Create(ctx, newNodeClaim)
}

// deleteNodeClaim removes the NodeClaim from the api-server
func (c *Controller) deleteNodeClaim(ctx context.Context, nodeClaim *v1beta1.NodeClaim, node *v1.Node) error {
if !nodeClaim.DeletionTimestamp.IsZero() {
Expand Down
15 changes: 14 additions & 1 deletion pkg/controllers/interruption/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ const (
messageTypeLabel = "message_type"
actionTypeLabel = "action_type"
terminationReasonLabel = "interruption"
instanceTypeLabel = "instance_type"
zoneLabel = "zone"
hostLabel = "node_name"
poolLabel = "node_pool"
)

var (
Expand Down Expand Up @@ -67,8 +71,17 @@ var (
metrics.NodePoolLabel,
},
)
spotTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metrics.Namespace,
Subsystem: interruptionSubsystem,
Name: "spot_total",
Help: "Number of the spot interruption. Labeled by AZ, instance type",
},
[]string{instanceTypeLabel, zoneLabel, hostLabel, poolLabel},
)
)

func init() {
crmetrics.Registry.MustRegister(receivedMessages, deletedMessages, messageLatency, actionsPerformed)
crmetrics.Registry.MustRegister(receivedMessages, deletedMessages, messageLatency, actionsPerformed, spotTotal)
}
1 change: 1 addition & 0 deletions pkg/providers/instance/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ func getTags(ctx context.Context, nodeClass *v1beta1.EC2NodeClass, nodeClaim *co
corev1beta1.NodePoolLabelKey: nodeClaim.Labels[corev1beta1.NodePoolLabelKey],
corev1beta1.ManagedByAnnotationKey: options.FromContext(ctx).ClusterName,
v1beta1.LabelNodeClass: nodeClass.Name,
"Component": nodeClaim.Labels[corev1beta1.NodePoolLabelKey], // used for aws explore
}
return lo.Assign(nodeClass.Spec.Tags, staticTags)
}
Expand Down

0 comments on commit 269babf

Please sign in to comment.