Skip to content

Commit a65dee8

Browse files
committed
fix: CA not recognizing fake nodegroups
- add provider ID to nodes in the format `kwok:<node-name>`
- fix invalid `KwokManagedAnnotation`
- sanitize template nodes (remove `resourceVersion` etc.)
  - not sanitizing the node leads to an error during creation of new nodes
- abstract code to get NG name into a separate function `getNGNameFromAnnotation`

Signed-off-by: vadasambar <[email protected]>
1 parent 094f1b0 commit a65dee8

File tree

1 file changed

+52
-38
lines changed
  • cluster-autoscaler/cloudprovider/kwok

1 file changed

+52
-38
lines changed

cluster-autoscaler/cloudprovider/kwok/kwok.go

Lines changed: 52 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ const (
6262
// KwokManagedAnnotation is the default annotation
6363
// that kwok manages to decide if it should manage
6464
// a node it sees in the cluster
65-
KwokManagedAnnotation = "kwok.x-k8s.io/node=fake"
65+
KwokManagedAnnotation = "kwok.x-k8s.io/node"
6666

6767
// // GPULabel is the label added to nodes with GPU resource.
6868
// GPULabel = "cloud.google.com/gke-accelerator"
@@ -85,8 +85,7 @@ type KwokCloudProvider struct {
8585
// kubeClient is to be used only for create, delete and update
8686
kubeClient *kubeclient.Clientset
8787
// lister is to be used for get and list
88-
lister kube_util.NodeLister
89-
provider string
88+
lister kube_util.NodeLister
9089
}
9190

9291
// Name returns name of the cloud provider.
@@ -107,17 +106,13 @@ func (kwok *KwokCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
107106
func (kwok *KwokCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
108107
// Skip nodes that are not managed by kwok cloud provider.
109108
if !strings.HasPrefix(node.Spec.ProviderID, ProviderName) {
109+
klog.V(2).Infof("ignoring node '%s' because it is not managed by kwok", node.GetName())
110110
return nil, nil
111111
}
112112

113-
annotations := node.GetAnnotations()
114-
nodeGroupName := annotations[NGNameAnnotation]
115-
if nodeGroupName == "" {
116-
return nil, fmt.Errorf("nodegroup name annotation '%s' not found on the node '%s'", NGNameAnnotation, node.GetName())
117-
}
118-
119113
for _, nodeGroup := range kwok.nodeGroups {
120-
if nodeGroup.name == nodeGroupName {
114+
if nodeGroup.name == getNGNameFromAnnotation(node) {
115+
klog.V(5).Infof("found nodegroup '%s' for node '%s'", nodeGroup.name, node.GetName())
121116
return nodeGroup, nil
122117
}
123118
}
@@ -176,19 +171,22 @@ func (kwok *KwokCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider
176171
// Refresh is called before every main loop and can be used to dynamically update cloud provider state.
177172
// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh().
178173
func (kwok *KwokCloudProvider) Refresh() error {
179-
nodeList, err := kwok.lister.List()
180-
if err != nil {
181-
return err
182-
}
183174

184-
ngs := []*NodeGroup{}
185-
for _, no := range nodeList {
186-
ng := parseAnnotationsToNodegroup(no)
187-
ng.kubeClient = kwok.kubeClient
188-
ngs = append(ngs, ng)
189-
}
175+
// TODO(vadasambar): causes CA to not recognize kwok nodegroups
176+
// needs better implementation
177+
// nodeList, err := kwok.lister.List()
178+
// if err != nil {
179+
// return err
180+
// }
181+
182+
// ngs := []*NodeGroup{}
183+
// for _, no := range nodeList {
184+
// ng := parseAnnotationsToNodegroup(no)
185+
// ng.kubeClient = kwok.kubeClient
186+
// ngs = append(ngs, ng)
187+
// }
190188

191-
kwok.nodeGroups = ngs
189+
// kwok.nodeGroups = ngs
192190

193191
return nil
194192
}
@@ -254,9 +252,10 @@ func (nodeGroup *NodeGroup) IncreaseSize(delta int) error {
254252
for i := 0; i < delta; i++ {
255253
node := schedNode.Node()
256254
node.Name = fmt.Sprintf("%s-%s", nodeGroup.name, rand.String(5))
255+
node.Spec.ProviderID = fmt.Sprintf("kwok:%s", node.Name)
257256
_, err := nodeGroup.kubeClient.CoreV1().Nodes().Create(context.Background(), node, v1.CreateOptions{})
258257
if err != nil {
259-
return fmt.Errorf("couldn't create new node '%s'", node.Name)
258+
return fmt.Errorf("couldn't create new node '%s': %v", node.Name, err)
260259
}
261260
}
262261

@@ -319,7 +318,7 @@ func (nodeGroup *NodeGroup) getNodeNamesForNodeGroup() ([]string, error) {
319318
}
320319

321320
for _, no := range nodeList {
322-
if no.GetAnnotations()[NGNameAnnotation] == nodeGroup.Id() {
321+
if getNGNameFromAnnotation(no) == nodeGroup.Id() {
323322
names = append(names, no.GetName())
324323
}
325324
}
@@ -484,7 +483,9 @@ func parseNodeTemplates(data []byte, kubeClient *kubeclient.Clientset, lister ku
484483

485484
ngs := []*NodeGroup{}
486485
for _, no := range nodeTemplates {
486+
sanitizeNode(no)
487487
no.Annotations[KwokManagedAnnotation] = "fake"
488+
no.Spec.ProviderID = fmt.Sprintf("kwok:%s", no.GetName())
488489
ng := parseAnnotationsToNodegroup(no)
489490
ng.kubeClient = kubeClient
490491
ng.lister = lister
@@ -495,21 +496,14 @@ func parseNodeTemplates(data []byte, kubeClient *kubeclient.Clientset, lister ku
495496

496497
}
497498

498-
func parseAnnotationsToNodegroup(no *apiv1.Node) *NodeGroup {
499-
ngName := no.GetAnnotations()[NGNameAnnotation]
500-
501-
if ngName == "" {
502-
if no.GetAnnotations()["eks.amazonaws.com/nodegroup"] != "" {
503-
// add prefix to make it clear that this is a different nodegroup
504-
ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["eks.amazonaws.com/nodegroup"])
505-
} else if no.GetAnnotations()["cloud.google.com/gke-nodepool"] != "" {
506-
// add prefix to make it clear that this is a different nodegroup
507-
ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["cloud.google.com/gke-nodepool"])
508-
} else {
509-
klog.Fatalf("did not find nodegroup annotation on the template node '%s'", no.GetName())
510-
}
511-
}
499+
func sanitizeNode(no *apiv1.Node) {
500+
no.ResourceVersion = ""
501+
no.Generation = 0
502+
no.UID = ""
503+
no.CreationTimestamp = v1.Time{}
504+
}
512505

506+
func parseAnnotationsToNodegroup(no *apiv1.Node) *NodeGroup {
513507
min := 0
514508
max := 200
515509
target := min
@@ -550,10 +544,30 @@ func parseAnnotationsToNodegroup(no *apiv1.Node) *NodeGroup {
550544
no.Name = fmt.Sprintf("kwok-fake-%s", no.GetName())
551545

552546
return &NodeGroup{
553-
name: ngName,
547+
name: getNGNameFromAnnotation(no),
554548
minSize: min,
555549
maxSize: max,
556550
targetSize: target,
557551
nodeTemplate: no,
558552
}
559553
}
554+
555+
func getNGNameFromAnnotation(no *apiv1.Node) string {
556+
ngName := no.GetAnnotations()[NGNameAnnotation]
557+
558+
if ngName != "" {
559+
return ngName
560+
}
561+
562+
if no.GetAnnotations()["eks.amazonaws.com/nodegroup"] != "" {
563+
// add prefix to make it clear that this is a different nodegroup
564+
ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["eks.amazonaws.com/nodegroup"])
565+
} else if no.GetAnnotations()["cloud.google.com/gke-nodepool"] != "" {
566+
// add prefix to make it clear that this is a different nodegroup
567+
ngName = fmt.Sprintf("kwok-fake-%s", no.GetAnnotations()["cloud.google.com/gke-nodepool"])
568+
} else {
569+
klog.Fatalf("did not find nodegroup annotation on the template node '%s'", no.GetName())
570+
}
571+
572+
return ngName
573+
}

0 commit comments

Comments
 (0)