Skip to content

Commit 086c713

Browse files
committed
Change handling of atomic scale ups options in orchestrator
* Started handling atomic scale ups with the existing estimator * Skip setting similar node groups for the atomic node groups * Renamed the autoscaling option from "AtomicScaleUp" to "AtomicScaling" * Merged multiple tests into one single table driven test. * Fixed some typos.
1 parent 06ef4a3 commit 086c713

File tree

3 files changed

+26
-38
lines changed

3 files changed

+26
-38
lines changed

cluster-autoscaler/config/autoscaling_options.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ type NodeGroupAutoscalingOptions struct {
4646
ScaleDownUnreadyTime time.Duration
4747
// Maximum time CA waits for node to be provisioned
4848
MaxNodeProvisionTime time.Duration
49-
// AtomicScaleUp means that a node group should be scaled up to maximum size in a scale up.
50-
AtomicScaleUp bool
49+
// AtomicScaling means that a node group should be scaled up or down all at once instead of one-by-one
50+
AtomicScaling bool
5151
}
5252

5353
// GCEOptions contain autoscaling options specific to GCE cloud provider.

cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ import (
3232
"k8s.io/autoscaler/cluster-autoscaler/core/scaleup/equivalence"
3333
"k8s.io/autoscaler/cluster-autoscaler/core/scaleup/resource"
3434
"k8s.io/autoscaler/cluster-autoscaler/core/utils"
35-
"k8s.io/autoscaler/cluster-autoscaler/estimator"
3635
"k8s.io/autoscaler/cluster-autoscaler/expander"
3736
"k8s.io/autoscaler/cluster-autoscaler/metrics"
3837
ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors"
@@ -363,7 +362,7 @@ func (o *ScaleUpOrchestrator) ScaleUpToNodeGroupMinSize(
363362
continue
364363
}
365364

366-
if skipReason := o.IsNodeGroupResourceExceeded(resourcesLeft, ng, nodeInfo); skipReason != nil {
365+
if skipReason := o.IsNodeGroupResourceExceeded(resourcesLeft, ng, nodeInfo, 1); skipReason != nil {
367366
klog.Warning("ScaleUpToNodeGroupMinSize: node group resource excceded: %v", skipReason)
368367
continue
369368
}
@@ -446,8 +445,10 @@ func (o *ScaleUpOrchestrator) filterValidScaleUpNodeGroups(
446445
if err != nil {
447446
klog.Errorf("Couldn't get autoscaling options for ng: %v", nodeGroup.Id())
448447
}
449-
if autoscalingOptions != nil && autoscalingOptions.AtomicScaleUp {
450-
if o.autoscalingContext.MaxNodesTotal != 0 && currentNodeCount+(nodeGroup.MaxSize()-currentTargetSize) > o.autoscalingContext.MaxNodesTotal {
448+
numNodes := 1
449+
if autoscalingOptions != nil && autoscalingOptions.AtomicScaling {
450+
numNodes = nodeGroup.MaxSize() - currentTargetSize
451+
if o.autoscalingContext.MaxNodesTotal != 0 && currentNodeCount+numNodes > o.autoscalingContext.MaxNodesTotal {
451452
klog.V(4).Infof("Skipping node group %s - atomic scale-up exceeds cluster node count limit", nodeGroup.Id())
452453
skippedNodeGroups[nodeGroup.Id()] = NewSkippedReasons("atomic scale-up exceeds cluster node count limit")
453454
continue
@@ -460,7 +461,7 @@ func (o *ScaleUpOrchestrator) filterValidScaleUpNodeGroups(
460461
skippedNodeGroups[nodeGroup.Id()] = NotReadyReason
461462
continue
462463
}
463-
if skipReason := o.IsNodeGroupResourceExceeded(resourcesLeft, nodeGroup, nodeInfo); skipReason != nil {
464+
if skipReason := o.IsNodeGroupResourceExceeded(resourcesLeft, nodeGroup, nodeInfo, numNodes); skipReason != nil {
464465
skippedNodeGroups[nodeGroup.Id()] = skipReason
465466
continue
466467
}
@@ -486,35 +487,19 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption(
486487
return option
487488
}
488489

490+
estimator := o.autoscalingContext.EstimatorBuilder(o.autoscalingContext.PredicateChecker, o.autoscalingContext.ClusterSnapshot)
491+
option.NodeCount, option.Pods = estimator.Estimate(pods, nodeInfo, nodeGroup)
492+
option.SimilarNodeGroups = o.ComputeSimilarNodeGroups(nodeGroup, nodeInfos, schedulablePods, now)
493+
489494
autoscalingOptions, err := nodeGroup.GetOptions(o.autoscalingContext.NodeGroupDefaults)
490495
if err != nil {
491496
klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err)
492497
}
493-
if autoscalingOptions != nil && autoscalingOptions.AtomicScaleUp {
494-
// If atomic, there should be no similar node groups for balancing
495-
// Also the number of pods should be capped according to the node count.
496-
497-
// Limit the number of nodes we estimate to the max scale up possible.
498-
currentTargetSize, err := nodeGroup.TargetSize()
499-
if err != nil {
500-
klog.Errorf("Failed to get node group size: %v", err)
501-
return option
502-
}
503-
// When the node group has the atomic scale up option, the number of nodes
504-
// that can be added to the node group should be capped according to the
505-
// maximum size of the node group and not the 'MaxNodesPerScaleUp' option.
506-
nodeEstimationCap := nodeGroup.MaxSize() - currentTargetSize
507-
limiter := estimator.NewThresholdBasedEstimationLimiter(nodeEstimationCap, 0)
508-
estimator := estimator.NewBinpackingNodeEstimator(o.autoscalingContext.PredicateChecker, o.autoscalingContext.ClusterSnapshot, limiter, estimator.NewDecreasingPodOrderer())
509-
option.NodeCount, option.Pods = estimator.Estimate(pods, nodeInfo, nodeGroup)
498+
if autoscalingOptions != nil && autoscalingOptions.AtomicScaling {
510499
if option.NodeCount > 0 && option.NodeCount != nodeGroup.MaxSize() {
511500
option.NodeCount = nodeGroup.MaxSize()
512501
}
513-
return option
514502
}
515-
estimator := o.autoscalingContext.EstimatorBuilder(o.autoscalingContext.PredicateChecker, o.autoscalingContext.ClusterSnapshot)
516-
option.NodeCount, option.Pods = estimator.Estimate(pods, nodeInfo, nodeGroup)
517-
option.SimilarNodeGroups = o.ComputeSimilarNodeGroups(nodeGroup, nodeInfos, schedulablePods, now)
518503
return option
519504
}
520505

@@ -590,20 +575,15 @@ func (o *ScaleUpOrchestrator) IsNodeGroupReadyToScaleUp(nodeGroup cloudprovider.
590575
}
591576

592577
// IsNodeGroupResourceExceeded returns nil if node group resource limits are not exceeded, otherwise a reason is provided.
593-
func (o *ScaleUpOrchestrator) IsNodeGroupResourceExceeded(resourcesLeft resource.Limits, nodeGroup cloudprovider.NodeGroup, nodeInfo *schedulerframework.NodeInfo) status.Reasons {
578+
func (o *ScaleUpOrchestrator) IsNodeGroupResourceExceeded(resourcesLeft resource.Limits, nodeGroup cloudprovider.NodeGroup, nodeInfo *schedulerframework.NodeInfo, numNodes int) status.Reasons {
594579
resourcesDelta, err := o.resourceManager.DeltaForNode(o.autoscalingContext, nodeInfo, nodeGroup)
595580
if err != nil {
596581
klog.Errorf("Skipping node group %s; error getting node group resources: %v", nodeGroup.Id(), err)
597582
return NotReadyReason
598583
}
599-
autoscalingOptions, aErr := nodeGroup.GetOptions(o.autoscalingContext.NodeGroupDefaults)
600-
if aErr != nil {
601-
klog.Errorf("Couldn't get autoscaling options for ng: %v", nodeGroup.Id())
602-
}
603-
if autoscalingOptions != nil && autoscalingOptions.AtomicScaleUp {
604-
for resource, delta := range resourcesDelta {
605-
resourcesDelta[resource] = delta * int64(nodeGroup.MaxSize())
606-
}
584+
585+
for resource, delta := range resourcesDelta {
586+
resourcesDelta[resource] = delta * int64(numNodes)
607587
}
608588

609589
checkResult := resource.CheckDeltaWithinLimits(resourcesLeft, resourcesDelta)
@@ -649,6 +629,14 @@ func (o *ScaleUpOrchestrator) ComputeSimilarNodeGroups(
649629
return nil
650630
}
651631

632+
autoscalingOptions, err := nodeGroup.GetOptions(o.autoscalingContext.NodeGroupDefaults)
633+
if err != nil {
634+
klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err)
635+
}
636+
if autoscalingOptions != nil && autoscalingOptions.AtomicScaling {
637+
return nil
638+
}
639+
652640
groupSchedulablePods, found := schedulablePods[nodeGroup.Id()]
653641
if !found || len(groupSchedulablePods) == 0 {
654642
return nil

cluster-autoscaler/core/scaleup/orchestrator/orchestrator_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ func TestMixedScaleUp(t *testing.T) {
126126

127127
func TestAtomicScaleUp(t *testing.T) {
128128
options := defaultOptions
129-
options.NodeGroupDefaults.AtomicScaleUp = true
129+
options.NodeGroupDefaults.AtomicScaling = true
130130

131131
optionsWithLimitedMaxCores := options
132132
optionsWithLimitedMaxCores.MaxCoresTotal = 3

0 commit comments

Comments
 (0)