Skip to content

Add support for scaling up with ZeroToMaxNodesScaling option #5826

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ func (tng *TestNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
id := tng.id
tng.targetSize -= len(nodes)
tng.Unlock()
if tng.opts != nil && tng.opts.AtomicScaling && tng.targetSize != 0 {
if tng.opts != nil && tng.opts.ZeroOrMaxNodeScaling && tng.targetSize != 0 {
return fmt.Errorf("TestNodeGroup: attempted to partially scale down a node group that should be scaled down atomically")
}
for _, node := range nodes {
Expand Down
4 changes: 2 additions & 2 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ type NodeGroupAutoscalingOptions struct {
ScaleDownUnreadyTime time.Duration
// Maximum time CA waits for node to be provisioned
MaxNodeProvisionTime time.Duration
// AtomicScaling means that all nodes should be provisioned and brought down all at once instead of one-by-one
AtomicScaling bool
// ZeroOrMaxNodeScaling means that a node group should be scaled up to maximum size or down to zero nodes all at once instead of one-by-one.
ZeroOrMaxNodeScaling bool
}

// GCEOptions contain autoscaling options specific to GCE cloud provider.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1103,7 +1103,7 @@ func TestStartDeletionInBatchBasic(t *testing.T) {
func sizedNodeGroup(id string, size int, atomic bool) cloudprovider.NodeGroup {
ng := testprovider.NewTestNodeGroup(id, 10000, 0, size, true, false, "n1-standard-2", nil, nil)
ng.SetOptions(&config.NodeGroupAutoscalingOptions{
AtomicScaling: atomic,
ZeroOrMaxNodeScaling: atomic,
})
return ng
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func (ds *GroupDeletionScheduler) ScheduleDeletion(nodeInfo *framework.NodeInfo,
return
}

ds.addToBatcher(nodeInfo, nodeGroup, batchSize, drain, opts.AtomicScaling)
ds.addToBatcher(nodeInfo, nodeGroup, batchSize, drain, opts.ZeroOrMaxNodeScaling)
}

// prepareNodeForDeletion is a long-running operation, so it needs to avoid locking the AtomicDeletionScheduler object
Expand Down
4 changes: 2 additions & 2 deletions cluster-autoscaler/core/scaledown/budgets/budgets.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ func (bp *ScaleDownBudgetProcessor) categorize(groups []*NodeGroupView) (individ
klog.Errorf("Failed to get autoscaling options for node group %s: %v", view.Group.Id(), err)
continue
}
if autoscalingOptions != nil && autoscalingOptions.AtomicScaling {
if autoscalingOptions != nil && autoscalingOptions.ZeroOrMaxNodeScaling {
atomic = append(atomic, view)
} else {
individual = append(individual, view)
Expand All @@ -196,7 +196,7 @@ func (bp *ScaleDownBudgetProcessor) groupByNodeGroup(nodes []*apiv1.Node) (indiv
klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err)
continue
}
if autoscalingOptions != nil && autoscalingOptions.AtomicScaling {
if autoscalingOptions != nil && autoscalingOptions.ZeroOrMaxNodeScaling {
if idx, ok := atomicGroup[nodeGroup]; ok {
atomic[idx].Nodes = append(atomic[idx].Nodes, node)
} else {
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/core/scaledown/budgets/budgets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ var transformNodeGroupView = cmp.Transformer("transformNodeGroupView", func(b No
func sizedNodeGroup(id string, size int, atomic bool) cloudprovider.NodeGroup {
ng := testprovider.NewTestNodeGroup(id, 10000, 0, size, true, false, "n1-standard-2", nil, nil)
ng.SetOptions(&config.NodeGroupAutoscalingOptions{
AtomicScaling: atomic,
ZeroOrMaxNodeScaling: atomic,
})
return ng
}
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/core/scaledown/planner/planner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,7 @@ func TestNodesToDelete(t *testing.T) {
func sizedNodeGroup(id string, size int, atomic bool) cloudprovider.NodeGroup {
ng := testprovider.NewTestNodeGroup(id, 10000, 0, size, true, false, "n1-standard-2", nil, nil)
ng.SetOptions(&config.NodeGroupAutoscalingOptions{
AtomicScaling: atomic,
ZeroOrMaxNodeScaling: atomic,
})
return ng
}
Expand Down
44 changes: 40 additions & 4 deletions cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func (o *ScaleUpOrchestrator) ScaleUp(
now := time.Now()

// Filter out invalid node groups
validNodeGroups, skippedNodeGroups := o.filterValidScaleUpNodeGroups(nodeGroups, nodeInfos, resourcesLeft, now)
validNodeGroups, skippedNodeGroups := o.filterValidScaleUpNodeGroups(nodeGroups, nodeInfos, resourcesLeft, len(nodes)+len(upcomingNodes), now)

// Mark skipped node groups as processed.
for nodegroupID := range skippedNodeGroups {
Expand Down Expand Up @@ -362,7 +362,7 @@ func (o *ScaleUpOrchestrator) ScaleUpToNodeGroupMinSize(
continue
}

if skipReason := o.IsNodeGroupResourceExceeded(resourcesLeft, ng, nodeInfo); skipReason != nil {
if skipReason := o.IsNodeGroupResourceExceeded(resourcesLeft, ng, nodeInfo, 1); skipReason != nil {
klog.Warning("ScaleUpToNodeGroupMinSize: node group resource excceded: %v", skipReason)
continue
}
Expand Down Expand Up @@ -418,6 +418,7 @@ func (o *ScaleUpOrchestrator) filterValidScaleUpNodeGroups(
nodeGroups []cloudprovider.NodeGroup,
nodeInfos map[string]*schedulerframework.NodeInfo,
resourcesLeft resource.Limits,
currentNodeCount int,
now time.Time,
) ([]cloudprovider.NodeGroup, map[string]status.Reasons) {
var validNodeGroups []cloudprovider.NodeGroup
Expand All @@ -440,14 +441,27 @@ func (o *ScaleUpOrchestrator) filterValidScaleUpNodeGroups(
skippedNodeGroups[nodeGroup.Id()] = MaxLimitReachedReason
continue
}
autoscalingOptions, err := nodeGroup.GetOptions(o.autoscalingContext.NodeGroupDefaults)
if err != nil {
klog.Errorf("Couldn't get autoscaling options for ng: %v", nodeGroup.Id())
}
numNodes := 1
if autoscalingOptions != nil && autoscalingOptions.ZeroOrMaxNodeScaling {
numNodes = nodeGroup.MaxSize() - currentTargetSize
if o.autoscalingContext.MaxNodesTotal != 0 && currentNodeCount+numNodes > o.autoscalingContext.MaxNodesTotal {
klog.V(4).Infof("Skipping node group %s - atomic scale-up exceeds cluster node count limit", nodeGroup.Id())
skippedNodeGroups[nodeGroup.Id()] = NewSkippedReasons("atomic scale-up exceeds cluster node count limit")
continue
}
}

nodeInfo, found := nodeInfos[nodeGroup.Id()]
if !found {
klog.Errorf("No node info for: %s", nodeGroup.Id())
skippedNodeGroups[nodeGroup.Id()] = NotReadyReason
continue
}
if skipReason := o.IsNodeGroupResourceExceeded(resourcesLeft, nodeGroup, nodeInfo); skipReason != nil {
if skipReason := o.IsNodeGroupResourceExceeded(resourcesLeft, nodeGroup, nodeInfo, numNodes); skipReason != nil {
skippedNodeGroups[nodeGroup.Id()] = skipReason
continue
}
Expand Down Expand Up @@ -476,6 +490,16 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption(
estimator := o.autoscalingContext.EstimatorBuilder(o.autoscalingContext.PredicateChecker, o.autoscalingContext.ClusterSnapshot)
option.NodeCount, option.Pods = estimator.Estimate(pods, nodeInfo, nodeGroup)
option.SimilarNodeGroups = o.ComputeSimilarNodeGroups(nodeGroup, nodeInfos, schedulablePods, now)

autoscalingOptions, err := nodeGroup.GetOptions(o.autoscalingContext.NodeGroupDefaults)
if err != nil {
klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err)
}
if autoscalingOptions != nil && autoscalingOptions.ZeroOrMaxNodeScaling {
if option.NodeCount > 0 && option.NodeCount != nodeGroup.MaxSize() {
option.NodeCount = nodeGroup.MaxSize()
}
}
return option
}

Expand Down Expand Up @@ -551,13 +575,17 @@ func (o *ScaleUpOrchestrator) IsNodeGroupReadyToScaleUp(nodeGroup cloudprovider.
}

// IsNodeGroupResourceExceeded returns nil if node group resource limits are not exceeded, otherwise a reason is provided.
func (o *ScaleUpOrchestrator) IsNodeGroupResourceExceeded(resourcesLeft resource.Limits, nodeGroup cloudprovider.NodeGroup, nodeInfo *schedulerframework.NodeInfo) status.Reasons {
func (o *ScaleUpOrchestrator) IsNodeGroupResourceExceeded(resourcesLeft resource.Limits, nodeGroup cloudprovider.NodeGroup, nodeInfo *schedulerframework.NodeInfo, numNodes int) status.Reasons {
resourcesDelta, err := o.resourceManager.DeltaForNode(o.autoscalingContext, nodeInfo, nodeGroup)
if err != nil {
klog.Errorf("Skipping node group %s; error getting node group resources: %v", nodeGroup.Id(), err)
return NotReadyReason
}

for resource, delta := range resourcesDelta {
resourcesDelta[resource] = delta * int64(numNodes)
}

checkResult := resource.CheckDeltaWithinLimits(resourcesLeft, resourcesDelta)
if checkResult.Exceeded {
klog.V(4).Infof("Skipping node group %s; maximal limit exceeded for %v", nodeGroup.Id(), checkResult.ExceededResources)
Expand Down Expand Up @@ -601,6 +629,14 @@ func (o *ScaleUpOrchestrator) ComputeSimilarNodeGroups(
return nil
}

autoscalingOptions, err := nodeGroup.GetOptions(o.autoscalingContext.NodeGroupDefaults)
if err != nil {
klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err)
}
if autoscalingOptions != nil && autoscalingOptions.ZeroOrMaxNodeScaling {
return nil
}

groupSchedulablePods, found := schedulablePods[nodeGroup.Id()]
if !found || len(groupSchedulablePods) == 0 {
return nil
Expand Down
Loading