@@ -18,7 +18,6 @@ package actuation
18
18
19
19
import (
20
20
"fmt"
21
- "reflect"
22
21
"sync"
23
22
"time"
24
23
@@ -67,59 +66,68 @@ func NewNodeDeletionBatcher(ctx *context.AutoscalingContext, csr *clusterstate.C
67
66
}
68
67
}
69
68
70
- // AddNode adds node to delete candidates and schedule deletion.
69
+ // AddNode adds node to delete candidates and schedules deletion.
71
70
func (d * NodeDeletionBatcher ) AddNode (node * apiv1.Node , drain bool ) error {
71
+ nodeGroup , err := d .ctx .CloudProvider .NodeGroupForNode (node )
72
+ if err != nil {
73
+ return err
74
+ }
75
+ return d .AddNodes ([]* apiv1.Node {node }, nodeGroup , drain )
76
+ }
77
+
78
+ // AddNodes adds node list to delete candidates and schedules deletion.
79
+ func (d * NodeDeletionBatcher ) AddNodes (nodes []* apiv1.Node , nodeGroup cloudprovider.NodeGroup , drain bool ) error {
72
80
// If delete interval is 0, than instantly start node deletion.
73
81
if d .deleteInterval == 0 {
74
- nodeGroup , err := deleteNodesFromCloudProvider (d .ctx , []* apiv1.Node {node })
75
- if err != nil {
76
- result := status.NodeDeleteResult {ResultType : status .NodeDeleteErrorFailedToDelete , Err : err }
77
- CleanUpAndRecordFailedScaleDownEvent (d .ctx , node , nodeGroup .Id (), drain , d .nodeDeletionTracker , "" , result )
78
- } else {
79
- RegisterAndRecordSuccessfulScaleDownEvent (d .ctx , d .clusterState , node , nodeGroup , drain , d .nodeDeletionTracker )
82
+ err := deleteNodesFromCloudProvider (d .ctx , nodes , nodeGroup )
83
+ for _ , node := range nodes {
84
+ if err != nil {
85
+ result := status.NodeDeleteResult {ResultType : status .NodeDeleteErrorFailedToDelete , Err : err }
86
+ CleanUpAndRecordFailedScaleDownEvent (d .ctx , node , nodeGroup .Id (), drain , d .nodeDeletionTracker , "" , result )
87
+ } else {
88
+ RegisterAndRecordSuccessfulScaleDownEvent (d .ctx , d .clusterState , node , nodeGroup , drain , d .nodeDeletionTracker )
89
+ }
80
90
}
81
91
return nil
82
92
}
83
- nodeGroupId , first , err := d .addNodeToBucket ( node , drain )
93
+ first , err := d .addNodesToBucket ( nodes , nodeGroup , drain )
84
94
if err != nil {
85
95
return err
86
96
}
87
97
if first {
88
- go func (nodeGroupId string ) {
98
+ go func (nodeGroup cloudprovider. NodeGroup ) {
89
99
time .Sleep (d .deleteInterval )
90
- d .remove ( nodeGroupId )
91
- }(nodeGroupId )
100
+ d .executeForBucket ( nodeGroup )
101
+ }(nodeGroup )
92
102
}
93
103
return nil
94
104
}
95
105
96
106
// AddToBucket adds node to delete candidates and return if it's a first node in the group.
97
- func (d * NodeDeletionBatcher ) addNodeToBucket ( node * apiv1.Node , drain bool ) (string , bool , error ) {
107
+ func (d * NodeDeletionBatcher ) addNodesToBucket ( nodes [] * apiv1.Node , nodeGroup cloudprovider. NodeGroup , drain bool ) (bool , error ) {
98
108
d .Lock ()
99
109
defer d .Unlock ()
100
- nodeGroup , err := d .ctx .CloudProvider .NodeGroupForNode (node )
101
- if err != nil {
102
- return "" , false , err
110
+ for _ , node := range nodes {
111
+ d .drainedNodeDeletions [node .Name ] = drain
103
112
}
104
- d .drainedNodeDeletions [node .Name ] = drain
105
113
val , ok := d .deletionsPerNodeGroup [nodeGroup .Id ()]
106
114
if ! ok || len (val ) == 0 {
107
- d .deletionsPerNodeGroup [nodeGroup .Id ()] = [] * apiv1. Node { node }
108
- return nodeGroup . Id (), true , nil
115
+ d .deletionsPerNodeGroup [nodeGroup .Id ()] = nodes
116
+ return true , nil
109
117
}
110
- d .deletionsPerNodeGroup [nodeGroup .Id ()] = append (d .deletionsPerNodeGroup [nodeGroup .Id ()], node )
111
- return nodeGroup . Id (), false , nil
118
+ d .deletionsPerNodeGroup [nodeGroup .Id ()] = append (d .deletionsPerNodeGroup [nodeGroup .Id ()], nodes ... )
119
+ return false , nil
112
120
}
113
121
114
- // remove delete nodes of a given nodeGroup, if successful, the deletion is recorded in CSR, and an event is emitted on the node.
115
- func (d * NodeDeletionBatcher ) remove ( nodeGroupId string ) error {
122
+ // executeForBucket deletes the nodes batched for the given nodeGroup. If the deletion succeeds, it is recorded in CSR and an event is emitted on each node.
123
+ func (d * NodeDeletionBatcher ) executeForBucket ( nodeGroup cloudprovider. NodeGroup ) error {
116
124
d .Lock ()
117
125
defer d .Unlock ()
118
- nodes , ok := d .deletionsPerNodeGroup [nodeGroupId ]
126
+ nodes , ok := d .deletionsPerNodeGroup [nodeGroup . Id () ]
119
127
if ! ok {
120
- return fmt .Errorf ("Node Group %s is not present in the batch deleter" , nodeGroupId )
128
+ return fmt .Errorf ("Node Group %s is not present in the batch deleter" , nodeGroup . Id () )
121
129
}
122
- delete (d .deletionsPerNodeGroup , nodeGroupId )
130
+ delete (d .deletionsPerNodeGroup , nodeGroup . Id () )
123
131
drainedNodeDeletions := make (map [string ]bool )
124
132
for _ , node := range nodes {
125
133
drainedNodeDeletions [node .Name ] = d .drainedNodeDeletions [node .Name ]
@@ -128,7 +136,7 @@ func (d *NodeDeletionBatcher) remove(nodeGroupId string) error {
128
136
129
137
go func (nodes []* apiv1.Node , drainedNodeDeletions map [string ]bool ) {
130
138
var result status.NodeDeleteResult
131
- nodeGroup , err := deleteNodesFromCloudProvider (d .ctx , nodes )
139
+ err := deleteNodesFromCloudProvider (d .ctx , nodes , nodeGroup )
132
140
for _ , node := range nodes {
133
141
drain := drainedNodeDeletions [node .Name ]
134
142
if err != nil {
@@ -137,26 +145,18 @@ func (d *NodeDeletionBatcher) remove(nodeGroupId string) error {
137
145
} else {
138
146
RegisterAndRecordSuccessfulScaleDownEvent (d .ctx , d .clusterState , node , nodeGroup , drain , d .nodeDeletionTracker )
139
147
}
140
-
141
148
}
142
149
}(nodes , drainedNodeDeletions )
143
150
return nil
144
151
}
145
152
146
153
// deleteNodeFromCloudProvider removes the given nodes from cloud provider. No extra pre-deletion actions are executed on
147
154
// the Kubernetes side.
148
- func deleteNodesFromCloudProvider (ctx * context.AutoscalingContext , nodes []* apiv1.Node ) (cloudprovider.NodeGroup , error ) {
149
- nodeGroup , err := ctx .CloudProvider .NodeGroupForNode (nodes [0 ])
150
- if err != nil {
151
- return nodeGroup , errors .NewAutoscalerError (errors .CloudProviderError , "failed to find node group for %s: %v" , nodes [0 ].Name , err )
152
- }
153
- if nodeGroup == nil || reflect .ValueOf (nodeGroup ).IsNil () {
154
- return nodeGroup , errors .NewAutoscalerError (errors .InternalError , "picked node that doesn't belong to a node group: %s" , nodes [0 ].Name )
155
+ func deleteNodesFromCloudProvider (ctx * context.AutoscalingContext , nodes []* apiv1.Node , nodeGroup cloudprovider.NodeGroup ) error {
156
+ if err := nodeGroup .DeleteNodes (nodes ); err != nil {
157
+ return errors .NewAutoscalerError (errors .CloudProviderError , "failed to delete nodes from group %s: %v" , nodeGroup .Id (), err )
155
158
}
156
- if err = nodeGroup .DeleteNodes (nodes ); err != nil {
157
- return nodeGroup , errors .NewAutoscalerError (errors .CloudProviderError , "failed to delete %s: %v" , nodes [0 ].Name , err )
158
- }
159
- return nodeGroup , nil
159
+ return nil
160
160
}
161
161
162
162
func nodeScaleDownReason (node * apiv1.Node , drain bool ) metrics.NodeScaleDownReason {
0 commit comments