Skip to content

Commit 973f9fd

Browse files
authored
Merge pull request #5894 from BigDarkClown/fix-cs
Include short unregistered nodes in calculation of incorrect node group
2 parents 8f83f7e + 67d3e7e commit 973f9fd

File tree

2 files changed

+319
-2
lines changed

2 files changed

+319
-2
lines changed

cluster-autoscaler/clusterstate/clusterstate.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -648,8 +648,9 @@ func (csr *ClusterStateRegistry) updateIncorrectNodeGroupSizes(currentTime time.
648648
}
649649
continue
650650
}
651-
if len(readiness.Registered) > acceptableRange.MaxNodes ||
652-
len(readiness.Registered) < acceptableRange.MinNodes {
651+
unregisteredNodes := len(readiness.Unregistered) + len(readiness.LongUnregistered)
652+
if len(readiness.Registered) > acceptableRange.CurrentTarget ||
653+
len(readiness.Registered) < acceptableRange.CurrentTarget-unregisteredNodes {
653654
incorrect := IncorrectNodeGroupSize{
654655
CurrentSize: len(readiness.Registered),
655656
ExpectedSize: acceptableRange.CurrentTarget,

cluster-autoscaler/clusterstate/clusterstate_test.go

Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,3 +1064,319 @@ func newBackoff() backoff.Backoff {
10641064
return backoff.NewIdBasedExponentialBackoff(5*time.Minute, /*InitialNodeGroupBackoffDuration*/
10651065
30*time.Minute /*MaxNodeGroupBackoffDuration*/, 3*time.Hour /*NodeGroupBackoffResetTimeout*/)
10661066
}
1067+
1068+
func TestUpdateAcceptableRanges(t *testing.T) {
1069+
testCases := []struct {
1070+
name string
1071+
1072+
targetSizes map[string]int
1073+
readiness map[string]Readiness
1074+
scaleUpRequests map[string]*ScaleUpRequest
1075+
scaledDownGroups []string
1076+
1077+
wantAcceptableRanges map[string]AcceptableRange
1078+
}{
1079+
{
1080+
name: "No scale-ups/scale-downs",
1081+
targetSizes: map[string]int{
1082+
"ng1": 10,
1083+
"ng2": 20,
1084+
},
1085+
readiness: map[string]Readiness{
1086+
"ng1": {Ready: make([]string, 10)},
1087+
"ng2": {Ready: make([]string, 20)},
1088+
},
1089+
wantAcceptableRanges: map[string]AcceptableRange{
1090+
"ng1": {MinNodes: 10, MaxNodes: 10, CurrentTarget: 10},
1091+
"ng2": {MinNodes: 20, MaxNodes: 20, CurrentTarget: 20},
1092+
},
1093+
},
1094+
{
1095+
name: "Ongoing scale-ups",
1096+
targetSizes: map[string]int{
1097+
"ng1": 10,
1098+
"ng2": 20,
1099+
},
1100+
readiness: map[string]Readiness{
1101+
"ng1": {Ready: make([]string, 10)},
1102+
"ng2": {Ready: make([]string, 20)},
1103+
},
1104+
scaleUpRequests: map[string]*ScaleUpRequest{
1105+
"ng1": {Increase: 3},
1106+
"ng2": {Increase: 5},
1107+
},
1108+
wantAcceptableRanges: map[string]AcceptableRange{
1109+
"ng1": {MinNodes: 7, MaxNodes: 10, CurrentTarget: 10},
1110+
"ng2": {MinNodes: 15, MaxNodes: 20, CurrentTarget: 20},
1111+
},
1112+
},
1113+
{
1114+
name: "Ongoing scale-downs",
1115+
targetSizes: map[string]int{
1116+
"ng1": 10,
1117+
"ng2": 20,
1118+
},
1119+
readiness: map[string]Readiness{
1120+
"ng1": {Ready: make([]string, 10)},
1121+
"ng2": {Ready: make([]string, 20)},
1122+
},
1123+
scaledDownGroups: []string{"ng1", "ng1", "ng2", "ng2", "ng2"},
1124+
wantAcceptableRanges: map[string]AcceptableRange{
1125+
"ng1": {MinNodes: 10, MaxNodes: 12, CurrentTarget: 10},
1126+
"ng2": {MinNodes: 20, MaxNodes: 23, CurrentTarget: 20},
1127+
},
1128+
},
1129+
{
1130+
name: "Some short unregistered nodes",
1131+
targetSizes: map[string]int{
1132+
"ng1": 10,
1133+
"ng2": 20,
1134+
},
1135+
readiness: map[string]Readiness{
1136+
"ng1": {Ready: make([]string, 8), Unregistered: make([]string, 2)},
1137+
"ng2": {Ready: make([]string, 17), Unregistered: make([]string, 3)},
1138+
},
1139+
wantAcceptableRanges: map[string]AcceptableRange{
1140+
"ng1": {MinNodes: 10, MaxNodes: 10, CurrentTarget: 10},
1141+
"ng2": {MinNodes: 20, MaxNodes: 20, CurrentTarget: 20},
1142+
},
1143+
},
1144+
{
1145+
name: "Some long unregistered nodes",
1146+
targetSizes: map[string]int{
1147+
"ng1": 10,
1148+
"ng2": 20,
1149+
},
1150+
readiness: map[string]Readiness{
1151+
"ng1": {Ready: make([]string, 8), LongUnregistered: make([]string, 2)},
1152+
"ng2": {Ready: make([]string, 17), LongUnregistered: make([]string, 3)},
1153+
},
1154+
wantAcceptableRanges: map[string]AcceptableRange{
1155+
"ng1": {MinNodes: 8, MaxNodes: 10, CurrentTarget: 10},
1156+
"ng2": {MinNodes: 17, MaxNodes: 20, CurrentTarget: 20},
1157+
},
1158+
},
1159+
{
1160+
name: "Everything together",
1161+
targetSizes: map[string]int{
1162+
"ng1": 10,
1163+
"ng2": 20,
1164+
},
1165+
readiness: map[string]Readiness{
1166+
"ng1": {Ready: make([]string, 8), Unregistered: make([]string, 1), LongUnregistered: make([]string, 2)},
1167+
"ng2": {Ready: make([]string, 17), Unregistered: make([]string, 3), LongUnregistered: make([]string, 4)},
1168+
},
1169+
scaleUpRequests: map[string]*ScaleUpRequest{
1170+
"ng1": {Increase: 3},
1171+
"ng2": {Increase: 5},
1172+
},
1173+
scaledDownGroups: []string{"ng1", "ng1", "ng2", "ng2", "ng2"},
1174+
wantAcceptableRanges: map[string]AcceptableRange{
1175+
"ng1": {MinNodes: 5, MaxNodes: 12, CurrentTarget: 10},
1176+
"ng2": {MinNodes: 11, MaxNodes: 23, CurrentTarget: 20},
1177+
},
1178+
},
1179+
}
1180+
1181+
for _, tc := range testCases {
1182+
t.Run(tc.name, func(t *testing.T) {
1183+
provider := testprovider.NewTestCloudProvider(nil, nil)
1184+
for nodeGroupName, targetSize := range tc.targetSizes {
1185+
provider.AddNodeGroup(nodeGroupName, 0, 1000, targetSize)
1186+
}
1187+
var scaleDownRequests []*ScaleDownRequest
1188+
for _, nodeGroupName := range tc.scaledDownGroups {
1189+
scaleDownRequests = append(scaleDownRequests, &ScaleDownRequest{
1190+
NodeGroup: provider.GetNodeGroup(nodeGroupName),
1191+
})
1192+
}
1193+
1194+
clusterState := &ClusterStateRegistry{
1195+
cloudProvider: provider,
1196+
perNodeGroupReadiness: tc.readiness,
1197+
scaleUpRequests: tc.scaleUpRequests,
1198+
scaleDownRequests: scaleDownRequests,
1199+
}
1200+
1201+
clusterState.updateAcceptableRanges(tc.targetSizes)
1202+
assert.Equal(t, tc.wantAcceptableRanges, clusterState.acceptableRanges)
1203+
})
1204+
}
1205+
}
1206+
1207+
func TestUpdateIncorrectNodeGroupSizes(t *testing.T) {
1208+
timeNow := time.Now()
1209+
testCases := []struct {
1210+
name string
1211+
1212+
acceptableRanges map[string]AcceptableRange
1213+
readiness map[string]Readiness
1214+
incorrectSizes map[string]IncorrectNodeGroupSize
1215+
1216+
wantIncorrectSizes map[string]IncorrectNodeGroupSize
1217+
}{
1218+
{
1219+
name: "node groups with correct sizes",
1220+
acceptableRanges: map[string]AcceptableRange{
1221+
"ng1": {CurrentTarget: 10},
1222+
"ng2": {CurrentTarget: 20},
1223+
},
1224+
readiness: map[string]Readiness{
1225+
"ng1": {Registered: make([]string, 10)},
1226+
"ng2": {Registered: make([]string, 20)},
1227+
},
1228+
incorrectSizes: map[string]IncorrectNodeGroupSize{},
1229+
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{},
1230+
},
1231+
{
1232+
name: "node groups with correct sizes after not being correct sized",
1233+
acceptableRanges: map[string]AcceptableRange{
1234+
"ng1": {CurrentTarget: 10},
1235+
"ng2": {CurrentTarget: 20},
1236+
},
1237+
readiness: map[string]Readiness{
1238+
"ng1": {Registered: make([]string, 10)},
1239+
"ng2": {Registered: make([]string, 20)},
1240+
},
1241+
incorrectSizes: map[string]IncorrectNodeGroupSize{
1242+
"ng1": {CurrentSize: 8, ExpectedSize: 10, FirstObserved: timeNow.Add(-time.Hour)},
1243+
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow.Add(-time.Minute)},
1244+
},
1245+
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{},
1246+
},
1247+
{
1248+
name: "node groups below the target size",
1249+
acceptableRanges: map[string]AcceptableRange{
1250+
"ng1": {CurrentTarget: 10},
1251+
"ng2": {CurrentTarget: 20},
1252+
},
1253+
readiness: map[string]Readiness{
1254+
"ng1": {Registered: make([]string, 8)},
1255+
"ng2": {Registered: make([]string, 15)},
1256+
},
1257+
incorrectSizes: map[string]IncorrectNodeGroupSize{},
1258+
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
1259+
"ng1": {CurrentSize: 8, ExpectedSize: 10, FirstObserved: timeNow},
1260+
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow},
1261+
},
1262+
},
1263+
{
1264+
name: "node groups above the target size",
1265+
acceptableRanges: map[string]AcceptableRange{
1266+
"ng1": {CurrentTarget: 10},
1267+
"ng2": {CurrentTarget: 20},
1268+
},
1269+
readiness: map[string]Readiness{
1270+
"ng1": {Registered: make([]string, 12)},
1271+
"ng2": {Registered: make([]string, 25)},
1272+
},
1273+
incorrectSizes: map[string]IncorrectNodeGroupSize{},
1274+
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
1275+
"ng1": {CurrentSize: 12, ExpectedSize: 10, FirstObserved: timeNow},
1276+
"ng2": {CurrentSize: 25, ExpectedSize: 20, FirstObserved: timeNow},
1277+
},
1278+
},
1279+
{
1280+
name: "node groups below the target size with changed delta",
1281+
acceptableRanges: map[string]AcceptableRange{
1282+
"ng1": {CurrentTarget: 10},
1283+
"ng2": {CurrentTarget: 20},
1284+
},
1285+
readiness: map[string]Readiness{
1286+
"ng1": {Registered: make([]string, 8)},
1287+
"ng2": {Registered: make([]string, 15)},
1288+
},
1289+
incorrectSizes: map[string]IncorrectNodeGroupSize{
1290+
"ng1": {CurrentSize: 7, ExpectedSize: 10, FirstObserved: timeNow.Add(-time.Hour)},
1291+
"ng2": {CurrentSize: 14, ExpectedSize: 20, FirstObserved: timeNow.Add(-time.Minute)},
1292+
},
1293+
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
1294+
"ng1": {CurrentSize: 8, ExpectedSize: 10, FirstObserved: timeNow},
1295+
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow},
1296+
},
1297+
},
1298+
{
1299+
name: "node groups below the target size with the same delta",
1300+
acceptableRanges: map[string]AcceptableRange{
1301+
"ng1": {CurrentTarget: 10},
1302+
"ng2": {CurrentTarget: 20},
1303+
},
1304+
readiness: map[string]Readiness{
1305+
"ng1": {Registered: make([]string, 8)},
1306+
"ng2": {Registered: make([]string, 15)},
1307+
},
1308+
incorrectSizes: map[string]IncorrectNodeGroupSize{
1309+
"ng1": {CurrentSize: 8, ExpectedSize: 10, FirstObserved: timeNow.Add(-time.Hour)},
1310+
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow.Add(-time.Minute)},
1311+
},
1312+
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
1313+
"ng1": {CurrentSize: 8, ExpectedSize: 10, FirstObserved: timeNow.Add(-time.Hour)},
1314+
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow.Add(-time.Minute)},
1315+
},
1316+
},
1317+
{
1318+
name: "node groups below the target size with short unregistered nodes",
1319+
acceptableRanges: map[string]AcceptableRange{
1320+
"ng1": {CurrentTarget: 10},
1321+
"ng2": {CurrentTarget: 20},
1322+
},
1323+
readiness: map[string]Readiness{
1324+
"ng1": {Registered: make([]string, 8), Unregistered: make([]string, 2)},
1325+
"ng2": {Registered: make([]string, 15), Unregistered: make([]string, 3)},
1326+
},
1327+
incorrectSizes: map[string]IncorrectNodeGroupSize{},
1328+
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
1329+
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow},
1330+
},
1331+
},
1332+
{
1333+
name: "node groups below the target size with long unregistered nodes",
1334+
acceptableRanges: map[string]AcceptableRange{
1335+
"ng1": {CurrentTarget: 10},
1336+
"ng2": {CurrentTarget: 20},
1337+
},
1338+
readiness: map[string]Readiness{
1339+
"ng1": {Registered: make([]string, 8), LongUnregistered: make([]string, 2)},
1340+
"ng2": {Registered: make([]string, 15), LongUnregistered: make([]string, 3)},
1341+
},
1342+
incorrectSizes: map[string]IncorrectNodeGroupSize{},
1343+
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
1344+
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow},
1345+
},
1346+
},
1347+
{
1348+
name: "node groups below the target size with various unregistered nodes",
1349+
acceptableRanges: map[string]AcceptableRange{
1350+
"ng1": {CurrentTarget: 10},
1351+
"ng2": {CurrentTarget: 20},
1352+
},
1353+
readiness: map[string]Readiness{
1354+
"ng1": {Registered: make([]string, 8), Unregistered: make([]string, 1), LongUnregistered: make([]string, 1)},
1355+
"ng2": {Registered: make([]string, 15), Unregistered: make([]string, 2), LongUnregistered: make([]string, 2)},
1356+
},
1357+
incorrectSizes: map[string]IncorrectNodeGroupSize{},
1358+
wantIncorrectSizes: map[string]IncorrectNodeGroupSize{
1359+
"ng2": {CurrentSize: 15, ExpectedSize: 20, FirstObserved: timeNow},
1360+
},
1361+
},
1362+
}
1363+
1364+
for _, tc := range testCases {
1365+
t.Run(tc.name, func(t *testing.T) {
1366+
provider := testprovider.NewTestCloudProvider(nil, nil)
1367+
for nodeGroupName, acceptableRange := range tc.acceptableRanges {
1368+
provider.AddNodeGroup(nodeGroupName, 0, 1000, acceptableRange.CurrentTarget)
1369+
}
1370+
1371+
clusterState := &ClusterStateRegistry{
1372+
cloudProvider: provider,
1373+
acceptableRanges: tc.acceptableRanges,
1374+
perNodeGroupReadiness: tc.readiness,
1375+
incorrectNodeGroupSizes: tc.incorrectSizes,
1376+
}
1377+
1378+
clusterState.updateIncorrectNodeGroupSizes(timeNow)
1379+
assert.Equal(t, tc.wantIncorrectSizes, clusterState.incorrectNodeGroupSizes)
1380+
})
1381+
}
1382+
}

0 commit comments

Comments
 (0)