Skip to content

Commit b663f13

Browse files
committed
feat: add annotation to ignore local storage volume during scale down
- this is so that scale down is not blocked on local storage volume - for pods where it is okay to ignore local storage volume Signed-off-by: vadasambar <[email protected]> fix: tests failing - there was a problem in the logic Signed-off-by: vadasambar <[email protected]> test: add unit test for `IgnoreLocalStorageVolumeKey` Signed-off-by: vadasambar <[email protected]> refactor: use `IgnoreLocalStorageVolumeKey` in tests instead of hardcoding the annotation Signed-off-by: vadasambar <[email protected]> fix: wording for test name - `pod with EmptyDir but IgnoreLocalStorageVolumeKey annotation` -> `pod with EmptyDir and IgnoreLocalStorageVolumeKey annotation` Signed-off-by: vadasambar <[email protected]> fix: simulator drain tests failing - set local storage vol name (required) Signed-off-by: vadasambar <[email protected]> refactor: add support for multiple vals in `safe-to-evict-local-volume` annotation - add more unit tests Signed-off-by: vadasambar <[email protected]> refactor: rename ignore local vol key `safe-to-evict-local-volume` -> `safe-to-evict-local-volumes` - abtract code to process annotation into a separate fn - shorten name for test cases Signed-off-by: vadasambar <[email protected]> docs: update FAQ with info about `safe-to-evict-local-volumes` annotation Signed-off-by: vadasambar <[email protected]> docs: add the FAQ for `safe-to-evict-local-volumes` annotation Signed-off-by: vadasambar <[email protected]> docs: fix formatting for `safe-to-evict-local-volumes` in FAQ Signed-off-by: vadasambar <[email protected]> docs: format the `safe-to-evict-local-volumes` as a bullet Signed-off-by: vadasambar <[email protected]> docs: fix `Unless` -> `unless` to make it consistent with other lines Signed-off-by: vadasambar <[email protected]> test: add an extra test for mismatching local vol value in annotation Signed-off-by: vadasambar <[email protected]> docs: make the wording clearer - for `safe-to-evict-local-volumes` annotation Signed-off-by: vadasambar <[email protected]>
1 parent 63b334f commit b663f13

File tree

4 files changed

+271
-5
lines changed

4 files changed

+271
-5
lines changed

cluster-autoscaler/FAQ.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,12 @@ Cluster Autoscaler decreases the size of the cluster when some nodes are consist
8888
* are not run on the node by default, *
8989
* don't have a [pod disruption budget](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#how-disruption-budgets-work) set or their PDB is too restrictive (since CA 0.6).
9090
* Pods that are not backed by a controller object (so not created by deployment, replica set, job, stateful set etc). *
91-
* Pods with local storage. *
91+
* Pods with local storage. *
92+
- unless the pod has the following annotation set:
93+
```
94+
"cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes": "volume-1,volume-2,.."
95+
```
96+
and all of the pod's local volumes are listed in the annotation value.
9297
* Pods that cannot be moved elsewhere due to various constraints (lack of resources, non-matching node selectors or affinity,
9398
matching anti-affinity, etc)
9499
* Pods that have the following annotation set:

cluster-autoscaler/simulator/drain_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ func TestGetPodsToMove(t *testing.T) {
115115
Spec: apiv1.PodSpec{
116116
Volumes: []apiv1.Volume{
117117
{
118+
Name: "empty-vol",
118119
VolumeSource: apiv1.VolumeSource{
119120
EmptyDir: &apiv1.EmptyDirVolumeSource{},
120121
},
@@ -136,6 +137,7 @@ func TestGetPodsToMove(t *testing.T) {
136137
Spec: apiv1.PodSpec{
137138
Volumes: []apiv1.Volume{
138139
{
140+
Name: "my-repo",
139141
VolumeSource: apiv1.VolumeSource{
140142
GitRepo: &apiv1.GitRepoVolumeSource{
141143
Repository: "my-repo",

cluster-autoscaler/utils/drain/drain.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package drain
1818

1919
import (
2020
"fmt"
21+
"strings"
2122
"time"
2223

2324
apiv1 "k8s.io/api/core/v1"
@@ -38,6 +39,8 @@ const (
3839
// PodSafeToEvictKey - annotation that ignores constraints to evict a pod like not being replicated, being on
3940
// kube-system namespace or having a local storage.
4041
PodSafeToEvictKey = "cluster-autoscaler.kubernetes.io/safe-to-evict"
42+
// SafeToEvictLocalVolumesKey - annotation that ignores (doesn't block on) a local storage volume during node scale down
43+
SafeToEvictLocalVolumesKey = "cluster-autoscaler.kubernetes.io/safe-to-evict-local-volumes"
4144
)
4245

4346
// BlockingPod represents a pod which is blocking the scale down of a node.
@@ -219,7 +222,7 @@ func GetPodsForDeletionOnNodeDrain(
219222
return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: UnmovableKubeSystemPod}, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name)
220223
}
221224
}
222-
if HasLocalStorage(pod) && skipNodesWithLocalStorage {
225+
if HasBlockingLocalStorage(pod) && skipNodesWithLocalStorage {
223226
return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: LocalStorageRequested}, fmt.Errorf("pod with local storage present: %s", pod.Name)
224227
}
225228
if hasNotSafeToEvictAnnotation(pod) {
@@ -250,16 +253,30 @@ func isPodTerminal(pod *apiv1.Pod) bool {
250253
return pod.Status.Phase == apiv1.PodFailed
251254
}
252255

253-
// HasLocalStorage returns true if pod has any local storage.
254-
func HasLocalStorage(pod *apiv1.Pod) bool {
256+
// HasBlockingLocalStorage returns true if pod has any local storage
257+
// without pod annotation `<SafeToEvictLocalVolumeKey>: <volume-name-1>,<volume-name-2>...`
258+
func HasBlockingLocalStorage(pod *apiv1.Pod) bool {
259+
isNonBlocking := getNonBlockingVolumes(pod)
255260
for _, volume := range pod.Spec.Volumes {
256-
if isLocalVolume(&volume) {
261+
if isLocalVolume(&volume) && !isNonBlocking[volume.Name] {
257262
return true
258263
}
259264
}
260265
return false
261266
}
262267

268+
func getNonBlockingVolumes(pod *apiv1.Pod) map[string]bool {
269+
isNonBlocking := map[string]bool{}
270+
annotationVal := pod.GetAnnotations()[SafeToEvictLocalVolumesKey]
271+
if annotationVal != "" {
272+
vols := strings.Split(annotationVal, ",")
273+
for _, v := range vols {
274+
isNonBlocking[v] = true
275+
}
276+
}
277+
return isNonBlocking
278+
}
279+
263280
func isLocalVolume(volume *apiv1.Volume) bool {
264281
return volume.HostPath != nil || volume.EmptyDir != nil
265282
}

cluster-autoscaler/utils/drain/drain_test.go

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,178 @@ func TestDrain(t *testing.T) {
210210
},
211211
}
212212

213+
emptyDirSafeToEvictVolumeSingleVal := &apiv1.Pod{
214+
ObjectMeta: metav1.ObjectMeta{
215+
Name: "bar",
216+
Namespace: "default",
217+
OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""),
218+
Annotations: map[string]string{
219+
SafeToEvictLocalVolumesKey: "scratch",
220+
},
221+
},
222+
Spec: apiv1.PodSpec{
223+
NodeName: "node",
224+
Volumes: []apiv1.Volume{
225+
{
226+
Name: "scratch",
227+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
228+
},
229+
},
230+
},
231+
}
232+
233+
emptyDirSafeToEvictLocalVolumeSingleValEmpty := &apiv1.Pod{
234+
ObjectMeta: metav1.ObjectMeta{
235+
Name: "bar",
236+
Namespace: "default",
237+
OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""),
238+
Annotations: map[string]string{
239+
SafeToEvictLocalVolumesKey: "",
240+
},
241+
},
242+
Spec: apiv1.PodSpec{
243+
NodeName: "node",
244+
Volumes: []apiv1.Volume{
245+
{
246+
Name: "scratch",
247+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
248+
},
249+
},
250+
},
251+
}
252+
253+
emptyDirSafeToEvictLocalVolumeSingleValNonMatching := &apiv1.Pod{
254+
ObjectMeta: metav1.ObjectMeta{
255+
Name: "bar",
256+
Namespace: "default",
257+
OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""),
258+
Annotations: map[string]string{
259+
SafeToEvictLocalVolumesKey: "scratch-2",
260+
},
261+
},
262+
Spec: apiv1.PodSpec{
263+
NodeName: "node",
264+
Volumes: []apiv1.Volume{
265+
{
266+
Name: "scratch-1",
267+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
268+
},
269+
},
270+
},
271+
}
272+
273+
emptyDirSafeToEvictLocalVolumeMultiValAllMatching := &apiv1.Pod{
274+
ObjectMeta: metav1.ObjectMeta{
275+
Name: "bar",
276+
Namespace: "default",
277+
OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""),
278+
Annotations: map[string]string{
279+
SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-3",
280+
},
281+
},
282+
Spec: apiv1.PodSpec{
283+
NodeName: "node",
284+
Volumes: []apiv1.Volume{
285+
{
286+
Name: "scratch-1",
287+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
288+
},
289+
{
290+
Name: "scratch-2",
291+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
292+
},
293+
{
294+
Name: "scratch-3",
295+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
296+
},
297+
},
298+
},
299+
}
300+
301+
emptyDirSafeToEvictLocalVolumeMultiValNonMatching := &apiv1.Pod{
302+
ObjectMeta: metav1.ObjectMeta{
303+
Name: "bar",
304+
Namespace: "default",
305+
OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""),
306+
Annotations: map[string]string{
307+
SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-5",
308+
},
309+
},
310+
Spec: apiv1.PodSpec{
311+
NodeName: "node",
312+
Volumes: []apiv1.Volume{
313+
{
314+
Name: "scratch-1",
315+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
316+
},
317+
{
318+
Name: "scratch-2",
319+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
320+
},
321+
{
322+
Name: "scratch-3",
323+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
324+
},
325+
},
326+
},
327+
}
328+
329+
emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals := &apiv1.Pod{
330+
ObjectMeta: metav1.ObjectMeta{
331+
Name: "bar",
332+
Namespace: "default",
333+
OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""),
334+
Annotations: map[string]string{
335+
SafeToEvictLocalVolumesKey: "scratch-1,scratch-2",
336+
},
337+
},
338+
Spec: apiv1.PodSpec{
339+
NodeName: "node",
340+
Volumes: []apiv1.Volume{
341+
{
342+
Name: "scratch-1",
343+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
344+
},
345+
{
346+
Name: "scratch-2",
347+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
348+
},
349+
{
350+
Name: "scratch-3",
351+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
352+
},
353+
},
354+
},
355+
}
356+
357+
emptyDirSafeToEvictLocalVolumeMultiValEmpty := &apiv1.Pod{
358+
ObjectMeta: metav1.ObjectMeta{
359+
Name: "bar",
360+
Namespace: "default",
361+
OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""),
362+
Annotations: map[string]string{
363+
SafeToEvictLocalVolumesKey: ",",
364+
},
365+
},
366+
Spec: apiv1.PodSpec{
367+
NodeName: "node",
368+
Volumes: []apiv1.Volume{
369+
{
370+
Name: "scratch-1",
371+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
372+
},
373+
{
374+
Name: "scratch-2",
375+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
376+
},
377+
{
378+
Name: "scratch-3",
379+
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}},
380+
},
381+
},
382+
},
383+
}
384+
213385
terminalPod := &apiv1.Pod{
214386
ObjectMeta: metav1.ObjectMeta{
215387
Name: "bar",
@@ -489,6 +661,76 @@ func TestDrain(t *testing.T) {
489661
expectBlockingPod: &BlockingPod{Pod: emptydirPod, Reason: LocalStorageRequested},
490662
expectDaemonSetPods: []*apiv1.Pod{},
491663
},
664+
{
665+
description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation",
666+
pods: []*apiv1.Pod{emptyDirSafeToEvictVolumeSingleVal},
667+
pdbs: []*policyv1.PodDisruptionBudget{},
668+
rcs: []*apiv1.ReplicationController{&rc},
669+
expectFatal: false,
670+
expectPods: []*apiv1.Pod{emptyDirSafeToEvictVolumeSingleVal},
671+
expectBlockingPod: &BlockingPod{},
672+
expectDaemonSetPods: []*apiv1.Pod{},
673+
},
674+
{
675+
description: "pod with EmptyDir and empty value for SafeToEvictLocalVolumesKey annotation",
676+
pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeSingleValEmpty},
677+
pdbs: []*policyv1.PodDisruptionBudget{},
678+
rcs: []*apiv1.ReplicationController{&rc},
679+
expectFatal: true,
680+
expectPods: []*apiv1.Pod{},
681+
expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeSingleValEmpty, Reason: LocalStorageRequested},
682+
expectDaemonSetPods: []*apiv1.Pod{},
683+
},
684+
{
685+
description: "pod with EmptyDir and non-matching value for SafeToEvictLocalVolumesKey annotation",
686+
pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeSingleValNonMatching},
687+
pdbs: []*policyv1.PodDisruptionBudget{},
688+
rcs: []*apiv1.ReplicationController{&rc},
689+
expectFatal: true,
690+
expectPods: []*apiv1.Pod{},
691+
expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeSingleValNonMatching, Reason: LocalStorageRequested},
692+
expectDaemonSetPods: []*apiv1.Pod{},
693+
},
694+
{
695+
description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with matching values",
696+
pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching},
697+
pdbs: []*policyv1.PodDisruptionBudget{},
698+
rcs: []*apiv1.ReplicationController{&rc},
699+
expectFatal: false,
700+
expectPods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching},
701+
expectBlockingPod: &BlockingPod{},
702+
expectDaemonSetPods: []*apiv1.Pod{},
703+
},
704+
{
705+
description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with non-matching values",
706+
pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValNonMatching},
707+
pdbs: []*policyv1.PodDisruptionBudget{},
708+
rcs: []*apiv1.ReplicationController{&rc},
709+
expectFatal: true,
710+
expectPods: []*apiv1.Pod{},
711+
expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValNonMatching, Reason: LocalStorageRequested},
712+
expectDaemonSetPods: []*apiv1.Pod{},
713+
},
714+
{
715+
description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with some matching values",
716+
pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals},
717+
pdbs: []*policyv1.PodDisruptionBudget{},
718+
rcs: []*apiv1.ReplicationController{&rc},
719+
expectFatal: true,
720+
expectPods: []*apiv1.Pod{},
721+
expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals, Reason: LocalStorageRequested},
722+
expectDaemonSetPods: []*apiv1.Pod{},
723+
},
724+
{
725+
description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values",
726+
pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValEmpty},
727+
pdbs: []*policyv1.PodDisruptionBudget{},
728+
rcs: []*apiv1.ReplicationController{&rc},
729+
expectFatal: true,
730+
expectPods: []*apiv1.Pod{},
731+
expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValEmpty, Reason: LocalStorageRequested},
732+
expectDaemonSetPods: []*apiv1.Pod{},
733+
},
492734
{
493735
description: "failed pod",
494736
pods: []*apiv1.Pod{failedPod},

0 commit comments

Comments
 (0)