Skip to content

Commit 4b18279

Browse files
feat: add --tolerate-failures-until-deadline flag and deploy.tolerateFailuresUntilDeadline config for improved ci/cd usage (#8047)
1 parent 332b6c1 commit 4b18279

File tree

14 files changed

+260
-95
lines changed

14 files changed

+260
-95
lines changed

cmd/skaffold/app/cmd/flags.go

+9
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,15 @@ var flagRegistry = []Flag{
332332
DefinedOn: []string{"dev", "debug", "deploy", "run", "apply"},
333333
IsEnum: true,
334334
},
335+
{
336+
Name: "tolerate-failures-until-deadline",
337+
Usage: "Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'",
338+
Value: &opts.TolerateFailuresStatusCheck,
339+
DefValue: false,
340+
FlagAddMethod: "BoolVar",
341+
DefinedOn: []string{"dev", "debug", "deploy", "run", "apply"},
342+
IsEnum: true,
343+
},
335344
{
336345
Name: "fast-fail-status-check",
337346
Usage: "Configures `status-check` to fail immediately if any error occurs. Otherwise `status-check` will attempt to check all resources once and only then report errors and possibly exit. Defaults to 'true'",

docs-v2/content/en/schemas/v4beta1.json

+7
Original file line numberDiff line numberDiff line change
@@ -1422,6 +1422,12 @@
14221422
"type": "integer",
14231423
"description": "*beta* deadline for deployments to stabilize in seconds.",
14241424
"x-intellij-html-description": "<em>beta</em> deadline for deployments to stabilize in seconds."
1425+
},
1426+
"tolerateFailuresUntilDeadline": {
1427+
"type": "boolean",
1428+
"description": "configures the Skaffold \"status-check\" to tolerate failures (flapping deployments, etc.) until the statusCheckDeadlineSeconds duration or k8s object timeouts such as progressDeadlineSeconds, etc.",
1429+
"x-intellij-html-description": "configures the Skaffold &quot;status-check&quot; to tolerate failures (flapping deployments, etc.) until the statusCheckDeadlineSeconds duration or k8s object timeouts such as progressDeadlineSeconds, etc.",
1430+
"default": "false"
14251431
}
14261432
},
14271433
"preferredOrder": [
@@ -1432,6 +1438,7 @@
14321438
"cloudrun",
14331439
"statusCheck",
14341440
"statusCheckDeadlineSeconds",
1441+
"tolerateFailuresUntilDeadline",
14351442
"kubeContext",
14361443
"logs"
14371444
],

docs/content/en/docs/references/cli/_index.md

+10
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ Options:
139139
--status-check=: Wait for deployed resources to stabilize
140140
--sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories.
141141
--tail=false: Stream logs from deployed objects
142+
--tolerate-failures-until-deadline=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'
142143
--wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit
143144
144145
Usage:
@@ -168,6 +169,7 @@ Env vars:
168169
* `SKAFFOLD_STATUS_CHECK` (same as `--status-check`)
169170
* `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`)
170171
* `SKAFFOLD_TAIL` (same as `--tail`)
172+
* `SKAFFOLD_TOLERATE_FAILURES_UNTIL_DEADLINE` (same as `--tolerate-failures-until-deadline`)
171173
* `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`)
172174

173175
### skaffold build
@@ -453,6 +455,7 @@ Options:
453455
--sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories.
454456
-t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration
455457
--tail=true: Stream logs from deployed objects
458+
--tolerate-failures-until-deadline=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'
456459
--toot=false: Emit a terminal beep after the deploy is complete
457460
--trigger='notify': How is change detection triggered? (polling, notify, or manual)
458461
--wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit
@@ -516,6 +519,7 @@ Env vars:
516519
* `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`)
517520
* `SKAFFOLD_TAG` (same as `--tag`)
518521
* `SKAFFOLD_TAIL` (same as `--tail`)
522+
* `SKAFFOLD_TOLERATE_FAILURES_UNTIL_DEADLINE` (same as `--tolerate-failures-until-deadline`)
519523
* `SKAFFOLD_TOOT` (same as `--toot`)
520524
* `SKAFFOLD_TRIGGER` (same as `--trigger`)
521525
* `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`)
@@ -633,6 +637,7 @@ Options:
633637
--sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories.
634638
-t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration
635639
--tail=false: Stream logs from deployed objects
640+
--tolerate-failures-until-deadline=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'
636641
--toot=false: Emit a terminal beep after the deploy is complete
637642
--wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit
638643
--wait-for-deletions=true: Wait for pending deletions to complete before a deployment
@@ -681,6 +686,7 @@ Env vars:
681686
* `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`)
682687
* `SKAFFOLD_TAG` (same as `--tag`)
683688
* `SKAFFOLD_TAIL` (same as `--tail`)
689+
* `SKAFFOLD_TOLERATE_FAILURES_UNTIL_DEADLINE` (same as `--tolerate-failures-until-deadline`)
684690
* `SKAFFOLD_TOOT` (same as `--toot`)
685691
* `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`)
686692
* `SKAFFOLD_WAIT_FOR_DELETIONS` (same as `--wait-for-deletions`)
@@ -740,6 +746,7 @@ Options:
740746
--sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories.
741747
-t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration
742748
--tail=true: Stream logs from deployed objects
749+
--tolerate-failures-until-deadline=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'
743750
--toot=false: Emit a terminal beep after the deploy is complete
744751
--trigger='notify': How is change detection triggered? (polling, notify, or manual)
745752
--wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit
@@ -803,6 +810,7 @@ Env vars:
803810
* `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`)
804811
* `SKAFFOLD_TAG` (same as `--tag`)
805812
* `SKAFFOLD_TAIL` (same as `--tail`)
813+
* `SKAFFOLD_TOLERATE_FAILURES_UNTIL_DEADLINE` (same as `--tolerate-failures-until-deadline`)
806814
* `SKAFFOLD_TOOT` (same as `--toot`)
807815
* `SKAFFOLD_TRIGGER` (same as `--trigger`)
808816
* `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`)
@@ -1095,6 +1103,7 @@ Options:
10951103
--sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories.
10961104
-t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration
10971105
--tail=false: Stream logs from deployed objects
1106+
--tolerate-failures-until-deadline=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'
10981107
--toot=false: Emit a terminal beep after the deploy is complete
10991108
--wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit
11001109
--wait-for-deletions=true: Wait for pending deletions to complete before a deployment
@@ -1153,6 +1162,7 @@ Env vars:
11531162
* `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`)
11541163
* `SKAFFOLD_TAG` (same as `--tag`)
11551164
* `SKAFFOLD_TAIL` (same as `--tail`)
1165+
* `SKAFFOLD_TOLERATE_FAILURES_UNTIL_DEADLINE` (same as `--tolerate-failures-until-deadline`)
11561166
* `SKAFFOLD_TOOT` (same as `--toot`)
11571167
* `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`)
11581168
* `SKAFFOLD_WAIT_FOR_DELETIONS` (same as `--wait-for-deletions`)

integration/run_test.go

+39
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package integration
1818

1919
import (
20+
"fmt"
2021
"os"
2122
"path/filepath"
2223
"strings"
@@ -247,6 +248,44 @@ func TestRunTailDefaultNamespace(t *testing.T) {
247248
}
248249
}
249250

251+
func TestRunTailTolerateFailuresUntilDeadline(t *testing.T) {
252+
MarkIntegrationTest(t, CanRunWithoutGcp)
253+
var tsts = []struct {
254+
description string
255+
dir string
256+
args []string
257+
deployments []string
258+
env []string
259+
targetLogOne string
260+
targetLogTwo string
261+
}{
262+
{
263+
description: "status-check-tolerance",
264+
dir: "testdata/status-check-tolerance",
265+
args: []string{"--tolerate-failures-until-deadline"},
266+
deployments: []string{"tolerance-check"},
267+
targetLogOne: "container will exit with error",
268+
targetLogTwo: "Hello world!",
269+
env: []string{fmt.Sprintf("STOP_FAILING_TIME=%d", time.Now().Unix()+10)},
270+
},
271+
}
272+
273+
for _, test := range tsts {
274+
t.Run(test.description, func(t *testing.T) {
275+
if test.targetLogOne == "" || test.targetLogTwo == "" {
276+
t.SkipNow()
277+
}
278+
ns, _ := SetupNamespace(t)
279+
280+
args := append(test.args, "--tail")
281+
out := skaffold.Run(args...).InDir(test.dir).InNs(ns.Name).WithEnv(test.env).RunLive(t)
282+
defer skaffold.Delete().InDir(test.dir).WithEnv(test.env).RunOrFail(t)
283+
WaitForLogs(t, out, test.targetLogOne)
284+
WaitForLogs(t, out, test.targetLogTwo)
285+
})
286+
}
287+
}
288+
250289
func TestRunRenderOnly(t *testing.T) {
251290
MarkIntegrationTest(t, CanRunWithoutGcp)
252291

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
apiVersion: skaffold/v2beta29
2+
kind: Config
3+
build:
4+
artifacts:
5+
- image: tolerance-check
6+
context: tolerance-check
7+
docker:
8+
buildArgs:
9+
STOP_FAILING_TIME: '{{.STOP_FAILING_TIME}}'
10+
deploy:
11+
statusCheckDeadlineSeconds: 120
12+
kubectl:
13+
manifests:
14+
- tolerance-check/kubernetes/*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
FROM alpine:latest
2+
ARG STOP_FAILING_TIME
3+
ENV STOP_FAILING_TIME=${STOP_FAILING_TIME:-not_found}
4+
5+
6+
COPY script.sh /script.sh
7+
ENTRYPOINT [ "/script.sh" ]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: tolerance-check
5+
labels:
6+
app: tolerance-check
7+
spec:
8+
replicas: 1
9+
selector:
10+
matchLabels:
11+
app: tolerance-check
12+
template:
13+
metadata:
14+
labels:
15+
app: tolerance-check
16+
spec:
17+
containers:
18+
- name: tolerance-check
19+
image: tolerance-check
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/sh
2+
3+
current_time=$(date +%s)
4+
stop_failing_time=$STOP_FAILING_TIME
5+
6+
echo $current_time
7+
echo "========"
8+
echo $stop_failing_time
9+
echo "========"
10+
11+
if [[ $current_time -le $stop_failing_time ]]; then
12+
echo "current time less than stop failing time, container will exit with error"
13+
exit 1
14+
fi
15+
while :
16+
do
17+
echo "Hello world!!!! - current time greater than stop failing time!"
18+
sleep 2
19+
done

pkg/skaffold/config/options.go

+75-74
Original file line numberDiff line numberDiff line change
@@ -32,80 +32,81 @@ type WaitForDeletions struct {
3232

3333
// SkaffoldOptions are options that are set by command line arguments not included in the config file itself
3434
type SkaffoldOptions struct {
35-
Apply bool
36-
AutoBuild bool
37-
AutoCreateConfig bool
38-
AutoDeploy bool
39-
AutoSync bool
40-
AssumeYes bool
41-
CacheArtifacts bool
42-
ContainerDebugging bool
43-
Cleanup bool
44-
DetectMinikube bool
45-
DryRun bool
46-
EnableRPC bool
47-
Force bool
48-
ForceLoadImages bool
49-
IterativeStatusCheck bool
50-
FastFailStatusCheck bool
51-
Notification bool
52-
NoPrune bool
53-
NoPruneChildren bool
54-
ProfileAutoActivation bool
55-
PropagateProfiles bool
56-
RenderOnly bool
57-
SkipTests bool
58-
SkipConfigDefaults bool
59-
Tail bool
60-
WaitForConnection bool
61-
EnablePlatformNodeAffinity bool
62-
EnableGKEARMNodeToleration bool
63-
DisableMultiPlatformBuild bool
64-
CheckClusterNodePlatforms bool
65-
MakePathsAbsolute *bool
66-
MultiLevelRepo *bool
67-
CloudRunProject string
68-
CloudRunLocation string
69-
ConfigurationFile string
70-
HydrationDir string
71-
InventoryNamespace string
72-
InventoryID string
73-
InventoryName string
74-
GlobalConfig string
75-
EventLogFile string
76-
RenderOutput string
77-
User string
78-
CustomTag string
79-
Namespace string
80-
CacheFile string
81-
Trigger string
82-
KubeContext string
83-
KubeConfig string
84-
LastLogFile string
85-
DigestSource string
86-
Command string
87-
MinikubeProfile string
88-
RepoCacheDir string
89-
TransformRulesFile string
90-
VerifyDockerNetwork string
91-
CustomLabels []string
92-
TargetImages []string
93-
Profiles []string
94-
InsecureRegistries []string
95-
ConfigurationFilter []string
96-
HydratedManifests []string
97-
Platforms []string
98-
BuildConcurrency int
99-
WatchPollInterval int
100-
StatusCheck BoolOrUndefined
101-
PushImages BoolOrUndefined
102-
RPCPort IntOrUndefined
103-
RPCHTTPPort IntOrUndefined
104-
Muted Muted
105-
PortForward PortForwardOptions
106-
DefaultRepo StringOrUndefined
107-
SyncRemoteCache SyncRemoteCacheOption
108-
WaitForDeletions WaitForDeletions
35+
Apply bool
36+
AutoBuild bool
37+
AutoCreateConfig bool
38+
AutoDeploy bool
39+
AutoSync bool
40+
AssumeYes bool
41+
CacheArtifacts bool
42+
ContainerDebugging bool
43+
Cleanup bool
44+
DetectMinikube bool
45+
DryRun bool
46+
EnableRPC bool
47+
Force bool
48+
ForceLoadImages bool
49+
IterativeStatusCheck bool
50+
FastFailStatusCheck bool
51+
TolerateFailuresStatusCheck bool
52+
Notification bool
53+
NoPrune bool
54+
NoPruneChildren bool
55+
ProfileAutoActivation bool
56+
PropagateProfiles bool
57+
RenderOnly bool
58+
SkipTests bool
59+
SkipConfigDefaults bool
60+
Tail bool
61+
WaitForConnection bool
62+
EnablePlatformNodeAffinity bool
63+
EnableGKEARMNodeToleration bool
64+
DisableMultiPlatformBuild bool
65+
CheckClusterNodePlatforms bool
66+
MakePathsAbsolute *bool
67+
MultiLevelRepo *bool
68+
CloudRunProject string
69+
CloudRunLocation string
70+
ConfigurationFile string
71+
HydrationDir string
72+
InventoryNamespace string
73+
InventoryID string
74+
InventoryName string
75+
GlobalConfig string
76+
EventLogFile string
77+
RenderOutput string
78+
User string
79+
CustomTag string
80+
Namespace string
81+
CacheFile string
82+
Trigger string
83+
KubeContext string
84+
KubeConfig string
85+
LastLogFile string
86+
DigestSource string
87+
Command string
88+
MinikubeProfile string
89+
RepoCacheDir string
90+
TransformRulesFile string
91+
VerifyDockerNetwork string
92+
CustomLabels []string
93+
TargetImages []string
94+
Profiles []string
95+
InsecureRegistries []string
96+
ConfigurationFilter []string
97+
HydratedManifests []string
98+
Platforms []string
99+
BuildConcurrency int
100+
WatchPollInterval int
101+
StatusCheck BoolOrUndefined
102+
PushImages BoolOrUndefined
103+
RPCPort IntOrUndefined
104+
RPCHTTPPort IntOrUndefined
105+
Muted Muted
106+
PortForward PortForwardOptions
107+
DefaultRepo StringOrUndefined
108+
SyncRemoteCache SyncRemoteCacheOption
109+
WaitForDeletions WaitForDeletions
109110
}
110111

111112
type RunMode string

pkg/skaffold/deploy/component/kubernetes/monitor_test.go

+2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ func (m mockStatusConfig) GetKubeContext() string { return "" }
3838

3939
func (m mockStatusConfig) StatusCheckDeadlineSeconds() int { return 0 }
4040

41+
func (m mockStatusConfig) StatusCheckTolerateFailures() bool { return false }
42+
4143
func (m mockStatusConfig) FastFailStatusCheck() bool { return true }
4244

4345
func (m mockStatusConfig) Muted() config.Muted { return config.Muted{} }

0 commit comments

Comments
 (0)