Skip to content

Commit 3bf856f

Browse files
committed
change logic to detect skaffold deplpy status
1 parent fda2a58 commit 3bf856f

File tree

2 files changed

+87
-107
lines changed

2 files changed

+87
-107
lines changed

pkg/skaffold/kubernetes/status/status_check.go

+39-27
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,10 @@ const (
6767
)
6868

6969
type counter struct {
70-
total int
71-
pending int32
72-
failed int32
70+
total int
71+
pending int32
72+
failed int32
73+
cancelled int32
7374
}
7475

7576
type Config interface {
@@ -220,17 +221,20 @@ func (s *monitor) statusCheck(ctx context.Context, out io.Writer) (proto.StatusC
220221

221222
ctx, cancel := context.WithCancel(ctx)
222223
defer cancel()
224+
// Set default status check exit code.
225+
var exitStatusCode proto.StatusCode
223226

224227
for _, d := range resources {
225228
wg.Add(1)
226229
go func(r *resource.Resource) {
227230
defer wg.Done()
228231
// keep updating the resource status until it fails/succeeds/times out
229232
pollResourceStatus(ctx, s.cfg, r)
230-
rcCopy := c.markProcessed(r.Status().Error())
233+
rcCopy := c.markProcessed(ctx, r.StatusCode())
231234
s.printStatusCheckSummary(out, r, rcCopy)
232-
// if one deployment fails, cancel status checks for all deployments.
233-
if r.Status().Error() != nil && r.StatusCode() != proto.StatusCode_STATUSCHECK_USER_CANCELLED {
235+
// if one resource fails, cancel status checks for all resources.
236+
if resourceFailed(r.StatusCode()) {
237+
exitStatusCode = r.StatusCode()
234238
cancel()
235239
}
236240
}(d)
@@ -243,8 +247,7 @@ func (s *monitor) statusCheck(ctx context.Context, out io.Writer) (proto.StatusC
243247

244248
// Wait for all deployment statuses to be fetched
245249
wg.Wait()
246-
cancel()
247-
return getSkaffoldDeployStatus(c, resources, ctx.Err())
250+
return getSkaffoldDeployStatus(c, exitStatusCode)
248251
}
249252

250253
func getStandalonePods(ctx context.Context, client kubernetes.Interface, ns string, l *label.DefaultLabeller, deadlineDuration time.Duration) ([]*resource.Resource, error) {
@@ -382,25 +385,21 @@ func pollResourceStatus(ctx context.Context, cfg kubectl.Config, r *resource.Res
382385
}
383386
}
384387

385-
func getSkaffoldDeployStatus(c *counter, rs []*resource.Resource, ctxErr error) (proto.StatusCode, error) {
388+
func getSkaffoldDeployStatus(c *counter, sc proto.StatusCode) (proto.StatusCode, error) {
389+
// return overall code cancelled if status check for all resources was cancelled
390+
if int(c.cancelled) == c.total && c.total > 0 {
391+
return proto.StatusCode_STATUSCHECK_USER_CANCELLED, fmt.Errorf("status check cancelled")
392+
}
393+
// return success if no failures find.
386394
if c.failed == 0 {
387395
return proto.StatusCode_STATUSCHECK_SUCCESS, nil
388396
}
397+
// construct an error message and return appropriate error code
389398
err := fmt.Errorf("%d/%d deployment(s) failed", c.failed, c.total)
390-
if ctxErr == context.DeadlineExceeded {
391-
return proto.StatusCode_STATUSCHECK_DEADLINE_EXCEEDED, err
392-
} else {
393-
if ctxErr == context.Canceled {
394-
return proto.StatusCode_STATUSCHECK_USER_CANCELLED, err
395-
}
396-
}
397-
for _, r := range rs {
398-
if r.StatusCode() != proto.StatusCode_STATUSCHECK_SUCCESS &&
399-
r.StatusCode() != proto.StatusCode_STATUSCHECK_USER_CANCELLED {
400-
return r.StatusCode(), err
401-
}
399+
if sc == proto.StatusCode_STATUSCHECK_SUCCESS || sc == 0 {
400+
return proto.StatusCode_STATUSCHECK_INTERNAL_ERROR, err
402401
}
403-
return proto.StatusCode_STATUSCHECK_INTERNAL_ERROR, err
402+
return sc, err
404403
}
405404

406405
func getDeadline(d int) time.Duration {
@@ -490,8 +489,12 @@ func newCounter(i int) *counter {
490489
}
491490
}
492491

493-
func (c *counter) markProcessed(err error) counter {
494-
if err != nil && err != context.Canceled {
492+
func (c *counter) markProcessed(ctx context.Context, sc proto.StatusCode) counter {
493+
if resourceCancelled(sc) {
494+
log.Entry(ctx).Debug("marking resource status check cancelled", sc)
495+
atomic.AddInt32(&c.cancelled, 1)
496+
} else if resourceFailed(sc) {
497+
log.Entry(ctx).Debugf("marking resource failed due to error code %s", sc)
495498
atomic.AddInt32(&c.failed, 1)
496499
}
497500
atomic.AddInt32(&c.pending, -1)
@@ -500,12 +503,21 @@ func (c *counter) markProcessed(err error) counter {
500503

501504
func (c *counter) copy() counter {
502505
return counter{
503-
total: c.total,
504-
pending: c.pending,
505-
failed: c.failed,
506+
total: c.total,
507+
pending: c.pending,
508+
failed: c.failed,
509+
cancelled: c.cancelled,
506510
}
507511
}
508512

513+
func resourceFailed(sc proto.StatusCode) bool {
514+
return sc != proto.StatusCode_STATUSCHECK_SUCCESS && sc != proto.StatusCode_STATUSCHECK_USER_CANCELLED
515+
}
516+
517+
func resourceCancelled(sc proto.StatusCode) bool {
518+
return sc == proto.StatusCode_STATUSCHECK_USER_CANCELLED
519+
}
520+
509521
type NoopMonitor struct {
510522
status.NoopMonitor
511523
}

pkg/skaffold/kubernetes/status/status_check_test.go

+48-80
Original file line numberDiff line numberDiff line change
@@ -230,122 +230,82 @@ func TestGetDeployments(t *testing.T) {
230230
}
231231
}
232232

233-
func TestGetDeployStatus(t *testing.T) {
233+
func TestExitErrorMessage(t *testing.T) {
234234
tests := []struct {
235235
description string
236236
counter *counter
237-
deployments []*resource.Resource
238-
ctxErr error
237+
sc proto.StatusCode
239238
expected string
240239
expectedCode proto.StatusCode
241240
shouldErr bool
242241
}{
243242
{
244-
description: "one error",
245-
counter: &counter{total: 2, failed: 1},
246-
deployments: []*resource.Resource{
247-
resource.NewResource("foo", resource.ResourceTypes.Deployment, "test", time.Second).
248-
WithPodStatuses([]proto.StatusCode{proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE}),
249-
},
243+
description: "one error",
244+
counter: &counter{total: 2, failed: 1},
250245
expected: "1/2 deployment(s) failed",
251-
expectedCode: proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE,
246+
sc: proto.StatusCode_STATUSCHECK_POD_INITIALIZING,
247+
expectedCode: proto.StatusCode_STATUSCHECK_POD_INITIALIZING,
252248
shouldErr: true,
253249
},
254250
{
255-
description: "no error",
256-
counter: &counter{total: 2},
257-
deployments: []*resource.Resource{
258-
withStatus(
259-
resource.NewResource("r1", resource.ResourceTypes.Deployment, "test", 1),
260-
&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS},
261-
),
262-
withStatus(
263-
resource.NewResource("r2", resource.ResourceTypes.Deployment, "test", 1),
264-
&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS},
265-
),
266-
},
251+
description: "no error",
252+
sc: proto.StatusCode_STATUSCHECK_SUCCESS,
253+
expectedCode: proto.StatusCode_STATUSCHECK_SUCCESS,
254+
counter: &counter{total: 2},
267255
},
268256
{
269-
description: "multiple errors",
270-
counter: &counter{total: 3, failed: 2},
271-
expected: "2/3 deployment(s) failed",
272-
deployments: []*resource.Resource{
273-
resource.NewResource("foo", resource.ResourceTypes.Deployment, "test", time.Second).
274-
WithPodStatuses([]proto.StatusCode{proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE}),
275-
},
276-
expectedCode: proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE,
257+
description: "multiple errors",
258+
counter: &counter{total: 3, failed: 2},
259+
expected: "2/3 deployment(s) failed",
260+
sc: proto.StatusCode_STATUSCHECK_CONFIG_CONNECTOR_FAILED,
261+
expectedCode: proto.StatusCode_STATUSCHECK_CONFIG_CONNECTOR_FAILED,
277262
shouldErr: true,
278263
},
279264
{
280265
description: "0 deployments",
281-
counter: &counter{},
266+
counter: &counter{total: 0},
267+
expectedCode: proto.StatusCode_STATUSCHECK_SUCCESS,
282268
},
283269
{
284-
description: "unable to retrieve pods for deployment",
285-
counter: &counter{total: 1, failed: 1},
286-
deployments: []*resource.Resource{
287-
withStatus(
288-
resource.NewResource("deployment", resource.ResourceTypes.Deployment, "test", 1),
289-
&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_DEPLOYMENT_FETCH_ERR},
290-
),
291-
},
292-
shouldErr: true,
270+
description: "unable to retrieve pods for deployment",
271+
counter: &counter{total: 1, failed: 1},
272+
sc: proto.StatusCode_STATUSCHECK_DEPLOYMENT_FETCH_ERR,
293273
expectedCode: proto.StatusCode_STATUSCHECK_DEPLOYMENT_FETCH_ERR,
274+
shouldErr: true,
294275
},
295276
{
296-
description: "one deployment failed and others cancelled and or succeeded",
297-
counter: &counter{total: 3, failed: 2},
298-
deployments: []*resource.Resource{
299-
withStatus(
300-
resource.NewResource("deployment-cancelled", resource.ResourceTypes.Deployment, "test", 1),
301-
&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_USER_CANCELLED},
302-
),
303-
withStatus(
304-
resource.NewResource("deployment-success", resource.ResourceTypes.Deployment, "test", 1),
305-
&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS},
306-
),
307-
withStatus(
308-
resource.NewResource("deployment", resource.ResourceTypes.Deployment, "test", 1),
309-
&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_DEPLOYMENT_FETCH_ERR},
310-
),
311-
},
277+
description: "one deployment failed and others cancelled and or succeeded",
278+
counter: &counter{total: 3, failed: 2},
279+
sc: proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE,
280+
expectedCode: proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE,
281+
expected: "2/3 deployment(s) failed",
312282
shouldErr: true,
313-
expectedCode: proto.StatusCode_STATUSCHECK_DEPLOYMENT_FETCH_ERR,
314283
},
315284
{
316-
description: "deployments did not stabilize within deadline",
317-
counter: &counter{total: 3, failed: 2},
318-
ctxErr: context.DeadlineExceeded,
319-
expected: "2/3 deployment(s) failed",
320-
deployments: []*resource.Resource{
321-
resource.NewResource("foo", resource.ResourceTypes.Deployment, "test", time.Second).
322-
WithPodStatuses([]proto.StatusCode{proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE}),
323-
},
324-
expectedCode: proto.StatusCode_STATUSCHECK_DEADLINE_EXCEEDED,
285+
description: "deployments did not stabilize within deadline returns the pod error",
286+
counter: &counter{total: 1, failed: 1},
287+
sc: proto.StatusCode_STATUSCHECK_UNHEALTHY,
288+
expected: "1/1 deployment(s) failed",
289+
expectedCode: proto.StatusCode_STATUSCHECK_UNHEALTHY,
325290
shouldErr: true,
326291
},
327292
{
328-
description: "user cancelled session",
329-
counter: &counter{total: 1, failed: 1},
330-
ctxErr: context.Canceled,
331-
expected: "1/1 deployment(s) failed",
332-
deployments: []*resource.Resource{
333-
resource.NewResource("foo", resource.ResourceTypes.Deployment, "test", time.Second).
334-
WithPodStatuses([]proto.StatusCode{proto.StatusCode_STATUSCHECK_POD_INITIALIZING}),
335-
},
293+
description: "user cancelled session",
294+
counter: &counter{total: 2, failed: 0, cancelled: 2},
295+
sc: proto.StatusCode_STATUSCHECK_USER_CANCELLED,
296+
expected: "status check cancelled",
336297
expectedCode: proto.StatusCode_STATUSCHECK_USER_CANCELLED,
337298
shouldErr: true,
338299
},
339300
}
340301

341302
for _, test := range tests {
342303
testutil.Run(t, test.description, func(t *testutil.T) {
343-
testEvent.InitializeState([]latestV1.Pipeline{{}})
344-
errCode, err := getSkaffoldDeployStatus(test.counter, test.deployments, test.ctxErr)
304+
actual, err := getSkaffoldDeployStatus(test.counter, test.sc)
345305
t.CheckError(test.shouldErr, err)
306+
t.CheckDeepEqual(test.expectedCode, actual)
346307
if test.shouldErr {
347308
t.CheckErrorContains(test.expected, err)
348-
t.CheckDeepEqual(test.expectedCode, errCode)
349309
}
350310
})
351311
}
@@ -554,29 +514,37 @@ func TestResourceMarkProcessed(t *testing.T) {
554514
tests := []struct {
555515
description string
556516
c *counter
557-
err error
517+
sc proto.StatusCode
558518
expected counter
559519
}{
560520
{
561521
description: "when deployment failed, counter is updated",
562522
c: newCounter(10),
563-
err: errors.New("some ae"),
523+
sc: proto.StatusCode_STATUSCHECK_DEADLINE_EXCEEDED,
564524
expected: counter{total: 10, failed: 1, pending: 9},
565525
},
526+
{
527+
description: "when deployment is cancelled, failed is not updated",
528+
c: newCounter(10),
529+
sc: proto.StatusCode_STATUSCHECK_USER_CANCELLED,
530+
expected: counter{total: 10, failed: 0, pending: 9},
531+
},
566532
{
567533
description: "when deployment is successful, counter is updated",
568534
c: newCounter(10),
535+
sc: proto.StatusCode_STATUSCHECK_SUCCESS,
569536
expected: counter{total: 10, failed: 0, pending: 9},
570537
},
571538
{
572539
description: "counter when 1 deployment is updated correctly",
573540
c: newCounter(1),
541+
sc: proto.StatusCode_STATUSCHECK_SUCCESS,
574542
expected: counter{total: 1, failed: 0, pending: 0},
575543
},
576544
}
577545
for _, test := range tests {
578546
testutil.Run(t, test.description, func(t *testutil.T) {
579-
t.CheckDeepEqual(test.expected, test.c.markProcessed(test.err), cmp.AllowUnexported(counter{}))
547+
t.CheckDeepEqual(test.expected, test.c.markProcessed(context.Background(), test.sc), cmp.AllowUnexported(counter{}))
580548
})
581549
}
582550
}

0 commit comments

Comments
 (0)