Skip to content

Commit adf6da6

Browse files
authored
Merge pull request #4223 from tejal29/hook_pod_check
Initial prototype for pod health check hook up
2 parents 08b086c + 3ae7d5e commit adf6da6

File tree

6 files changed

+117
-30
lines changed

6 files changed

+117
-30
lines changed

pkg/skaffold/deploy/resource/deployment.go

+80-17
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,24 @@ import (
2323
"strings"
2424
"time"
2525

26+
"github.com/sirupsen/logrus"
27+
28+
"github.com/GoogleContainerTools/skaffold/pkg/diag"
29+
"github.com/GoogleContainerTools/skaffold/pkg/diag/validator"
30+
"github.com/GoogleContainerTools/skaffold/pkg/skaffold/event"
2631
"github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubectl"
2732
"github.com/GoogleContainerTools/skaffold/pkg/skaffold/runner/runcontext"
33+
"github.com/GoogleContainerTools/skaffold/proto"
2834
)
2935

3036
const (
31-
deploymentType = "deployment"
32-
rollOutSuccess = "successfully rolled out"
33-
connectionErrMsg = "Unable to connect to the server"
34-
killedErrMsg = "signal: killed"
37+
deploymentType = "deployment"
38+
rollOutSuccess = "successfully rolled out"
39+
connectionErrMsg = "Unable to connect to the server"
40+
killedErrMsg = "signal: killed"
41+
defaultPodCheckDeadline = 30 * time.Second
42+
tabHeader = " -"
43+
tab = " "
3544
)
3645

3746
var (
@@ -40,20 +49,26 @@ var (
4049
)
4150

4251
type Deployment struct {
43-
name string
44-
namespace string
45-
rType string
46-
status Status
47-
done bool
48-
deadline time.Duration
52+
name string
53+
namespace string
54+
rType string
55+
status Status
56+
done bool
57+
deadline time.Duration
58+
pods map[string]validator.Resource
59+
podValidator diag.Diagnose
4960
}
5061

5162
func (d *Deployment) Deadline() time.Duration {
5263
return d.deadline
5364
}
5465

5566
func (d *Deployment) UpdateStatus(details string, err error) {
56-
updated := newStatus(details, err)
67+
errCode := proto.StatusCode_STATUSCHECK_SUCCESS
68+
if err != nil {
69+
errCode = proto.StatusCode_STATUSCHECK_UNKNOWN
70+
}
71+
updated := newStatus(details, errCode, err)
5772
if d.status.Equal(updated) {
5873
d.status.changed = false
5974
return
@@ -67,14 +82,20 @@ func (d *Deployment) UpdateStatus(details string, err error) {
6782

6883
func NewDeployment(name string, ns string, deadline time.Duration) *Deployment {
6984
return &Deployment{
70-
name: name,
71-
namespace: ns,
72-
rType: deploymentType,
73-
status: newStatus("", nil),
74-
deadline: deadline,
85+
name: name,
86+
namespace: ns,
87+
rType: deploymentType,
88+
status: newStatus("", proto.StatusCode_STATUSCHECK_UNKNOWN, nil),
89+
deadline: deadline,
90+
podValidator: diag.New(nil),
7591
}
7692
}
7793

94+
func (d *Deployment) WithValidator(pd diag.Diagnose) *Deployment {
95+
d.podValidator = pd
96+
return d
97+
}
98+
7899
func (d *Deployment) CheckStatus(ctx context.Context, runCtx *runcontext.RunContext) {
79100
kubeCtl := kubectl.NewFromRunContext(runCtx)
80101

@@ -91,6 +112,9 @@ func (d *Deployment) CheckStatus(ctx context.Context, runCtx *runcontext.RunCont
91112
}
92113

93114
d.UpdateStatus(details, err)
115+
if err := d.fetchPods(ctx); err != nil {
116+
logrus.Debugf("pod statuses could be fetched this time due to %s", err)
117+
}
94118
}
95119

96120
func (d *Deployment) String() string {
@@ -113,12 +137,26 @@ func (d *Deployment) IsStatusCheckComplete() bool {
113137
return d.done
114138
}
115139

140+
// This returns a string representing deployment status along with tab header
141+
// e.g.
142+
// - testNs:deployment/leeroy-app: waiting for rollout to complete. (1/2) pending
143+
// - testNs:pod/leeroy-app-xvbg : error pulling container image
116144
func (d *Deployment) ReportSinceLastUpdated() string {
117145
if d.status.reported && !d.status.changed {
118146
return ""
119147
}
120148
d.status.reported = true
121-
return fmt.Sprintf("%s: %s", d, d.status)
149+
if d.status.String() == "" {
150+
return ""
151+
}
152+
var result strings.Builder
153+
result.WriteString(fmt.Sprintf("%s %s: %s", tabHeader, d, d.status))
154+
for _, p := range d.pods {
155+
if p.Error() != nil {
156+
result.WriteString(fmt.Sprintf("%s %s %s: %s\n", tab, tabHeader, p, p.Error()))
157+
}
158+
}
159+
return result.String()
122160
}
123161

124162
func (d *Deployment) cleanupStatus(msg string) string {
@@ -148,3 +186,28 @@ func isErrAndNotRetryAble(err error) bool {
148186
}
149187
return err != ErrKubectlConnection
150188
}
189+
190+
func (d *Deployment) fetchPods(ctx context.Context) error {
191+
timeoutContext, cancel := context.WithTimeout(ctx, defaultPodCheckDeadline)
192+
defer cancel()
193+
pods, err := d.podValidator.Run(timeoutContext)
194+
if err != nil {
195+
return err
196+
}
197+
198+
newPods := map[string]validator.Resource{}
199+
d.status.changed = false
200+
for _, p := range pods {
201+
originalPod, ok := d.pods[p.String()]
202+
if !ok {
203+
d.status.changed = true
204+
event.ResourceStatusCheckEventCompleted(p.String(), p.Error())
205+
} else if originalPod.StatusCode != p.StatusCode {
206+
d.status.changed = true
207+
event.ResourceStatusCheckEventCompleted(p.String(), p.Error())
208+
}
209+
newPods[p.String()] = p
210+
}
211+
d.pods = newPods
212+
return nil
213+
}

pkg/skaffold/deploy/resource/deployment_test.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -192,12 +192,12 @@ func TestReportSinceLastUpdated(t *testing.T) {
192192
description: "updating an error status",
193193
message: "cannot pull image",
194194
err: errors.New("cannot pull image"),
195-
expected: "test-ns:deployment/test: cannot pull image",
195+
expected: " - test-ns:deployment/test: cannot pull image",
196196
},
197197
{
198198
description: "updating a non error status",
199199
message: "waiting for container",
200-
expected: "test-ns:deployment/test: waiting for container",
200+
expected: " - test-ns:deployment/test: waiting for container",
201201
},
202202
}
203203
for _, test := range tests {
@@ -222,7 +222,7 @@ func TestReportSinceLastUpdatedMultipleTimes(t *testing.T) {
222222
description: "report first time should return status",
223223
statuses: []string{"cannot pull image"},
224224
reportStatusSeq: []bool{true},
225-
expected: "test-ns:deployment/test: cannot pull image",
225+
expected: " - test-ns:deployment/test: cannot pull image",
226226
},
227227
{
228228
description: "report 2nd time should not return when same status",
@@ -234,7 +234,7 @@ func TestReportSinceLastUpdatedMultipleTimes(t *testing.T) {
234234
description: "report called after multiple changes but last status was not changed.",
235235
statuses: []string{"cannot pull image", "changed but not reported", "changed but not reported", "changed but not reported"},
236236
reportStatusSeq: []bool{true, false, false, true},
237-
expected: "test-ns:deployment/test: changed but not reported",
237+
expected: " - test-ns:deployment/test: changed but not reported",
238238
},
239239
}
240240
for _, test := range tests {

pkg/skaffold/deploy/resource/status.go

+11-1
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,14 @@ limitations under the License.
1616

1717
package resource
1818

19+
import (
20+
"github.com/GoogleContainerTools/skaffold/proto"
21+
)
22+
1923
type Status struct {
2024
err error
2125
details string
26+
errCode proto.StatusCode
2227
changed bool
2328
reported bool
2429
}
@@ -27,6 +32,10 @@ func (rs Status) Error() error {
2732
return rs.err
2833
}
2934

35+
func (rs Status) ErrorCode() proto.StatusCode {
36+
return rs.errCode
37+
}
38+
3039
func (rs Status) String() string {
3140
if rs.err != nil {
3241
return rs.err.Error()
@@ -44,10 +53,11 @@ func (rs Status) Equal(other Status) bool {
4453
return rs.err == other.err
4554
}
4655

47-
func newStatus(msg string, err error) Status {
56+
func newStatus(msg string, errCode proto.StatusCode, err error) Status {
4857
return Status{
4958
details: msg,
5059
err: err,
60+
errCode: errCode,
5161
changed: true,
5262
}
5363
}

pkg/skaffold/deploy/resource/status_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ func TestString(t *testing.T) {
5353
}
5454
for _, test := range tests {
5555
testutil.Run(t, test.description, func(t *testutil.T) {
56-
status := newStatus(test.details, test.err)
56+
status := newStatus(test.details, 0, test.err)
5757
t.CheckDeepEqual(test.expected, status.String())
5858
})
5959
}

pkg/skaffold/deploy/status_check.go

+17-6
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ import (
3030
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3131
"k8s.io/client-go/kubernetes"
3232

33+
"github.com/GoogleContainerTools/skaffold/pkg/diag"
34+
"github.com/GoogleContainerTools/skaffold/pkg/diag/validator"
3335
"github.com/GoogleContainerTools/skaffold/pkg/skaffold/deploy/resource"
3436
"github.com/GoogleContainerTools/skaffold/pkg/skaffold/event"
3537
pkgkubernetes "github.com/GoogleContainerTools/skaffold/pkg/skaffold/kubernetes"
@@ -85,18 +87,19 @@ func statusCheck(ctx context.Context, defaultLabeller *DefaultLabeller, runCtx *
8587
wg.Add(1)
8688
go func(r *resource.Deployment) {
8789
defer wg.Done()
90+
// keep updating the resource status until it fails/succeeds/times out
8891
pollDeploymentStatus(ctx, runCtx, r)
8992
rcCopy := c.markProcessed(r.Status().Error())
9093
printStatusCheckSummary(out, r, rcCopy)
9194
}(d)
9295
}
9396

94-
// Retrieve pending resource states
97+
// Retrieve pending deployments statuses
9598
go func() {
9699
printDeploymentStatus(ctx, out, deployments, deadline)
97100
}()
98101

99-
// Wait for all deployment status to be fetched
102+
// Wait for all deployment statuses to be fetched
100103
wg.Wait()
101104
return getSkaffoldDeployStatus(c)
102105
}
@@ -109,15 +112,23 @@ func getDeployments(client kubernetes.Interface, ns string, l *DefaultLabeller,
109112
return nil, fmt.Errorf("could not fetch deployments: %w", err)
110113
}
111114

112-
deployments := make([]*resource.Deployment, 0, len(deps.Items))
113-
for _, d := range deps.Items {
115+
deployments := make([]*resource.Deployment, len(deps.Items))
116+
for i, d := range deps.Items {
114117
var deadline time.Duration
115118
if d.Spec.ProgressDeadlineSeconds == nil || *d.Spec.ProgressDeadlineSeconds == kubernetesMaxDeadline {
116119
deadline = deadlineDuration
117120
} else {
118121
deadline = time.Duration(*d.Spec.ProgressDeadlineSeconds) * time.Second
119122
}
120-
deployments = append(deployments, resource.NewDeployment(d.Name, d.Namespace, deadline))
123+
pd := diag.New([]string{d.Namespace}).
124+
WithLabel(RunIDLabel, l.Labels()[RunIDLabel]).
125+
WithValidators([]validator.Validator{validator.NewPodValidator(client)})
126+
127+
for k, v := range d.Spec.Template.Labels {
128+
pd = pd.WithLabel(k, v)
129+
}
130+
131+
deployments[i] = resource.NewDeployment(d.Name, d.Namespace, deadline).WithValidator(pd)
121132
}
122133

123134
return deployments, nil
@@ -208,7 +219,7 @@ func printStatus(deployments []*resource.Deployment, out io.Writer) bool {
208219
allDone = false
209220
if str := r.ReportSinceLastUpdated(); str != "" {
210221
event.ResourceStatusCheckEventUpdated(r.String(), str)
211-
fmt.Fprintln(out, tabHeader, trimNewLine(str))
222+
fmt.Fprintln(out, trimNewLine(str))
212223
}
213224
}
214225
return allDone

pkg/skaffold/deploy/status_check_test.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@ import (
2323
"time"
2424

2525
"github.com/google/go-cmp/cmp"
26+
"github.com/google/go-cmp/cmp/cmpopts"
2627
appsv1 "k8s.io/api/apps/v1"
2728
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2829
"k8s.io/apimachinery/pkg/runtime"
2930
fakekubeclientset "k8s.io/client-go/kubernetes/fake"
3031
utilpointer "k8s.io/utils/pointer"
3132

33+
"github.com/GoogleContainerTools/skaffold/pkg/diag"
3234
"github.com/GoogleContainerTools/skaffold/pkg/skaffold/config"
3335
"github.com/GoogleContainerTools/skaffold/pkg/skaffold/deploy/resource"
3436
"github.com/GoogleContainerTools/skaffold/pkg/skaffold/event"
@@ -214,7 +216,8 @@ func TestGetDeployments(t *testing.T) {
214216
client := fakekubeclientset.NewSimpleClientset(objs...)
215217
actual, err := getDeployments(client, "test", labeller, 200*time.Second)
216218
t.CheckErrorAndDeepEqual(test.shouldErr, err, &test.expected, &actual,
217-
cmp.AllowUnexported(resource.Deployment{}, resource.Status{}))
219+
cmp.AllowUnexported(resource.Deployment{}, resource.Status{}),
220+
cmpopts.IgnoreInterfaces(struct{ diag.Diagnose }{}))
218221
})
219222
}
220223
}

0 commit comments

Comments
 (0)