Skip to content

Add pod checks #3952

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
10 changes: 5 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ replace (
github.com/docker/docker => github.com/docker/docker v1.4.2-0.20190319215453-e7b5f7dbe98c
golang.org/x/crypto v0.0.0-20190129210102-0709b304e793 => golang.org/x/crypto v0.0.0-20180904163835-0709b304e793
golang.org/x/sys => golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b
k8s.io/api => k8s.io/api v0.0.0-20190620084959-7cf5895f2711
k8s.io/apimachinery => k8s.io/apimachinery v0.0.0-20190612205821-1799e75a0719
k8s.io/client-go => k8s.io/client-go v0.0.0-20190620085101-78d2af792bab
k8s.io/kubectl => k8s.io/kubectl v0.0.0-20190831163037-3b58a944563f
k8s.io/kubernetes => k8s.io/kubernetes v1.12.10
k8s.io/api => k8s.io/api v0.17.0
k8s.io/apimachinery => k8s.io/apimachinery v0.17.0
k8s.io/client-go => k8s.io/client-go v0.17.0
k8s.io/kubernetes => k8s.io/kubernetes v1.14.0
k8s.io/kubectl => k8s.io/kubectl v0.17.0
)

require (
Expand Down
15 changes: 15 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,7 @@ github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ=
github.com/xeipuuv/gojsonschema v0.0.0-20180618132009-1d523034197f/go.mod h1:5yf86TLmAcydyeJq5YvxkGPE2fm/u4myDekKRoLuqhs=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xlab/handysort v0.0.0-20150421192137-fb3537ed64a1/go.mod h1:QcJo0QPSfTONNIgpN5RA8prR7fF8nkF6cTWTcNerRO8=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
Expand Down Expand Up @@ -908,14 +909,22 @@ honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXe
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
k8s.io/api v0.0.0-20190620084959-7cf5895f2711 h1:BblVYz/wE5WtBsD/Gvu54KyBUTJMflolzc5I2DTvh50=
k8s.io/api v0.0.0-20190620084959-7cf5895f2711/go.mod h1:TBhBqb1AWbBQbW3XRusr7n7E4v2+5ZY8r8sAMnyFC5A=
k8s.io/api v0.17.0 h1:H9d/lw+VkZKEVIUc8F3wgiQ+FUXTTr21M87jXLU7yqM=
k8s.io/api v0.17.0/go.mod h1:npsyOePkeP0CPwyGfXDHxvypiYMJxBWAMpQxCaJ4ZxI=
k8s.io/apimachinery v0.0.0-20190612205821-1799e75a0719 h1:uV4S5IB5g4Nvi+TBVNf3e9L4wrirlwYJ6w88jUQxTUw=
k8s.io/apimachinery v0.0.0-20190612205821-1799e75a0719/go.mod h1:I4A+glKBHiTgiEjQiCCQfCAIcIMFGt291SmsvcrFzJA=
k8s.io/apimachinery v0.17.0 h1:xRBnuie9rXcPxUkDizUsGvPf1cnlZCFu210op7J7LJo=
k8s.io/apimachinery v0.17.0/go.mod h1:b9qmWdKlLuU9EBh+06BtLcSf/Mu89rWL33naRxs1uZg=
k8s.io/apiserver v0.17.0/go.mod h1:ABM+9x/prjINN6iiffRVNCBR2Wk7uY4z+EtEGZD48cg=
k8s.io/cli-runtime v0.0.0-20190831080432-9d670f2021f4/go.mod h1:TtjkdmxYMLASzYbE8E7AUr/ZrXMcmXLnDLRY4sVWspw=
k8s.io/cli-runtime v0.17.0/go.mod h1:1E5iQpMODZq2lMWLUJELwRu2MLWIzwvMgDBpn3Y81Qo=
k8s.io/client-go v0.0.0-20190620085101-78d2af792bab h1:E8Fecph0qbNsAbijJJQryKu4Oi9QTp5cVpjTE+nqg6g=
k8s.io/client-go v0.0.0-20190620085101-78d2af792bab/go.mod h1:E95RaSlHr79aHaX0aGSwcPNfygDiPKOVXdmivCIZT0k=
k8s.io/client-go v0.17.0 h1:8QOGvUGdqDMFrm9sD6IUFl256BcffynGoe80sxgTEDg=
k8s.io/client-go v0.17.0/go.mod h1:TYgR6EUHs6k45hb6KWjVD6jFZvJV4gHDikv/It0xz+k=
k8s.io/cloud-provider v0.17.0/go.mod h1:Ze4c3w2C0bRsjkBUoHpFi+qWe3ob1wI2/7cUn+YQIDE=
k8s.io/code-generator v0.0.0-20190831074504-732c9ca86353/go.mod h1:V5BD6M4CyaN5m+VthcclXWsVcT1Hu+glwa1bi3MIsyE=
k8s.io/code-generator v0.17.0/go.mod h1:DVmfPQgxQENqDIzVR2ddLXMH34qeszkKSdH/N+s+38s=
k8s.io/code-generator v0.17.2/go.mod h1:DVmfPQgxQENqDIzVR2ddLXMH34qeszkKSdH/N+s+38s=
k8s.io/component-base v0.0.0-20190831075413-37a093468564/go.mod h1:pB3zmhcOR5xextKMKdxRr2XUCERS2UNFA/6Tr2WmSJs=
k8s.io/component-base v0.17.0/go.mod h1:rKuRAokNMY2nn2A6LP/MiwpoaMRHpfRnrPaUJJj1Yoc=
Expand All @@ -935,10 +944,15 @@ k8s.io/kube-openapi v0.0.0-20191107075043-30be4d16710a h1:UcxjrRMyNx/i/y8G7kPvLy
k8s.io/kube-openapi v0.0.0-20191107075043-30be4d16710a/go.mod h1:1TqjTSzOxsLGIKfj0lK8EeCP7K1iUG65v09OM0/WG5E=
k8s.io/kubectl v0.0.0-20190831163037-3b58a944563f h1:rngzSSBHZ0ofTBvPu8HT9R+EMc3PKE9XUD9DrK+QdAM=
k8s.io/kubectl v0.0.0-20190831163037-3b58a944563f/go.mod h1:/TM8X12sDkA1rdjpK4qMqSYiDZbicsgYIGjicJ6P4EU=
k8s.io/kubectl v0.17.0 h1:xD4EWlL+epc/JTO1gvSjmV9yiYF0Z2wiHK2DIek6URY=
k8s.io/kubectl v0.17.0/go.mod h1:jIPrUAW656Vzn9wZCCe0PC+oTcu56u2HgFD21Xbfk1s=
k8s.io/kubernetes v1.12.10/go.mod h1:ocZa8+6APFNC2tX1DZASIbocyYT5jHzqFVsY5aoB7Jk=
k8s.io/kubernetes v1.13.0/go.mod h1:ocZa8+6APFNC2tX1DZASIbocyYT5jHzqFVsY5aoB7Jk=
k8s.io/kubernetes v1.14.0/go.mod h1:ocZa8+6APFNC2tX1DZASIbocyYT5jHzqFVsY5aoB7Jk=
k8s.io/legacy-cloud-providers v0.17.0 h1:ITm7sUthpxQyP96MU7K4Ra9M9M1k9eywUWv9IiTaxzc=
k8s.io/legacy-cloud-providers v0.17.0/go.mod h1:DdzaepJ3RtRy+e5YhNtrCYwlgyK87j/5+Yfp0L9Syp8=
k8s.io/metrics v0.0.0-20190831080339-bd7772846802/go.mod h1:98g4ghmWXz8M0qrhPme3ZnY3E/zPsSSbLlqOsw7WVa4=
k8s.io/metrics v0.17.0/go.mod h1:EH1D3YAwN6d7bMelrElnLhLg72l/ERStyv2SIQVt6Do=
k8s.io/utils v0.0.0-20190221042446-c2654d5206da/go.mod h1:8k8uAuAQ0rXslZKaEWd0c3oVhZz7sSzSiPnVZayjIX0=
k8s.io/utils v0.0.0-20190801114015-581e00157fb1/go.mod h1:sZAwmy6armz5eXlNoLmJcl4F1QuKu7sr+mFQ0byX7Ew=
k8s.io/utils v0.0.0-20191114184206-e782cd3c129f h1:GiPwtSzdP43eI1hpPCbROQCCIgCuiMMNF8YUVLF3vJo=
Expand All @@ -956,3 +970,4 @@ sigs.k8s.io/structured-merge-diff v0.0.0-20190525122527-15d366b2352e/go.mod h1:w
sigs.k8s.io/structured-merge-diff v1.0.1-0.20191108220359-b1b620dd3f06/go.mod h1:/ULNhyfzRopfcjskuui0cTITekDduZ7ycKN3oUT9R18=
sigs.k8s.io/yaml v1.1.0 h1:4A07+ZFc2wgJwo8YNlQpr1rVlgUDlxXHhPJciaPY5gs=
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
vbom.ml/util v0.0.0-20160121211510-db5cfe13f5cc/go.mod h1:so/NYdZXCz+E3ZpW0uAoCj6uzU2+8OWDFv/HxUSs7kI=
84 changes: 84 additions & 0 deletions pkg/diag/diag.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
Copyright 2020 The Skaffold Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package diag

import (
"context"
"fmt"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/GoogleContainerTools/skaffold/pkg/diag/validator"
)

// Diagnose runs a set of validators and collects the resources they report.
// The With* methods return a Diagnose so that calls can be chained.
type Diagnose interface {
// Run executes the configured validators and returns the resources they
// reported, together with an error if any validator failed.
Run() ([]validator.Resource, error)
// WithLabel sets the label selector used when listing resources.
WithLabel(label string) Diagnose
// WithValidators sets the validators that Run executes.
WithValidators(v []validator.Validator) Diagnose
}

// diag is the Diagnose implementation returned by New.
type diag struct {
// listOptions is passed unchanged to every validator's Validate call.
listOptions metav1.ListOptions
// namespaces lists the namespaces handed to the validators; New drops empty names.
namespaces []string
// validators are executed by Run in slice order.
validators []validator.Validator
}

// New returns a Diagnose that operates on the given namespaces.
// Empty namespace names are dropped; the remaining names keep their order.
func New(namespaces []string) Diagnose {
	var kept []string
	for _, name := range namespaces {
		if name == "" {
			continue
		}
		kept = append(kept, name)
	}
	d := &diag{namespaces: kept}
	return d
}

// WithLabel makes the validators list only resources matching the given label
// selector. The stored list options are replaced as a whole, so any previously
// configured option is reset. The receiver is returned to allow chaining.
func (d *diag) WithLabel(label string) Diagnose {
	selector := metav1.ListOptions{LabelSelector: label}
	d.listOptions = selector
	return d
}

// WithValidators replaces the validators executed by Run with v and returns
// the receiver so that calls can be chained.
func (d *diag) WithValidators(v []validator.Validator) Diagnose {
d.validators = v
return d
}

// Run calls every configured validator once per namespace and gathers the
// resources they return.
//
// A failing validator does not stop the run: its error is remembered, any
// resources it still returned are kept, and the remaining validators and
// namespaces are processed. When at least one error occurred, the collected
// resources are returned together with a single error whose message lists
// every individual error, one per line.
func (d *diag) Run() ([]validator.Resource, error) {
	var (
		resources = []validator.Resource{}
		failures  []error
	)
	for _, val := range d.validators {
		for _, namespace := range d.namespaces {
			found, err := val.Validate(context.Background(), namespace, d.listOptions)
			// Keep whatever was returned, even alongside an error.
			resources = append(resources, found...)
			if err == nil {
				continue
			}
			failures = append(failures, err)
		}
	}
	if len(failures) > 0 {
		details := ""
		for _, failure := range failures {
			details += failure.Error() + "\n"
		}
		return resources, fmt.Errorf("following errors occurred %s", details)
	}
	return resources, nil
}
176 changes: 120 additions & 56 deletions pkg/diag/validator/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,30 @@ package validator
import (
"context"
"fmt"
"regexp"
"strings"

v1 "k8s.io/api/core/v1"
meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
)

const (
success = "Succeeded"
running = "Running"
unknown = "Unknown"
pending = "Pending"
success = "Succeeded"
running = "Running"
actionableMessage = `could not determine pod status. Try kubectl describe -n %s po/%s`
errorPrefix = `(?P<Prefix>)(?P<DaemonLog>Error response from daemon\:)(?P<Error>.*)`
taintsExp = `\{(?P<taint>.*?):.*?}`
crashLoopBackOff = "CrashLoopBackOff"
runContainerError = "RunContainerError"
imagePullErr = "ErrImagePull"
errImagePullBackOff = "ErrImagePullBackOff"
containerCreating = "ContainerCreating"
)

// for testing
var (
waitingContainerStatus = getWaitingContainerStatus
re = regexp.MustCompile(errorPrefix)
taintsRe = regexp.MustCompile(taintsExp)
)

// PodValidator implements the Validator interface for Pods
Expand All @@ -56,81 +64,137 @@ func (p *PodValidator) Validate(ctx context.Context, ns string, opts meta_v1.Lis
rs := []Resource{}
for _, po := range pods.Items {
ps := p.getPodStatus(&po)
rs = append(rs, NewResourceFromObject(&po, Status(ps.phase), ps.reason.String()))
rs = append(rs, NewResourceFromObject(&po, Status(ps.phase), ps.err))
}
return rs, nil
}

type podStatus struct {
phase string
reason *podReason
}

type podReason struct {
reason string
message string
}

func (r *podReason) String() string {
if r == nil {
return ""
}
return fmt.Sprintf("pod unstable due to reason: %s, message: %s", r.reason, r.message)
}

func (p *PodValidator) getPodStatus(pod *v1.Pod) podStatus {
func (p *PodValidator) getPodStatus(pod *v1.Pod) *podStatus {
ps := newPodStatus(pod.Name, pod.Namespace, string(pod.Status.Phase))
switch pod.Status.Phase {
case v1.PodSucceeded:
return podStatus{phase: success}
case v1.PodRunning:
return podStatus{phase: running}
return ps
default:
return getPendingDetails(pod)
return ps.withErr(getContainerStatus(pod))
}
}

func getPendingDetails(pod *v1.Pod) podStatus {
func getContainerStatus(pod *v1.Pod) error {
// See https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-conditions
for _, c := range pod.Status.Conditions {
switch c.Status {
case v1.ConditionUnknown:
return newPendingStatus(unknown, c.Message)
default:
// TODO(dgageot): Add EphemeralContainerStatuses
cs := append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...)
reason, detail := waitingContainerStatus(cs)
return newPendingStatus(reason, detail)
if c.Type == v1.PodScheduled {
switch c.Status {
case v1.ConditionFalse:
return getTolerationsDetails(c.Reason, c.Message)
case v1.ConditionTrue:
// TODO(dgageot): Add EphemeralContainerStatuses
cs := append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...)
return getWaitingContainerStatus(cs)
case v1.ConditionUnknown:
return fmt.Errorf(c.Message)
}
}
}
return newUnknownStatus()
return nil
}

func getWaitingContainerStatus(cs []v1.ContainerStatus) (string, string) {
func getWaitingContainerStatus(cs []v1.ContainerStatus) error {
// See https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states
for _, c := range cs {
if c.State.Waiting != nil {
return c.State.Waiting.Reason, c.State.Waiting.Message
return extractErrorMessageFromContainerStatus(c)
}
}
return success, success
// No waiting containers, pod should be in good health.
return nil
}

func newPendingStatus(r string, d string) podStatus {
return podStatus{
phase: pending,
reason: &podReason{
reason: r,
message: d,
},
// getTolerationsDetails turns the reason and message of an unsatisfied
// PodScheduled condition into a more readable error.
//
// Every taint of the form "{key: value}" that taintsRe finds in message
// contributes exactly one entry to the resulting error. Well-known node taints
// are translated into a short description; any other taint is reported by its
// key, so the joined message never contains empty entries. When message
// mentions no taint at all, the error is simply "reason: message".
func getTolerationsDetails(reason string, message string) error {
	matches := taintsRe.FindAllStringSubmatch(message, -1)
	if len(matches) == 0 {
		return fmt.Errorf("%s: %s", reason, message)
	}
	messages := make([]string, 0, len(matches))
	// TODO: Add actionable item to fix these errors.
	for _, m := range matches {
		// m[0] is the whole match and m[1] the captured taint key. The guard
		// only protects against a pattern without a capture group.
		if len(m) < 2 {
			continue
		}
		t := m[1]
		switch t {
		case v1.TaintNodeMemoryPressure:
			messages = append(messages, "1 node has memory pressure")
		case v1.TaintNodeDiskPressure:
			messages = append(messages, "1 node has disk pressure")
		case v1.TaintNodeNotReady:
			messages = append(messages, "1 node is not ready")
		case v1.TaintNodeUnreachable:
			messages = append(messages, "1 node is unreachable")
		case v1.TaintNodeUnschedulable:
			messages = append(messages, "1 node is unschedulable")
		case v1.TaintNodeNetworkUnavailable:
			messages = append(messages, "1 node's network not available")
		case v1.TaintNodePIDPressure:
			messages = append(messages, "1 node has PID pressure")
		default:
			// Unrecognized (e.g. user-defined) taints used to leave an empty
			// slot behind, producing output such as ", , 1 node is not ready".
			messages = append(messages, fmt.Sprintf("1 node has untolerated taint %s", t))
		}
	}
	return fmt.Errorf("%s: 0/%d nodes available: %s", reason, len(matches), strings.Join(messages, ", "))
}

func newUnknownStatus() podStatus {
return podStatus{
phase: unknown,
reason: &podReason{
reason: unknown,
message: unknown,
},
// podStatus describes the state of a single pod: its identity, its phase and,
// when the pod is not healthy, the error explaining why.
type podStatus struct {
// name and namespace identify the pod; String uses them for the kubectl hint.
name string
namespace string
// phase is compared against the success and running constants by isStable.
phase string
// err is set through withErr; nil means no problem was detected.
err error
}

// isStable reports whether the pod needs no further attention: it either
// succeeded, or it is running without a recorded error.
func (p *podStatus) isStable() bool {
	switch p.phase {
	case success:
		return true
	case running:
		return p.err == nil
	default:
		return false
	}
}

// withErr records err on the pod status, overwriting any previous error, and
// returns the same status so that calls can be chained.
func (p *podStatus) withErr(err error) *podStatus {
p.err = err
return p
}

// String renders the pod status for display.
//
// A stable pod yields the empty string. An unstable pod yields its recorded
// error, or, when no error is known, a hint to inspect the pod with
// kubectl describe.
func (p *podStatus) String() string {
	if p.isStable() {
		return ""
	}
	if p.err != nil {
		return fmt.Sprintf("%s", p.err)
	}
	return fmt.Sprintf(actionableMessage, p.namespace, p.name)
}

// extractErrorMessageFromContainerStatus builds a meaningful error from the
// waiting state of container c. The caller must ensure c.State.Waiting is
// non-nil.
//
// Known waiting reasons get a dedicated message. For a run-container error the
// text following "Error response from daemon:" is used when the waiting
// message contains that prefix. Every other case reports the waiting message
// itself, trimmed of surrounding spaces.
func extractErrorMessageFromContainerStatus(c v1.ContainerStatus) error {
	waiting := c.State.Waiting
	detail := waiting.Message
	switch waiting.Reason {
	case containerCreating:
		return fmt.Errorf("creating container %s", c.Name)
	case crashLoopBackOff:
		return fmt.Errorf("restarting failed container %s", c.Name)
	case imagePullErr, errImagePullBackOff:
		return fmt.Errorf("container %s is waiting to start: image %s can't be pulled", c.Name, c.Image)
	case runContainerError:
		// Group 3 of the pattern is the text after the daemon prefix.
		if groups := re.FindStringSubmatch(detail); len(groups) != 0 {
			detail = groups[3]
		}
	}
	return fmt.Errorf("container %s in error: %s", c.Name, trimSpace(detail))
}

// newPodStatus creates the status for pod n in namespace ns with phase p and
// no error recorded.
func newPodStatus(n string, ns string, p string) *podStatus {
	status := podStatus{name: n, namespace: ns, phase: p}
	return &status
}

// trimSpace strips leading and trailing space characters (' ') from msg.
// Other whitespace, such as tabs or newlines, is left untouched.
func trimSpace(msg string) string {
	const cutset = " "
	return strings.TrimRight(strings.TrimLeft(msg, cutset), cutset)
}
Loading