Skip to content

Commit 5d60723

Browse files
fabriziopandinik8s-infra-cherrypick-robot
authored and
k8s-infra-cherrypick-robot
committed
Add k/v pairs describing the overall status of the control plane
1 parent 726572c commit 5d60723

File tree

6 files changed

+117
-18
lines changed

6 files changed

+117
-18
lines changed

controlplane/kubeadm/internal/control_plane.go

+80
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ package internal
1818

1919
import (
2020
"context"
21+
"fmt"
22+
"sort"
23+
"strings"
2124

2225
"github.com/pkg/errors"
2326
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -32,6 +35,7 @@ import (
3235
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
3336
"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd"
3437
"sigs.k8s.io/cluster-api/util/collections"
38+
"sigs.k8s.io/cluster-api/util/conditions"
3539
"sigs.k8s.io/cluster-api/util/failuredomains"
3640
"sigs.k8s.io/cluster-api/util/patch"
3741
)
@@ -385,3 +389,79 @@ func (c *ControlPlane) InjectTestManagementCluster(managementCluster ManagementC
385389
c.managementCluster = managementCluster
386390
c.workloadCluster = nil
387391
}
392+
393+
// StatusToLogKeyAndValues returns following key/value pairs describing the overall status of the control plane:
394+
// - machines is the list of KCP machines; each machine might have additional notes surfacing
395+
// - if the machine has been created in the current reconcile (new)
396+
// - if machines node is not yet (node ref not set)
397+
// - if the machine has bee marked for remediation (health check failed)
398+
// - if there are unhealthy control plane component on the machine
399+
// - if the machine has a deletion timestamp/has been deleted in the current reconcile (deleting)
400+
// - if the machine is not up to date with the KCP spec (not up to date)
401+
//
402+
// - etcdMembers list as reported by etcd.
403+
func (c *ControlPlane) StatusToLogKeyAndValues(newMachine, deletedMachine *clusterv1.Machine) []any {
404+
controlPlaneMachineHealthConditions := []clusterv1.ConditionType{
405+
controlplanev1.MachineAPIServerPodHealthyCondition,
406+
controlplanev1.MachineControllerManagerPodHealthyCondition,
407+
controlplanev1.MachineSchedulerPodHealthyCondition,
408+
}
409+
if c.IsEtcdManaged() {
410+
controlPlaneMachineHealthConditions = append(controlPlaneMachineHealthConditions,
411+
controlplanev1.MachineEtcdPodHealthyCondition,
412+
controlplanev1.MachineEtcdMemberHealthyCondition,
413+
)
414+
}
415+
416+
machines := []string{}
417+
for _, m := range c.Machines {
418+
notes := []string{}
419+
420+
if m.Status.NodeRef == nil {
421+
notes = append(notes, "node ref not set")
422+
}
423+
424+
if c.MachinesToBeRemediatedByKCP().Has(m) {
425+
notes = append(notes, "health check failed")
426+
}
427+
428+
for _, condition := range controlPlaneMachineHealthConditions {
429+
if conditions.IsUnknown(m, condition) {
430+
notes = append(notes, strings.Replace(string(condition), "Healthy", " health unknown", -1))
431+
}
432+
if conditions.IsFalse(m, condition) {
433+
notes = append(notes, strings.Replace(string(condition), "Healthy", " not healthy", -1))
434+
}
435+
}
436+
437+
if !c.UpToDateMachines().Has(m) {
438+
notes = append(notes, "not up to date")
439+
}
440+
441+
if !m.DeletionTimestamp.IsZero() || (deletedMachine != nil && m.Name == deletedMachine.Name) {
442+
notes = append(notes, "deleting")
443+
}
444+
445+
name := m.Name
446+
if len(notes) > 0 {
447+
name = fmt.Sprintf("%s (%s)", name, strings.Join(notes, ", "))
448+
}
449+
machines = append(machines, name)
450+
}
451+
452+
if newMachine != nil {
453+
machines = append(machines, fmt.Sprintf("%s (new)", newMachine.Name))
454+
}
455+
sort.Strings(machines)
456+
457+
etcdMembers := []string{}
458+
for _, m := range c.EtcdMembers {
459+
etcdMembers = append(etcdMembers, m.Name)
460+
}
461+
sort.Strings(etcdMembers)
462+
463+
return []any{
464+
"machines", strings.Join(machines, ", "),
465+
"etcdMembers", strings.Join(etcdMembers, ", "),
466+
}
467+
}

controlplane/kubeadm/internal/controllers/controller.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,8 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
672672
continue
673673
}
674674

675-
log.Info("Deleting Machine (KCP deleted)")
675+
log.WithValues(controlPlane.StatusToLogKeyAndValues(nil, machineToDelete)...).
676+
Info("Deleting Machine (KCP deleted)")
676677
if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) {
677678
errs = append(errs, errors.Wrapf(err, "failed to delete control plane Machine %s", klog.KObj(machineToDelete)))
678679
}

controlplane/kubeadm/internal/controllers/helpers.go

+5-12
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ import (
2929
"k8s.io/apimachinery/pkg/types"
3030
kerrors "k8s.io/apimachinery/pkg/util/errors"
3131
"k8s.io/apiserver/pkg/storage/names"
32-
"k8s.io/klog/v2"
3332
ctrl "sigs.k8s.io/controller-runtime"
3433
"sigs.k8s.io/controller-runtime/pkg/client"
3534

@@ -183,14 +182,13 @@ func (r *KubeadmControlPlaneReconciler) reconcileExternalReference(ctx context.C
183182
return patchHelper.Patch(ctx, obj)
184183
}
185184

186-
func (r *KubeadmControlPlaneReconciler) cloneConfigsAndGenerateMachine(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, bootstrapSpec *bootstrapv1.KubeadmConfigSpec, failureDomain *string) error {
185+
func (r *KubeadmControlPlaneReconciler) cloneConfigsAndGenerateMachine(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, bootstrapSpec *bootstrapv1.KubeadmConfigSpec, failureDomain *string) (*clusterv1.Machine, error) {
187186
var errs []error
188-
log := ctrl.LoggerFrom(ctx)
189187

190188
// Compute desired Machine
191189
machine, err := r.computeDesiredMachine(kcp, cluster, failureDomain, nil)
192190
if err != nil {
193-
return errors.Wrap(err, "failed to create Machine: failed to compute desired Machine")
191+
return nil, errors.Wrap(err, "failed to create Machine: failed to compute desired Machine")
194192
}
195193

196194
// Since the cloned resource should eventually have a controller ref for the Machine, we create an
@@ -222,7 +220,7 @@ func (r *KubeadmControlPlaneReconciler) cloneConfigsAndGenerateMachine(ctx conte
222220
// Safe to return early here since no resources have been created yet.
223221
conditions.MarkFalse(kcp, controlplanev1.MachinesCreatedCondition, controlplanev1.InfrastructureTemplateCloningFailedReason,
224222
clusterv1.ConditionSeverityError, err.Error())
225-
return errors.Wrap(err, "failed to clone infrastructure template")
223+
return nil, errors.Wrap(err, "failed to clone infrastructure template")
226224
}
227225
machine.Spec.InfrastructureRef = *infraRef
228226

@@ -250,15 +248,10 @@ func (r *KubeadmControlPlaneReconciler) cloneConfigsAndGenerateMachine(ctx conte
250248
if err := r.cleanupFromGeneration(ctx, infraRef, bootstrapRef); err != nil {
251249
errs = append(errs, errors.Wrap(err, "failed to cleanup generated resources"))
252250
}
253-
254-
return kerrors.NewAggregate(errs)
251+
return nil, kerrors.NewAggregate(errs)
255252
}
256253

257-
log.Info("Machine created (scale up)",
258-
"Machine", klog.KObj(machine),
259-
infraRef.Kind, klog.KRef(infraRef.Namespace, infraRef.Name),
260-
bootstrapRef.Kind, klog.KRef(bootstrapRef.Namespace, bootstrapRef.Name))
261-
return nil
254+
return machine, nil
262255
}
263256

264257
func (r *KubeadmControlPlaneReconciler) cleanupFromGeneration(ctx context.Context, remoteRefs ...*corev1.ObjectReference) error {

controlplane/kubeadm/internal/controllers/remediation.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,8 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
311311
}
312312

313313
// Surface the operation is in progress.
314-
log.Info("Deleting Machine (remediating unhealthy Machine)")
314+
log.WithValues(controlPlane.StatusToLogKeyAndValues(nil, machineToBeRemediated)...).
315+
Info("Deleting Machine (remediating unhealthy Machine)")
315316
conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationInProgressReason, clusterv1.ConditionSeverityWarning, "")
316317

317318
v1beta2conditions.Set(machineToBeRemediated, metav1.Condition{

controlplane/kubeadm/internal/controllers/scale.go

+18-4
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,19 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte
5353
return ctrl.Result{}, err
5454
}
5555

56-
if err := r.cloneConfigsAndGenerateMachine(ctx, controlPlane.Cluster, controlPlane.KCP, bootstrapSpec, fd); err != nil {
56+
newMachine, err := r.cloneConfigsAndGenerateMachine(ctx, controlPlane.Cluster, controlPlane.KCP, bootstrapSpec, fd)
57+
if err != nil {
5758
logger.Error(err, "Failed to create initial control plane Machine")
5859
r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedInitialization", "Failed to create initial control plane Machine for cluster %s control plane: %v", klog.KObj(controlPlane.Cluster), err)
5960
return ctrl.Result{}, err
6061
}
6162

63+
logger.WithValues(controlPlane.StatusToLogKeyAndValues(newMachine, nil)...).
64+
Info("Machine created (scale up)",
65+
"Machine", klog.KObj(newMachine),
66+
newMachine.Spec.InfrastructureRef.Kind, klog.KRef(newMachine.Spec.InfrastructureRef.Namespace, newMachine.Spec.InfrastructureRef.Name),
67+
newMachine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(newMachine.Spec.Bootstrap.ConfigRef.Namespace, newMachine.Spec.Bootstrap.ConfigRef.Name))
68+
6269
// Requeue the control plane, in case there are additional operations to perform
6370
return ctrl.Result{Requeue: true}, nil
6471
}
@@ -87,12 +94,19 @@ func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context,
8794
return ctrl.Result{}, err
8895
}
8996

90-
if err := r.cloneConfigsAndGenerateMachine(ctx, controlPlane.Cluster, controlPlane.KCP, bootstrapSpec, fd); err != nil {
97+
newMachine, err := r.cloneConfigsAndGenerateMachine(ctx, controlPlane.Cluster, controlPlane.KCP, bootstrapSpec, fd)
98+
if err != nil {
9199
logger.Error(err, "Failed to create additional control plane Machine")
92100
r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedScaleUp", "Failed to create additional control plane Machine for cluster % control plane: %v", klog.KObj(controlPlane.Cluster), err)
93101
return ctrl.Result{}, err
94102
}
95103

104+
logger.WithValues(controlPlane.StatusToLogKeyAndValues(newMachine, nil)...).
105+
Info("Machine created (scale up)",
106+
"Machine", klog.KObj(newMachine),
107+
newMachine.Spec.InfrastructureRef.Kind, klog.KRef(newMachine.Spec.InfrastructureRef.Namespace, newMachine.Spec.InfrastructureRef.Name),
108+
newMachine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(newMachine.Spec.Bootstrap.ConfigRef.Namespace, newMachine.Spec.Bootstrap.ConfigRef.Name))
109+
96110
// Requeue the control plane, in case there are other operations to perform
97111
return ctrl.Result{Requeue: true}, nil
98112
}
@@ -138,14 +152,14 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
138152
// NOTE: etcd member removal will be performed by the kcp-cleanup hook after machine completes drain & all volumes are detached.
139153
}
140154

141-
logger = logger.WithValues("Machine", klog.KObj(machineToDelete))
142-
logger.Info("Deleting Machine (scale down)")
143155
if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) {
144156
logger.Error(err, "Failed to delete control plane machine")
145157
r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedScaleDown",
146158
"Failed to delete control plane Machine %s for cluster %s control plane: %v", machineToDelete.Name, klog.KObj(controlPlane.Cluster), err)
147159
return ctrl.Result{}, err
148160
}
161+
logger.WithValues(controlPlane.StatusToLogKeyAndValues(nil, machineToDelete)...).
162+
Info("Deleting Machine (scale down)", "Machine", klog.KObj(machineToDelete))
149163

150164
// Requeue the control plane, in case there are additional operations to perform
151165
return ctrl.Result{Requeue: true}, nil

util/collections/machine_collection.go

+10
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,16 @@ func ToMachineList(machines Machines) clusterv1.MachineList {
123123
return ml
124124
}
125125

126+
// Has return true when the collection has the given machine.
127+
func (s Machines) Has(machine *clusterv1.Machine) bool {
128+
for _, m := range s {
129+
if m.Name == machine.Name && m.Namespace == machine.Namespace {
130+
return true
131+
}
132+
}
133+
return false
134+
}
135+
126136
// Insert adds items to the set.
127137
func (s Machines) Insert(machines ...*clusterv1.Machine) {
128138
for i := range machines {

0 commit comments

Comments
 (0)