Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions controlplane/kubeadm/internal/controllers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package controllers
import (
"context"
"fmt"
"slices"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -456,6 +457,11 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl
}

if err := r.syncMachines(ctx, controlPlane); err != nil {
// Note: If any of the calls got a NotFound error, it means that at least one Machine got deleted.
// Let's return here so that the next Reconcile will get the updated list of Machines.
if apierrors.IsNotFound(err) {
return ctrl.Result{}, nil // Note: Requeue is not needed, changes to Machines trigger another reconcile.
}
return ctrl.Result{}, errors.Wrap(err, "failed to sync Machines")
}

Expand Down Expand Up @@ -504,7 +510,9 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl
// Note: We have to wait here even if there are no more Machines that need rollout (in-place update in
// progress is not counted as needs rollout).
if machines := controlPlane.MachinesToCompleteInPlaceUpdate(); machines.Len() > 0 {
log.Info("Waiting for in-place update to complete", "machines", strings.Join(machines.Names(), ", "))
for _, machine := range machines {
log.Info(fmt.Sprintf("Waiting for in-place update of Machine %s to complete", machine.Name), "Machine", klog.KObj(machine))
}
return ctrl.Result{}, nil // Note: Changes to Machines trigger another reconcile.
}

Expand All @@ -513,10 +521,12 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl
switch {
case len(machinesNeedingRollout) > 0:
var allMessages []string
for machine, machineUpToDateResult := range machinesUpToDateResults {
allMessages = append(allMessages, fmt.Sprintf("Machine %s needs rollout: %s", machine, strings.Join(machineUpToDateResult.LogMessages, ",")))
machinesNeedingRolloutNames := machinesNeedingRollout.Names()
slices.Sort(machinesNeedingRolloutNames)
for _, name := range machinesNeedingRolloutNames {
allMessages = append(allMessages, fmt.Sprintf("Machine %s needs rollout: %s", name, strings.Join(machinesUpToDateResults[name].LogMessages, ", ")))
}
log.Info(fmt.Sprintf("Rolling out Control Plane machines: %s", strings.Join(allMessages, ",")), "machinesNeedingRollout", machinesNeedingRollout.Names())
log.Info(fmt.Sprintf("Machines need rollout: %s", strings.Join(machinesNeedingRolloutNames, ",")), "reason", strings.Join(allMessages, ", "))
v1beta1conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateV1Beta1Condition, controlplanev1.RollingUpdateInProgressV1Beta1Reason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(machinesNeedingRollout), len(controlPlane.Machines)-len(machinesNeedingRollout))
return r.updateControlPlane(ctx, controlPlane, machinesNeedingRollout, machinesUpToDateResults)
default:
Expand Down Expand Up @@ -1042,10 +1052,15 @@ func reconcileMachineUpToDateCondition(_ context.Context, controlPlane *internal
}

if inplace.IsUpdateInProgress(machine) {
msg := "* In-place update in progress"
if c := conditions.Get(machine, clusterv1.MachineUpdatingCondition); c != nil && c.Status == metav1.ConditionTrue && c.Message != "" {
msg = fmt.Sprintf("* %s", c.Message)
}
conditions.Set(machine, metav1.Condition{
Type: clusterv1.MachineUpToDateCondition,
Status: metav1.ConditionFalse,
Reason: clusterv1.MachineUpToDateUpdatingReason,
Type: clusterv1.MachineUpToDateCondition,
Status: metav1.ConditionFalse,
Reason: clusterv1.MachineUpToDateUpdatingReason,
Message: msg,
})
continue
}
Expand Down
7 changes: 4 additions & 3 deletions controlplane/kubeadm/internal/controllers/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2400,9 +2400,10 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneAndMachinesCondition
Message: "Waiting for a Node with spec.providerID foo to exist",
},
{
Type: clusterv1.MachineUpToDateCondition,
Status: metav1.ConditionFalse,
Reason: clusterv1.MachineUpToDateUpdatingReason,
Type: clusterv1.MachineUpToDateCondition,
Status: metav1.ConditionFalse,
Reason: clusterv1.MachineUpToDateUpdatingReason,
Message: "* In-place update in progress",
},
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func (r *KubeadmControlPlaneReconciler) canUpdateMachine(ctx context.Context, ma
return r.overrideCanUpdateMachineFunc(ctx, machine, machineUpToDateResult)
}

log := ctrl.LoggerFrom(ctx)
log := ctrl.LoggerFrom(ctx).WithValues("Machine", klog.KObj(machine))

// Machine cannot be updated in-place if the feature gate is not enabled.
if !feature.Gates.Enabled(feature.InPlaceUpdates) {
Expand Down Expand Up @@ -79,7 +79,7 @@ func (r *KubeadmControlPlaneReconciler) canUpdateMachine(ctx context.Context, ma
return false, err
}
if !canUpdateMachine {
log.Info(fmt.Sprintf("Machine cannot be updated in-place by extensions: %s", strings.Join(reasons, ",")), "Machine", klog.KObj(machine))
log.Info(fmt.Sprintf("Machine %s cannot be updated in-place by extensions", machine.Name), "reason", strings.Join(reasons, ","))
return false, nil
}
return true, nil
Expand Down
6 changes: 3 additions & 3 deletions controlplane/kubeadm/internal/controllers/inplace_trigger.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ func (r *KubeadmControlPlaneReconciler) triggerInPlaceUpdate(ctx context.Context
return r.overrideTriggerInPlaceUpdate(ctx, machine, machineUpToDateResult)
}

log := ctrl.LoggerFrom(ctx)
log.Info("Triggering in-place update", "Machine", klog.KObj(machine))
log := ctrl.LoggerFrom(ctx).WithValues("Machine", klog.KObj(machine))
log.Info(fmt.Sprintf("Triggering in-place update for Machine %s", machine.Name))

// Mark Machine for in-place update.
// Note: Once we write UpdateInProgressAnnotation we will always continue with the in-place update.
Expand Down Expand Up @@ -133,7 +133,7 @@ func (r *KubeadmControlPlaneReconciler) triggerInPlaceUpdate(ctx context.Context
return errors.Wrapf(err, "failed to complete triggering in-place update for Machine %s", klog.KObj(machine))
}

log.Info("Completed triggering in-place update", "Machine", klog.KObj(machine))
log.Info(fmt.Sprintf("Completed triggering in-place update for Machine %s", machine.Name))
r.recorder.Event(machine, corev1.EventTypeNormal, "SuccessfulStartInPlaceUpdate", "Machine starting in-place update")

// Wait until the cache observed the Machine with PendingHooksAnnotation to ensure subsequent reconciles
Expand Down
6 changes: 3 additions & 3 deletions controlplane/kubeadm/internal/controllers/scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte
}

log.WithValues(controlPlane.StatusToLogKeyAndValues(newMachine, nil)...).
Info("Machine created (init)",
Info(fmt.Sprintf("Machine %s created (init)", newMachine.Name),
"Machine", klog.KObj(newMachine),
newMachine.Spec.InfrastructureRef.Kind, klog.KRef(newMachine.Namespace, newMachine.Spec.InfrastructureRef.Name),
newMachine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(newMachine.Namespace, newMachine.Spec.Bootstrap.ConfigRef.Name))
Expand Down Expand Up @@ -87,7 +87,7 @@ func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context,
}

log.WithValues(controlPlane.StatusToLogKeyAndValues(newMachine, nil)...).
Info("Machine created (scale up)",
Info(fmt.Sprintf("Machine %s created (scale up)", newMachine.Name),
"Machine", klog.KObj(newMachine),
newMachine.Spec.InfrastructureRef.Kind, klog.KRef(newMachine.Namespace, newMachine.Spec.InfrastructureRef.Name),
newMachine.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(newMachine.Namespace, newMachine.Spec.Bootstrap.ConfigRef.Name))
Expand Down Expand Up @@ -144,7 +144,7 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
// Note: We intentionally log after Delete because we want this log line to show up only after DeletionTimestamp has been set.
// Also, setting DeletionTimestamp doesn't mean the Machine is actually deleted (deletion takes some time).
log.WithValues(controlPlane.StatusToLogKeyAndValues(nil, machineToDelete)...).
Info("Deleting Machine (scale down)", "Machine", klog.KObj(machineToDelete))
Info(fmt.Sprintf("Machine %s deleting (scale down)", machineToDelete.Name), "Machine", klog.KObj(machineToDelete))

// Requeue the control plane, in case there are additional operations to perform
return ctrl.Result{Requeue: true}, nil
Expand Down
6 changes: 3 additions & 3 deletions internal/controllers/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) (ctrl.Result
// We only delete the node after the underlying infrastructure is gone.
// https://github.com/kubernetes-sigs/cluster-api/issues/2565
if isDeleteNodeAllowed {
log.Info("Deleting node", "Node", klog.KRef("", m.Status.NodeRef.Name))
log.Info("Deleting Node", "Node", klog.KRef("", m.Status.NodeRef.Name))

var deleteNodeErr error
waitErr := wait.PollUntilContextTimeout(ctx, 2*time.Second, r.nodeDeletionRetryTimeout, true, func(ctx context.Context) (bool, error) {
Expand All @@ -678,9 +678,9 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) (ctrl.Result
return true, nil
})
if waitErr != nil {
log.Error(deleteNodeErr, "Timed out deleting node", "Node", klog.KRef("", m.Status.NodeRef.Name))
log.Error(deleteNodeErr, "Timed out deleting Node", "Node", klog.KRef("", m.Status.NodeRef.Name))
v1beta1conditions.MarkFalse(m, clusterv1.MachineNodeHealthyV1Beta1Condition, clusterv1.DeletionFailedV1Beta1Reason, clusterv1.ConditionSeverityWarning, "")
r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDeleteNode", "error deleting Machine's node: %v", deleteNodeErr)
r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDeleteNode", "error deleting Machine's Node: %v", deleteNodeErr)

// If the node deletion timeout is not expired yet, requeue the Machine for reconciliation.
if m.Spec.Deletion.NodeDeletionTimeoutSeconds == nil || *m.Spec.Deletion.NodeDeletionTimeoutSeconds == 0 || m.DeletionTimestamp.Add(time.Duration(*m.Spec.Deletion.NodeDeletionTimeoutSeconds)*time.Second).After(time.Now()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func (r *Reconciler) reconcileInPlaceUpdate(ctx context.Context, s *scope) (ctrl

// If hook is not pending, we're waiting for the owner controller to mark it as pending.
if !hasUpdateMachinePending {
log.Info("In-place update annotation is set, waiting for UpdateMachine hook to be marked as pending")
log.Info("Machine marked for in-place update, waiting for owning controller to mark UpdateMachine hook as pending")
return ctrl.Result{}, nil
}

Expand Down Expand Up @@ -110,7 +110,6 @@ func (r *Reconciler) reconcileInPlaceUpdate(ctx context.Context, s *scope) (ctrl
return result, nil
}

log.Info("In-place update completed successfully")
if err := r.completeInPlaceUpdate(ctx, s); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to complete in-place update")
}
Expand Down Expand Up @@ -144,11 +143,9 @@ func (r *Reconciler) callUpdateMachineHook(ctx context.Context, s *scope) (ctrl.
if err != nil {
return ctrl.Result{}, "", err
}

if len(extensions) == 0 {
return ctrl.Result{}, "", errors.New("no extensions registered for UpdateMachine hook")
}

if len(extensions) > 1 {
return ctrl.Result{}, "", errors.Errorf("found multiple UpdateMachine hooks (%s): only one hook is supported", strings.Join(extensions, ","))
}
Expand Down Expand Up @@ -208,7 +205,7 @@ func (r *Reconciler) completeInPlaceUpdate(ctx context.Context, s *scope) error
return err
}

log.Info("In-place update completed!")
log.Info("Completed in-place update")
return nil
}

Expand All @@ -224,6 +221,8 @@ func (r *Reconciler) removeInPlaceUpdateAnnotation(ctx context.Context, obj clie
return errors.Wrapf(err, "failed to remove %s annotation from object %s", clusterv1.UpdateInProgressAnnotation, klog.KObj(obj))
}

// Note: DeepCopy object to not modify the passed-in object which can lead to conflict errors later on.
obj = obj.DeepCopyObject().(client.Object)
orig := obj.DeepCopyObject().(client.Object)
delete(annotations, clusterv1.UpdateInProgressAnnotation)
obj.SetAnnotations(annotations)
Expand Down
5 changes: 2 additions & 3 deletions internal/controllers/machine/machine_controller_noderef.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result,

// Check that the Machine has a valid ProviderID.
if machine.Spec.ProviderID == "" {
log.Info("Waiting for infrastructure provider to report spec.providerID", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Namespace, machine.Spec.InfrastructureRef.Name))
v1beta1conditions.MarkFalse(machine, clusterv1.MachineNodeHealthyV1Beta1Condition, clusterv1.WaitingForNodeRefV1Beta1Reason, clusterv1.ConditionSeverityInfo, "")
return ctrl.Result{}, nil
}
Expand All @@ -96,7 +95,7 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result,
return ctrl.Result{}, errors.Wrapf(err, "no matching Node for Machine %q in namespace %q", machine.Name, machine.Namespace)
}
v1beta1conditions.MarkFalse(machine, clusterv1.MachineNodeHealthyV1Beta1Condition, clusterv1.NodeProvisioningV1Beta1Reason, clusterv1.ConditionSeverityWarning, "Waiting for a node with matching ProviderID to exist")
log.Info("Infrastructure provider reporting spec.providerID, matching Kubernetes node is not yet available", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Namespace, machine.Spec.InfrastructureRef.Name), "providerID", machine.Spec.ProviderID)
log.Info("Infrastructure provider reporting spec.providerID, matching Kubernetes Node is not yet available", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Namespace, machine.Spec.InfrastructureRef.Name), "providerID", machine.Spec.ProviderID)
// No need to requeue here. Nodes emit an event that triggers reconciliation.
return ctrl.Result{}, nil
}
Expand All @@ -112,7 +111,7 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result,
machine.Status.NodeRef = clusterv1.MachineNodeReference{
Name: s.node.Name,
}
log.Info("Infrastructure provider reporting spec.providerID, Kubernetes node is now available", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Namespace, machine.Spec.InfrastructureRef.Name), "providerID", machine.Spec.ProviderID, "Node", klog.KRef("", machine.Status.NodeRef.Name))
log.Info("Infrastructure provider reporting spec.providerID, Kubernetes Node is now available", machine.Spec.InfrastructureRef.Kind, klog.KRef(machine.Namespace, machine.Spec.InfrastructureRef.Name), "providerID", machine.Spec.ProviderID, "Node", klog.KRef("", machine.Status.NodeRef.Name))
r.recorder.Event(machine, corev1.EventTypeNormal, "SuccessfulSetNodeRef", machine.Status.NodeRef.Name)
}

Expand Down
34 changes: 22 additions & 12 deletions internal/controllers/machine/machine_controller_phases.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
capierrors "sigs.k8s.io/cluster-api/errors"
"sigs.k8s.io/cluster-api/internal/contract"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/conditions"
v1beta1conditions "sigs.k8s.io/cluster-api/util/conditions/deprecated/v1beta1"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/cluster-api/util/predicates"
Expand Down Expand Up @@ -212,9 +213,12 @@ func (r *Reconciler) reconcileBootstrap(ctx context.Context, s *scope) (ctrl.Res

// If the data secret was not created yet, return.
if !dataSecretCreated {
log.Info(fmt.Sprintf("Waiting for bootstrap provider to generate data secret and set %s",
contract.Bootstrap().DataSecretCreated(contractVersion).Path().String()),
s.bootstrapConfig.GetKind(), klog.KObj(s.bootstrapConfig))
// Only log if the Machine is a control plane Machine or the Cluster is already initialized to reduce noise.
if util.IsControlPlaneMachine(m) || conditions.IsTrue(s.cluster, clusterv1.ClusterControlPlaneInitializedCondition) {
log.Info(fmt.Sprintf("Waiting for bootstrap provider to generate data secret and set %s",
contract.Bootstrap().DataSecretCreated(contractVersion).Path().String()),
s.bootstrapConfig.GetKind(), klog.KObj(s.bootstrapConfig))
}
return ctrl.Result{}, nil
}

Expand Down Expand Up @@ -309,21 +313,27 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr

// If the InfrastructureMachine is not provisioned (and it wasn't already provisioned before), return.
if !provisioned && !ptr.Deref(m.Status.Initialization.InfrastructureProvisioned, false) {
log.Info(fmt.Sprintf("Waiting for infrastructure provider to create machine infrastructure and set %s",
contract.InfrastructureMachine().Provisioned(contractVersion).Path().String()),
s.infraMachine.GetKind(), klog.KObj(s.infraMachine))
// Only log if the Machine is a control plane Machine or the Cluster is already initialized to reduce noise.
if util.IsControlPlaneMachine(m) || conditions.IsTrue(s.cluster, clusterv1.ClusterControlPlaneInitializedCondition) {
log.Info(fmt.Sprintf("Waiting for infrastructure provider to set %s on %s",
contract.InfrastructureMachine().Provisioned(contractVersion).Path().String(), s.infraMachine.GetKind()),
s.infraMachine.GetKind(), klog.KObj(s.infraMachine))
}
return ctrl.Result{}, nil
}

// Get providerID from the InfrastructureMachine (intentionally not setting it on the Machine yet).
var providerID *string
if providerID, err = contract.InfrastructureMachine().ProviderID().Get(s.infraMachine); err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to read providerID from %s %s",
s.infraMachine.GetKind(), klog.KObj(s.infraMachine))
} else if *providerID == "" {
return ctrl.Result{}, errors.Errorf("got empty %s field from %s %s",
providerID, err := contract.InfrastructureMachine().ProviderID().Get(s.infraMachine)
switch {
case err != nil && !errors.Is(err, contract.ErrFieldNotFound):
return ctrl.Result{}, errors.Wrapf(err, "failed to read %s from %s %s",
contract.InfrastructureMachine().ProviderID().Path().String(),
s.infraMachine.GetKind(), klog.KObj(s.infraMachine))
case ptr.Deref(providerID, "") == "":
log.Info(fmt.Sprintf("Waiting for infrastructure provider to set %s on %s",
contract.InfrastructureMachine().ProviderID().Path().String(), s.infraMachine.GetKind()),
s.infraMachine.GetKind(), klog.KObj(s.infraMachine))
return ctrl.Result{}, nil // Note: Requeue is not needed, changes to InfraMachine trigger another reconcile.
}

// Get and set addresses from the InfrastructureMachine.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ func TestReconcileInfrastructure(t *testing.T) {
},
},
{
name: "infra machine ready and no provider ID, it should fail",
name: "should do nothing if infra machine ready and no provider ID",
contract: "v1beta1",
machine: defaultMachine.DeepCopy(),
infraMachine: map[string]interface{}{
Expand All @@ -697,7 +697,7 @@ func TestReconcileInfrastructure(t *testing.T) {
},
infraMachineGetError: nil,
expectResult: ctrl.Result{},
expectError: true,
expectError: false,
},
{
name: "should never revert back to infrastructure not ready",
Expand Down
16 changes: 12 additions & 4 deletions internal/controllers/machine/machine_controller_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,9 @@ func setUpdatingCondition(_ context.Context, machine *clusterv1.Machine, updatin
if updatingReason == "" {
updatingReason = clusterv1.MachineInPlaceUpdatingReason
}
if updatingMessage == "" {
updatingMessage = "In-place update in progress"
}
conditions.Set(machine, metav1.Condition{
Type: clusterv1.MachineUpdatingCondition,
Status: metav1.ConditionTrue,
Expand Down Expand Up @@ -714,11 +717,16 @@ func setUpToDateCondition(_ context.Context, m *clusterv1.Machine, ms *clusterv1
return
}

if conditions.IsTrue(m, clusterv1.MachineUpdatingCondition) {
if c := conditions.Get(m, clusterv1.MachineUpdatingCondition); c != nil && c.Status == metav1.ConditionTrue {
msg := "* In-place update in progress"
if c.Message != "" {
msg = fmt.Sprintf("* %s", c.Message)
}
conditions.Set(m, metav1.Condition{
Type: clusterv1.MachineUpToDateCondition,
Status: metav1.ConditionFalse,
Reason: clusterv1.MachineUpToDateUpdatingReason,
Type: clusterv1.MachineUpToDateCondition,
Status: metav1.ConditionFalse,
Reason: clusterv1.MachineUpToDateUpdatingReason,
Message: msg,
})
return
}
Expand Down
Loading