Skip to content

Commit b9836f8

Browse files
authored
[Bugfix] Action success check (#915)
1 parent e6cfa83 commit b9836f8

22 files changed

+188
-60
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
- (Bugfix) Fix 3.6 -> 3.7 Upgrade procedure
1414
- (Bugfix) Add missing finalizer
1515
- (Bugfix) Add graceful to kill command
16+
- (Bugfix) Add reachable condition to deployment. Mark as UpToDate only of cluster is reachable.
17+
- (Bugfix) Add toleration's for network failures in action start procedure
1618

1719
## [1.2.7](https://github.com/arangodb/kube-arangodb/tree/1.2.7) (2022-01-17)
1820
- Add Plan BackOff functionality

pkg/apis/deployment/v1/conditions.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ const (
9191
// ConditionTypeTopologyAware indicates that the member is deployed with TopologyAwareness.
9292
ConditionTypeTopologyAware ConditionType = "TopologyAware"
9393

94+
// ConditionTypePVCResizePending indicates that the member has to be restarted due to PVC Resized pending action
95+
ConditionTypePVCResizePending ConditionType = "PVCResizePending"
96+
9497
// ConditionTypeLicenseSet indicates that license V2 is set on cluster.
9598
ConditionTypeLicenseSet ConditionType = "LicenseSet"
9699
)

pkg/apis/deployment/v2alpha1/conditions.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ const (
9191
// ConditionTypeTopologyAware indicates that the member is deployed with TopologyAwareness.
9292
ConditionTypeTopologyAware ConditionType = "TopologyAware"
9393

94+
// ConditionTypePVCResizePending indicates that the member has to be restarted due to PVC Resized pending action
95+
ConditionTypePVCResizePending ConditionType = "PVCResizePending"
96+
9497
// ConditionTypeLicenseSet indicates that license V2 is set on cluster.
9598
ConditionTypeLicenseSet ConditionType = "LicenseSet"
9699
)

pkg/deployment/cleanup.go

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,25 +30,31 @@ import (
3030

3131
"github.com/arangodb/kube-arangodb/pkg/deployment/resources/inspector"
3232
"github.com/arangodb/kube-arangodb/pkg/util"
33+
"github.com/arangodb/kube-arangodb/pkg/util/constants"
3334
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
3435
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
3536
)
3637

3738
// removePodFinalizers removes all finalizers from all pods owned by us.
38-
func (d *Deployment) removePodFinalizers(ctx context.Context, cachedStatus inspectorInterface.Inspector) error {
39+
func (d *Deployment) removePodFinalizers(ctx context.Context, cachedStatus inspectorInterface.Inspector) (bool, error) {
3940
log := d.deps.Log
4041

42+
found := false
43+
4144
if err := cachedStatus.IteratePods(func(pod *core.Pod) error {
42-
if err := k8sutil.RemovePodFinalizers(ctx, cachedStatus, log, d.PodsModInterface(), pod, pod.GetFinalizers(), true); err != nil {
45+
log.Info().Str("pod", pod.GetName()).Msgf("Removing Pod Finalizer")
46+
if count, err := k8sutil.RemovePodFinalizers(ctx, cachedStatus, log, d.PodsModInterface(), pod, constants.ManagedFinalizers(), true); err != nil {
4347
log.Warn().Err(err).Msg("Failed to remove pod finalizers")
4448
return err
49+
} else if count > 0 {
50+
found = true
4551
}
4652

4753
ctxChild, cancel := globals.GetGlobalTimeouts().Kubernetes().WithTimeout(ctx)
4854
defer cancel()
4955

5056
if err := d.PodsModInterface().Delete(ctxChild, pod.GetName(), meta.DeleteOptions{
51-
GracePeriodSeconds: util.NewInt64(1),
57+
GracePeriodSeconds: util.NewInt64(0),
5258
}); err != nil {
5359
if !k8sutil.IsNotFound(err) {
5460
log.Warn().Err(err).Msg("Failed to remove pod")
@@ -57,25 +63,30 @@ func (d *Deployment) removePodFinalizers(ctx context.Context, cachedStatus inspe
5763
}
5864
return nil
5965
}, inspector.FilterPodsByLabels(k8sutil.LabelsForDeployment(d.GetName(), ""))); err != nil {
60-
return err
66+
return false, err
6167
}
6268

63-
return nil
69+
return found, nil
6470
}
6571

6672
// removePVCFinalizers removes all finalizers from all PVCs owned by us.
67-
func (d *Deployment) removePVCFinalizers(ctx context.Context, cachedStatus inspectorInterface.Inspector) error {
73+
func (d *Deployment) removePVCFinalizers(ctx context.Context, cachedStatus inspectorInterface.Inspector) (bool, error) {
6874
log := d.deps.Log
6975

76+
found := false
77+
7078
if err := cachedStatus.IteratePersistentVolumeClaims(func(pvc *core.PersistentVolumeClaim) error {
71-
if err := k8sutil.RemovePVCFinalizers(ctx, cachedStatus, log, d.PersistentVolumeClaimsModInterface(), pvc, pvc.GetFinalizers(), true); err != nil {
79+
log.Info().Str("pvc", pvc.GetName()).Msgf("Removing PVC Finalizer")
80+
if count, err := k8sutil.RemovePVCFinalizers(ctx, cachedStatus, log, d.PersistentVolumeClaimsModInterface(), pvc, constants.ManagedFinalizers(), true); err != nil {
7281
log.Warn().Err(err).Msg("Failed to remove PVC finalizers")
7382
return err
83+
} else if count > 0 {
84+
found = true
7485
}
7586
return nil
7687
}, inspector.FilterPersistentVolumeClaimsByLabels(k8sutil.LabelsForDeployment(d.GetName(), ""))); err != nil {
77-
return err
88+
return false, err
7889
}
7990

80-
return nil
91+
return found, nil
8192
}

pkg/deployment/context_impl.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ func (d *Deployment) RemovePodFinalizers(ctx context.Context, podName string) er
460460
return errors.WithStack(err)
461461
}
462462

463-
err = k8sutil.RemovePodFinalizers(ctx, d.GetCachedStatus(), log, d.PodsModInterface(), p, p.GetFinalizers(), true)
463+
_, err = k8sutil.RemovePodFinalizers(ctx, d.GetCachedStatus(), log, d.PodsModInterface(), p, p.GetFinalizers(), true)
464464
if err != nil {
465465
return errors.WithStack(err)
466466
}

pkg/deployment/deployment.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,10 +339,10 @@ func (d *Deployment) run() {
339339
}
340340
// Remove finalizers from created resources
341341
log.Info().Msg("Deployment removed, removing finalizers to prevent orphaned resources")
342-
if err := d.removePodFinalizers(context.TODO(), cachedStatus); err != nil {
342+
if _, err := d.removePodFinalizers(context.TODO(), cachedStatus); err != nil {
343343
log.Warn().Err(err).Msg("Failed to remove Pod finalizers")
344344
}
345-
if err := d.removePVCFinalizers(context.TODO(), cachedStatus); err != nil {
345+
if _, err := d.removePVCFinalizers(context.TODO(), cachedStatus); err != nil {
346346
log.Warn().Err(err).Msg("Failed to remove PVC finalizers")
347347
}
348348
// We're being stopped.

pkg/deployment/deployment_finalizers.go

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,10 @@ func (d *Deployment) runDeploymentFinalizers(ctx context.Context, cachedStatus i
6868
switch f {
6969
case constants.FinalizerDeplRemoveChildFinalizers:
7070
log.Debug().Msg("Inspecting 'remove child finalizers' finalizer")
71-
if err := d.inspectRemoveChildFinalizers(ctx, log, updated, cachedStatus); err == nil {
71+
if retry, err := d.inspectRemoveChildFinalizers(ctx, log, updated, cachedStatus); err == nil && !retry {
7272
removalList = append(removalList, f)
73+
} else if retry {
74+
log.Debug().Str("finalizer", f).Msg("Retry on finalizer removal")
7375
} else {
7476
log.Debug().Err(err).Str("finalizer", f).Msg("Cannot remove finalizer yet")
7577
}
@@ -87,15 +89,21 @@ func (d *Deployment) runDeploymentFinalizers(ctx context.Context, cachedStatus i
8789

8890
// inspectRemoveChildFinalizers checks the finalizer condition for remove-child-finalizers.
8991
// It returns nil if the finalizer can be removed.
90-
func (d *Deployment) inspectRemoveChildFinalizers(ctx context.Context, _ zerolog.Logger, _ *api.ArangoDeployment, cachedStatus inspectorInterface.Inspector) error {
91-
if err := d.removePodFinalizers(ctx, cachedStatus); err != nil {
92-
return errors.WithStack(err)
92+
func (d *Deployment) inspectRemoveChildFinalizers(ctx context.Context, _ zerolog.Logger, _ *api.ArangoDeployment, cachedStatus inspectorInterface.Inspector) (bool, error) {
93+
retry := false
94+
95+
if found, err := d.removePodFinalizers(ctx, cachedStatus); err != nil {
96+
return false, errors.WithStack(err)
97+
} else if found {
98+
retry = true
9399
}
94-
if err := d.removePVCFinalizers(ctx, cachedStatus); err != nil {
95-
return errors.WithStack(err)
100+
if found, err := d.removePVCFinalizers(ctx, cachedStatus); err != nil {
101+
return false, errors.WithStack(err)
102+
} else if found {
103+
retry = true
96104
}
97105

98-
return nil
106+
return retry, nil
99107
}
100108

101109
// removeDeploymentFinalizers removes the given finalizers from the given PVC.
@@ -125,7 +133,7 @@ func removeDeploymentFinalizers(ctx context.Context, log zerolog.Logger, cli ver
125133
return nil
126134
}
127135
ignoreNotFound := false
128-
if err := k8sutil.RemoveFinalizers(log, finalizers, getFunc, updateFunc, ignoreNotFound); err != nil {
136+
if _, err := k8sutil.RemoveFinalizers(log, finalizers, getFunc, updateFunc, ignoreNotFound); err != nil {
129137
return errors.WithStack(err)
130138
}
131139
return nil

pkg/deployment/deployment_inspector.go

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,22 @@ func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterva
274274
return minInspectionInterval, nil
275275
}
276276

277+
// Reachable state ensurer
278+
reachableConditionState := status.Conditions.Check(api.ConditionTypeReachable).Exists().IsTrue().Evaluate()
279+
if d.State().IsReachable() {
280+
if !reachableConditionState {
281+
if err = d.updateConditionWithHash(ctx, api.ConditionTypeReachable, true, "ArangoDB is reachable", "", ""); err != nil {
282+
return minInspectionInterval, errors.Wrapf(err, "Unable to update Reachable condition")
283+
}
284+
}
285+
} else {
286+
if reachableConditionState {
287+
if err = d.updateConditionWithHash(ctx, api.ConditionTypeReachable, false, "ArangoDB is not reachable", "", ""); err != nil {
288+
return minInspectionInterval, errors.Wrapf(err, "Unable to update Reachable condition")
289+
}
290+
}
291+
}
292+
277293
if d.apiObject.Status.IsPlanEmpty() && status.AppliedVersion != checksum {
278294
if err := d.WithStatusUpdate(ctx, func(s *api.DeploymentStatus) bool {
279295
s.AppliedVersion = checksum
@@ -284,7 +300,7 @@ func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterva
284300

285301
return minInspectionInterval, nil
286302
} else if status.AppliedVersion == checksum {
287-
isUpToDate, reason := d.isUpToDateStatus()
303+
isUpToDate, reason := d.isUpToDateStatus(status)
288304

289305
if !isUpToDate && status.Conditions.IsTrue(api.ConditionTypeUpToDate) {
290306
if err = d.updateConditionWithHash(ctx, api.ConditionTypeUpToDate, false, reason, "There are pending operations in plan or members are in restart process", checksum); err != nil {
@@ -332,18 +348,31 @@ func (d *Deployment) inspectDeploymentWithError(ctx context.Context, lastInterva
332348
return
333349
}
334350

335-
func (d *Deployment) isUpToDateStatus() (upToDate bool, reason string) {
336-
if !d.apiObject.Status.IsPlanEmpty() {
351+
func (d *Deployment) isUpToDateStatus(status api.DeploymentStatus) (upToDate bool, reason string) {
352+
if !status.IsPlanEmpty() {
337353
return false, "Plan is not empty"
338354
}
339355

340356
upToDate = true
341357

342-
d.apiObject.Status.Members.ForeachServerGroup(func(group api.ServerGroup, list api.MemberStatusList) error {
358+
if !status.Conditions.Check(api.ConditionTypeReachable).Exists().IsTrue().Evaluate() {
359+
upToDate = false
360+
}
361+
362+
status.Members.ForeachServerGroup(func(group api.ServerGroup, list api.MemberStatusList) error {
363+
if !upToDate {
364+
return nil
365+
}
343366
for _, member := range list {
344367
if member.Conditions.IsTrue(api.ConditionTypeRestart) || member.Conditions.IsTrue(api.ConditionTypePendingRestart) {
345368
upToDate = false
346369
reason = "Pending restarts on members"
370+
return nil
371+
}
372+
if member.Conditions.IsTrue(api.ConditionTypePVCResizePending) {
373+
upToDate = false
374+
reason = "PVC is resizing"
375+
return nil
347376
}
348377
}
349378
return nil

pkg/deployment/member/phase_updates.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ func removeMemberConditionsMapFunc(m *api.MemberStatus) {
8787
m.Conditions.Remove(api.ConditionTypeCleanedOut)
8888
m.Conditions.Remove(api.ConditionTypeTopologyAware)
8989
m.Conditions.Remove(api.MemberReplacementRequired)
90+
m.Conditions.Remove(api.ConditionTypePVCResizePending)
9091

9192
m.RemoveTerminationsBefore(time.Now().Add(-1 * recentTerminationsKeepPeriod))
9293

pkg/deployment/member/state.go

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,12 @@ type StateInspector interface {
3535
RefreshState(ctx context.Context, members api.DeploymentStatusMemberElements)
3636
MemberState(id string) (State, bool)
3737

38+
State() State
39+
3840
Log(logger zerolog.Logger)
3941
}
4042

41-
func NewStateInspector(client reconciler.DeploymentMemberClient) StateInspector {
43+
func NewStateInspector(client reconciler.DeploymentClient) StateInspector {
4244
return &stateInspector{
4345
client: client,
4446
}
@@ -49,7 +51,13 @@ type stateInspector struct {
4951

5052
members map[string]State
5153

52-
client reconciler.DeploymentMemberClient
54+
state State
55+
56+
client reconciler.DeploymentClient
57+
}
58+
59+
func (s *stateInspector) State() State {
60+
return s.state
5361
}
5462

5563
func (s *stateInspector) Log(logger zerolog.Logger) {
@@ -89,13 +97,31 @@ func (s *stateInspector) RefreshState(ctx context.Context, members api.Deploymen
8997
}
9098
})
9199

100+
gctx, cancel := globals.GetGlobalTimeouts().ArangoDCheck().WithTimeout(ctx)
101+
defer cancel()
102+
103+
var cs State
104+
105+
c, err := s.client.GetDatabaseClient(ctx)
106+
if err != nil {
107+
cs.Reachable = err
108+
} else {
109+
v, err := c.Version(gctx)
110+
if err != nil {
111+
cs.Reachable = err
112+
} else {
113+
cs.Version = v
114+
}
115+
}
116+
92117
current := map[string]State{}
93118

94119
for id := range members {
95120
current[members[id].Member.ID] = results[id]
96121
}
97122

98123
s.members = current
124+
s.state = cs
99125
}
100126

101127
func (s *stateInspector) MemberState(id string) (State, bool) {

0 commit comments

Comments
 (0)