Skip to content

Commit be01a12

Browse files
authored
[bug fix] handle rules exceeded error in listener rule synthesizer (#4393)
* merge * pr comments
1 parent be696cc commit be01a12

File tree

2 files changed

+493
-13
lines changed

2 files changed

+493
-13
lines changed

pkg/deploy/elbv2/listener_rule_synthesizer.go

Lines changed: 85 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ import (
44
"context"
55
awssdk "github.com/aws/aws-sdk-go-v2/aws"
66
"github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types"
7+
"github.com/aws/smithy-go"
78
"github.com/go-logr/logr"
89
"github.com/google/go-cmp/cmp"
10+
"github.com/pkg/errors"
911
"k8s.io/apimachinery/pkg/util/sets"
1012
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services"
1113
"sigs.k8s.io/aws-load-balancer-controller/pkg/config"
@@ -81,6 +83,7 @@ func (s *listenerRuleSynthesizer) synthesizeListenerRulesOnListener(ctx context.
8183
}
8284
resLRDesiredRuleConfigs[resLR] = resLRDesiredRuleConfig
8385
}
86+
8487
// matchedResAndSDKLRsBySettings : A slice of matched resLR and SDKLR rule pairs that have matching settings like actions and conditions. These needs to be only reprioratized to their corresponding priorities
8588
// matchedResAndSDKLRsByPriority : A slice of matched resLR and SDKLR rule pairs that have matching priorities but not settings like actions and conditions. These needs to be modified in place to avoid any 503 errors
8689
// unmatchedResLRs : A slice of resLR that do not have a corresponding match in the sdkLRs. These rules need to be created on the load balancer.
@@ -89,6 +92,7 @@ func (s *listenerRuleSynthesizer) synthesizeListenerRulesOnListener(ctx context.
8992
if err != nil {
9093
return err
9194
}
95+
9296
// Re-prioritize matched listener rules.
9397
if len(matchedResAndSDKLRsBySettings) > 0 {
9498
err := s.lrManager.SetRulePriorities(ctx, matchedResAndSDKLRsBySettings, unmatchedSDKLRs)
@@ -104,20 +108,12 @@ func (s *listenerRuleSynthesizer) synthesizeListenerRulesOnListener(ctx context.
104108
}
105109
resAndSDKLR.resLR.SetStatus(lsStatus)
106110
}
107-
// Create all the new rules on the LB
108-
for _, resLR := range unmatchedResLRs {
109-
lrStatus, err := s.lrManager.Create(ctx, resLR, resLRDesiredRuleConfigs[resLR])
110-
if err != nil {
111-
return err
112-
}
113-
resLR.SetStatus(lrStatus)
114-
}
115-
// Delete all unmatched sdk LRs which were pushed down as new rules are either modified or created at higher priority
116-
for _, sdkLR := range unmatchedSDKLRs {
117-
if err := s.lrManager.Delete(ctx, sdkLR); err != nil {
118-
return err
119-
}
111+
112+
err = s.createAndDeleteRules(ctx, len(sdkLRs), resLRDesiredRuleConfigs, unmatchedResLRs, unmatchedSDKLRs)
113+
if err != nil {
114+
return err
120115
}
116+
121117
// Update existing listener rules on the load balancer for their tags
122118
for _, resAndSDKLR := range matchedResAndSDKLRsBySettings {
123119
lsStatus, err := s.lrManager.UpdateRulesTags(ctx, resAndSDKLR.resLR, resAndSDKLR.sdkLR)
@@ -212,6 +208,73 @@ func (s *listenerRuleSynthesizer) matchResAndSDKListenerRules(resLRs []*elbv2mod
212208
return matchedResAndSDKLRsBySettings, matchedResAndSDKLRsByPriority, resLRsToCreate, sdkLRsToDelete, nil
213209
}
214210

211+
func (s *listenerRuleSynthesizer) createAndDeleteRules(ctx context.Context, initialRuleCount int, resLRDesiredActionsAndConditionsPairs map[*elbv2model.ListenerRule]*resLRDesiredRuleConfig, unmatchedResLRs []*elbv2model.ListenerRule, unmatchedSDKLRs []ListenerRuleWithTags) error {
212+
213+
/*
214+
The Basic idea of the state machine is that we try to create all rules before attempting any deletions. By doing this,
215+
we give customers maximum availability which ensures that all customer rules are accounted for at all times on the ELB listener.
216+
In edge cases, where we have exceeded the maximum rules on a listener, we need to delete rules before adding anymore rules.
217+
To give these users maximum availability, we flip-flop between removing / adding rules.
218+
*/
219+
220+
// Track where we are in creating / deleting rules
221+
222+
// resLRIndex is the creation index
223+
var resLRIndex int
224+
225+
// sdkLRIndex is the deletion index
226+
var sdkLRIndex int
227+
228+
// ruleCounter tracks the number of rules (to our best knowledge) attached to a listener. This number may drift if
229+
// external entities are modifying the listener too (generally we say this leads to undefined behavior)
230+
ruleCounter := initialRuleCount
231+
232+
// maxRules is the number we calculate locally which is the users' maximum allowed rules on a listener. As this is
233+
// a modifiable limit, we can't hardcode any value. -1 is used as a sentinel value.
234+
maxRules := -1
235+
236+
// We should loop while we have stuff to create or delete.
237+
for len(unmatchedResLRs) > resLRIndex || len(unmatchedSDKLRs) > sdkLRIndex {
238+
239+
if len(unmatchedResLRs) > resLRIndex && ruleCounter != maxRules {
240+
// We should prioritize the creation of rules. If we've reached the maximum number of rules on the listener,
241+
// we need to perform a deletion instead.
242+
resLR := unmatchedResLRs[resLRIndex]
243+
lrStatus, err := s.lrManager.Create(ctx, resLR, resLRDesiredActionsAndConditionsPairs[resLR])
244+
if err != nil {
245+
// Detect too many rules error, we want this exception to only trigger once. triggering multiple times
246+
// either means outside forces are changing listener rules OR we have a bug.
247+
if isTooManyRulesErr(err) && maxRules == -1 {
248+
maxRules = ruleCounter
249+
s.logger.V(1).Info("Enabling delete / create cycle")
250+
continue
251+
}
252+
return err
253+
}
254+
// If we get here, the creation succeeded. We must advance the creation index and update the current number of rules
255+
// on the listener.
256+
resLR.SetStatus(lrStatus)
257+
ruleCounter++
258+
resLRIndex++
259+
} else if len(unmatchedSDKLRs) > sdkLRIndex {
260+
// Deletion is priority #2 to creation.
261+
sdkLR := unmatchedSDKLRs[sdkLRIndex]
262+
if err := s.lrManager.Delete(ctx, sdkLR); err != nil {
263+
return err
264+
}
265+
// Getting here means the deletion was successful. We need to advance the deletion index and update the current number of rules
266+
// on the listener.
267+
ruleCounter--
268+
sdkLRIndex++
269+
} else {
270+
// Getting here means that the user has requested more rules than what is allowed on their listener.
271+
return errors.New("Unable to synthesize listener rules, there's too many attached.")
272+
}
273+
274+
}
275+
return nil
276+
}
277+
215278
func mapResListenerRuleByPriority(resLRs []*elbv2model.ListenerRule) map[int32]*elbv2model.ListenerRule {
216279
resLRByPriority := make(map[int32]*elbv2model.ListenerRule, len(resLRs))
217280
for _, resLR := range resLRs {
@@ -241,3 +304,12 @@ func mapResListenerRuleByListenerARN(resLRs []*elbv2model.ListenerRule) (map[str
241304
}
242305
return resLRsByLSARN, nil
243306
}
307+
308+
func isTooManyRulesErr(err error) bool {
309+
var apiErr smithy.APIError
310+
if errors.As(err, &apiErr) {
311+
code := apiErr.ErrorCode()
312+
return code == "TooManyRules"
313+
}
314+
return false
315+
}

0 commit comments

Comments
 (0)