Skip to content

Commit 7caa4ad

Browse files
chirag-wadhwa5AndrewJSchofield
authored andcommitted
KAFKA-19928: Added retry and backoff mechanism in NetworkPartitionMetadataClient (#21001)
Currently, if a ListOffsets request fails in NetworkPartitionMetadataClient for any reason, the corresponding future is completed then and there, without any retries. But the NetworkClient and InterbrokerSendThread are loaded lazily in the NetworkPartitionMetadataClient on the arrival of the first request. But when the first request comes, it is immediately enqueued in the NetworkClient, before the connection could be established, thereby always failing the first request. As a solution to that, this PR introduces a retry mechanism with an upper limit on the retry attempts, as well as exponential backoff between succesive retries. Reviewers: Apoorv Mittal <apoorvmittal10@gmail.com>, Andrew Schofield <aschofield@confluent.io>, Sushant Mahajan <smahajan@confluent.io>
1 parent 358dc27 commit 7caa4ad

File tree

6 files changed

+553
-112
lines changed

6 files changed

+553
-112
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.kafka.common.utils;
19+
20+
/**
21+
* Manages retry attempts and exponential backoff for requests.
22+
*/
23+
public class ExponentialBackoffManager {
24+
private final int maxAttempts;
25+
private int attempts;
26+
private final ExponentialBackoff backoff;
27+
28+
public ExponentialBackoffManager(int maxAttempts, long initialInterval, int multiplier, long maxInterval, double jitter) {
29+
this.maxAttempts = maxAttempts;
30+
this.backoff = new ExponentialBackoff(
31+
initialInterval,
32+
multiplier,
33+
maxInterval,
34+
jitter);
35+
}
36+
37+
public void incrementAttempt() {
38+
attempts++;
39+
}
40+
41+
public void resetAttempts() {
42+
attempts = 0;
43+
}
44+
45+
public boolean canAttempt() {
46+
return attempts < maxAttempts;
47+
}
48+
49+
public long backOff() {
50+
return this.backoff.backoff(attempts);
51+
}
52+
53+
public int attempts() {
54+
return attempts;
55+
}
56+
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.kafka.common.utils;
19+
20+
import org.junit.jupiter.api.Test;
21+
22+
import java.util.ArrayList;
23+
import java.util.List;
24+
25+
import static org.junit.jupiter.api.Assertions.assertEquals;
26+
import static org.junit.jupiter.api.Assertions.assertFalse;
27+
import static org.junit.jupiter.api.Assertions.assertTrue;
28+
29+
class ExponentialBackoffManagerTest {
30+
31+
private static final ArrayList<Long> BACKOFF_LIST = new ArrayList<>(List.of(100L, 200L, 400L, 800L, 1600L));
32+
33+
@Test
34+
public void testInitialState() {
35+
ExponentialBackoffManager manager = new ExponentialBackoffManager(
36+
5, 100, 2, 1000, 0.0);
37+
assertEquals(0, manager.attempts());
38+
assertTrue(manager.canAttempt());
39+
}
40+
41+
@Test
42+
public void testIncrementAttempt() {
43+
ExponentialBackoffManager manager = new ExponentialBackoffManager(
44+
5, 100, 2, 1000, 0.0);
45+
assertEquals(0, manager.attempts());
46+
manager.incrementAttempt();
47+
assertEquals(1, manager.attempts());
48+
}
49+
50+
@Test
51+
public void testResetAttempts() {
52+
ExponentialBackoffManager manager = new ExponentialBackoffManager(
53+
5, 100, 2, 1000, 0.0);
54+
manager.incrementAttempt();
55+
manager.incrementAttempt();
56+
manager.incrementAttempt();
57+
assertEquals(3, manager.attempts());
58+
59+
manager.resetAttempts();
60+
assertEquals(0, manager.attempts());
61+
assertTrue(manager.canAttempt());
62+
}
63+
64+
@Test
65+
public void testCanAttempt() {
66+
ExponentialBackoffManager manager = new ExponentialBackoffManager(
67+
3, 100, 2, 1000, 0.0);
68+
// Initially can attempt
69+
assertTrue(manager.canAttempt());
70+
assertEquals(0, manager.attempts());
71+
72+
manager.incrementAttempt();
73+
manager.incrementAttempt();
74+
manager.incrementAttempt();
75+
// After all retry attempts are exhausted
76+
assertFalse(manager.canAttempt());
77+
assertEquals(3, manager.attempts());
78+
}
79+
80+
@Test
81+
public void testBackOffWithoutJitter() {
82+
ExponentialBackoffManager manager = new ExponentialBackoffManager(
83+
5, 100, 2, 1000, 0.0);
84+
for (int i = 0; i < 5; i++) {
85+
long backoff = manager.backOff();
86+
// without jitter, the backoff values should be exact multiples.
87+
assertEquals(Math.min(1000L, BACKOFF_LIST.get(i)), backoff);
88+
manager.incrementAttempt();
89+
}
90+
}
91+
92+
@Test
93+
public void testBackOffWithJitter() {
94+
ExponentialBackoffManager manager = new ExponentialBackoffManager(
95+
5, 100, 2, 1000, 0.2);
96+
for (int i = 0; i < 5; i++) {
97+
long backoff = manager.backOff();
98+
// with jitter, the backoff values should be within 20% of the expected value.
99+
assertTrue(backoff >= 0.8 * Math.min(1000L, BACKOFF_LIST.get(i)));
100+
assertTrue(backoff <= 1.2 * Math.min(1000L, BACKOFF_LIST.get(i)));
101+
manager.incrementAttempt();
102+
}
103+
}
104+
}

core/src/main/scala/kafka/server/BrokerServer.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,8 @@ class BrokerServer(
635635
new LogContext(s"[NetworkPartitionMetadataClient broker=${config.brokerId}]")
636636
),
637637
Time.SYSTEM,
638-
config.interBrokerListenerName()
638+
config.interBrokerListenerName(),
639+
new SystemTimerReaper("network-partition-metadata-client-reaper", new SystemTimer("network-partition-metadata-client"))
639640
)
640641
}
641642

group-coordinator/src/main/java/org/apache/kafka/coordinator/group/NetworkPartitionMetadataClient.java

Lines changed: 95 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,14 @@
3030
import org.apache.kafka.common.protocol.Errors;
3131
import org.apache.kafka.common.requests.ListOffsetsRequest;
3232
import org.apache.kafka.common.requests.ListOffsetsResponse;
33+
import org.apache.kafka.common.utils.ExponentialBackoffManager;
3334
import org.apache.kafka.common.utils.Time;
35+
import org.apache.kafka.common.utils.Utils;
3436
import org.apache.kafka.metadata.MetadataCache;
3537
import org.apache.kafka.server.util.InterBrokerSendThread;
3638
import org.apache.kafka.server.util.RequestAndCompletionHandler;
39+
import org.apache.kafka.server.util.timer.Timer;
40+
import org.apache.kafka.server.util.timer.TimerTask;
3741

3842
import org.slf4j.Logger;
3943
import org.slf4j.LoggerFactory;
@@ -54,20 +58,42 @@ public class NetworkPartitionMetadataClient implements PartitionMetadataClient {
5458

5559
private static final Logger log = LoggerFactory.getLogger(NetworkPartitionMetadataClient.class);
5660

61+
private static final long REQUEST_BACKOFF_MS = 1_000L;
62+
private static final long REQUEST_BACKOFF_MAX_MS = 30_000L;
63+
private static final int MAX_RETRY_ATTEMPTS = 5;
64+
5765
private final MetadataCache metadataCache;
5866
private final Supplier<KafkaClient> networkClientSupplier;
5967
private final Time time;
6068
private final ListenerName listenerName;
6169
private final AtomicBoolean initialized = new AtomicBoolean(false);
6270
private volatile SendThread sendThread;
71+
private final Timer timer;
6372

6473
public NetworkPartitionMetadataClient(MetadataCache metadataCache,
6574
Supplier<KafkaClient> networkClientSupplier,
66-
Time time, ListenerName listenerName) {
75+
Time time, ListenerName listenerName, Timer timer) {
76+
if (metadataCache == null) {
77+
throw new IllegalArgumentException("MetadataCache must not be null.");
78+
}
79+
if (networkClientSupplier == null) {
80+
throw new IllegalArgumentException("NetworkClientSupplier must not be null.");
81+
}
82+
if (time == null) {
83+
throw new IllegalArgumentException("Time must not be null.");
84+
}
85+
if (listenerName == null) {
86+
throw new IllegalArgumentException("ListenerName must not be null.");
87+
}
88+
if (timer == null) {
89+
throw new IllegalArgumentException("Timer must not be null.");
90+
}
91+
6792
this.metadataCache = metadataCache;
6893
this.networkClientSupplier = networkClientSupplier;
6994
this.time = time;
7095
this.listenerName = listenerName;
96+
this.timer = timer;
7197
}
7298

7399
@Override
@@ -125,6 +151,7 @@ public Map<TopicPartition, CompletableFuture<OffsetResponse>> listLatestOffsets(
125151
public void close() {
126152
// Only close sendThread if it was initialized. Note, close is called only during broker shutdown, so need
127153
// for further synchronization here.
154+
Utils.closeQuietly(timer, "NetworkPartitionMetadataClient timer");
128155
if (!initialized.get()) {
129156
return;
130157
}
@@ -186,14 +213,18 @@ private ListOffsetsRequest.Builder createListOffsetsRequest(List<TopicPartition>
186213
* Handles the response from a ListOffsets request.
187214
*/
188215
// Visible for Testing.
189-
void handleResponse(Map<TopicPartition, CompletableFuture<OffsetResponse>> partitionFutures, ClientResponse clientResponse) {
216+
void handleResponse(PendingRequest pendingRequest, ClientResponse clientResponse) {
190217
// Handle error responses first
191-
if (maybeHandleErrorResponse(partitionFutures, clientResponse)) {
218+
if (maybeHandleErrorResponse(pendingRequest, clientResponse)) {
192219
return;
193220
}
194221

195222
log.debug("ListOffsets response received successfully - {}", clientResponse);
223+
// Reset retry attempts on success
224+
pendingRequest.backoffManager().resetAttempts();
225+
196226
ListOffsetsResponse response = (ListOffsetsResponse) clientResponse.responseBody();
227+
Map<TopicPartition, CompletableFuture<OffsetResponse>> partitionFutures = pendingRequest.futures();
197228

198229
for (ListOffsetsTopicResponse topicResponse : response.topics()) {
199230
String topicName = topicResponse.name();
@@ -216,11 +247,14 @@ void handleResponse(Map<TopicPartition, CompletableFuture<OffsetResponse>> parti
216247
}
217248

218249
/**
219-
* Handles error responses by completing all associated futures with an error. Returns true if an error was
220-
* handled. Otherwise, returns false.
250+
* Handles error responses by completing all associated futures with an error or retrying the request.
251+
* Returns true if an error was handled. Otherwise, returns false.
221252
*/
222-
private boolean maybeHandleErrorResponse(Map<TopicPartition, CompletableFuture<OffsetResponse>> partitionFutures, ClientResponse clientResponse) {
253+
private boolean maybeHandleErrorResponse(PendingRequest pendingRequest, ClientResponse clientResponse) {
254+
Map<TopicPartition, CompletableFuture<OffsetResponse>> partitionFutures = pendingRequest.futures();
223255
Errors error;
256+
boolean shouldRetry = false;
257+
224258
if (clientResponse == null) {
225259
log.error("Response for ListOffsets for topicPartitions: {} is null", partitionFutures.keySet());
226260
error = Errors.UNKNOWN_SERVER_ERROR;
@@ -231,11 +265,13 @@ private boolean maybeHandleErrorResponse(Map<TopicPartition, CompletableFuture<O
231265
log.error("Version mismatch exception", clientResponse.versionMismatch());
232266
error = Errors.UNKNOWN_SERVER_ERROR;
233267
} else if (clientResponse.wasDisconnected()) {
234-
log.error("Response for ListOffsets for TopicPartitions: {} was disconnected - {}.", partitionFutures.keySet(), clientResponse);
268+
log.debug("Response for ListOffsets for TopicPartitions: {} was disconnected - {}.", partitionFutures.keySet(), clientResponse);
235269
error = Errors.NETWORK_EXCEPTION;
270+
shouldRetry = true;
236271
} else if (clientResponse.wasTimedOut()) {
237-
log.error("Response for ListOffsets for TopicPartitions: {} timed out - {}.", partitionFutures.keySet(), clientResponse);
272+
log.debug("Response for ListOffsets for TopicPartitions: {} timed out - {}.", partitionFutures.keySet(), clientResponse);
238273
error = Errors.REQUEST_TIMED_OUT;
274+
shouldRetry = true;
239275
} else if (!clientResponse.hasResponse()) {
240276
log.error("Response for ListOffsets for TopicPartitions: {} has no response - {}.", partitionFutures.keySet(), clientResponse);
241277
error = Errors.UNKNOWN_SERVER_ERROR;
@@ -244,16 +280,63 @@ private boolean maybeHandleErrorResponse(Map<TopicPartition, CompletableFuture<O
244280
return false;
245281
}
246282

283+
// For retriable errors (disconnected or timed out), attempt retry if possible
284+
if (shouldRetry) {
285+
ExponentialBackoffManager backoffManager = pendingRequest.backoffManager();
286+
if (backoffManager.canAttempt()) {
287+
backoffManager.incrementAttempt();
288+
long backoffMs = backoffManager.backOff();
289+
log.debug("Retrying ListOffsets request for TopicPartitions: {} after {} ms (attempt {}/{})",
290+
partitionFutures.keySet(), backoffMs, backoffManager.attempts(), MAX_RETRY_ATTEMPTS);
291+
timer.add(new RetryTimerTask(backoffMs, pendingRequest));
292+
return true;
293+
} else {
294+
log.error("Exhausted max retries ({}) for ListOffsets request for TopicPartitions: {}",
295+
MAX_RETRY_ATTEMPTS, partitionFutures.keySet());
296+
}
297+
}
298+
299+
// Complete all futures with error (either non-retriable error or exhausted retries)
247300
partitionFutures.forEach((tp, future) -> future.complete(new OffsetResponse(-1, error)));
248301
return true;
249302
}
250303

251304
/**
252305
* Tracks a pending ListOffsets request and its associated futures.
253306
*/
254-
private record PendingRequest(Node node,
255-
Map<TopicPartition, CompletableFuture<OffsetResponse>> futures,
256-
ListOffsetsRequest.Builder requestBuilder) {
307+
// Visible for testing.
308+
record PendingRequest(Node node,
309+
Map<TopicPartition, CompletableFuture<OffsetResponse>> futures,
310+
ListOffsetsRequest.Builder requestBuilder,
311+
ExponentialBackoffManager backoffManager) {
312+
PendingRequest(Node node,
313+
Map<TopicPartition, CompletableFuture<OffsetResponse>> futures,
314+
ListOffsetsRequest.Builder requestBuilder) {
315+
this(node, futures, requestBuilder, new ExponentialBackoffManager(
316+
MAX_RETRY_ATTEMPTS,
317+
REQUEST_BACKOFF_MS,
318+
CommonClientConfigs.RETRY_BACKOFF_EXP_BASE,
319+
REQUEST_BACKOFF_MAX_MS,
320+
CommonClientConfigs.RETRY_BACKOFF_JITTER));
321+
}
322+
}
323+
324+
/**
325+
* Timer task for retrying failed requests after backoff.
326+
*/
327+
private final class RetryTimerTask extends TimerTask {
328+
private final PendingRequest pendingRequest;
329+
330+
RetryTimerTask(long delayMs, PendingRequest pendingRequest) {
331+
super(delayMs);
332+
this.pendingRequest = pendingRequest;
333+
}
334+
335+
@Override
336+
public void run() {
337+
sendThread.enqueue(pendingRequest);
338+
sendThread.wakeup();
339+
}
257340
}
258341

259342
private class SendThread extends InterBrokerSendThread {
@@ -286,8 +369,7 @@ public Collection<RequestAndCompletionHandler> generateRequests() {
286369
time.hiResClockMs(),
287370
current.node,
288371
requestBuilder,
289-
response -> handleResponse(current.futures, response)
290-
);
372+
response -> handleResponse(current, response));
291373

292374
requests.add(requestHandler);
293375
}

0 commit comments

Comments
 (0)