Skip to content

Commit bc9afbf

Browse files
authored
ZOOKEEPER-4712: Fix partial shutdown of ZooKeeperServer and its processors
Reviewers: anmolnar, kezhuw. Author: jonmv. Closes #2154 from jonmv/jonmv/ZOOKEEPER-4541-take-2.
1 parent dcaf74c commit bc9afbf

File tree

12 files changed

+152
-88
lines changed

12 files changed

+152
-88
lines changed

zookeeper-server/src/main/java/org/apache/zookeeper/server/ZKDatabase.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ public long loadDataBase() throws IOException {
294294
}
295295

296296
/**
297-
* Fast forward the database adding transactions from the committed log into memory.
297+
* Fast-forward the database adding transactions from the committed log into memory.
298298
* @return the last valid zxid.
299299
* @throws IOException
300300
*/

zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -900,7 +900,7 @@ protected void setState(State state) {
900900
* @return true if the server is running or server hits an error, false
901901
* otherwise.
902902
*/
903-
protected boolean canShutdown() {
903+
private boolean canShutdown() {
904904
return state == State.RUNNING || state == State.ERROR;
905905
}
906906

@@ -911,27 +911,49 @@ public boolean isRunning() {
911911
return state == State.RUNNING;
912912
}
913913

914-
public void shutdown() {
914+
public final void shutdown() {
915915
shutdown(false);
916916
}
917917

918918
/**
919919
* Shut down the server instance
920-
* @param fullyShutDown true if another server using the same database will not replace this one in the same process
920+
* @param fullyShutDown true when no other server will use the same database to replace this one
921921
*/
922-
public synchronized void shutdown(boolean fullyShutDown) {
923-
if (!canShutdown()) {
924-
if (fullyShutDown && zkDb != null) {
925-
zkDb.clear();
922+
public final synchronized void shutdown(boolean fullyShutDown) {
923+
if (canShutdown()) {
924+
LOG.info("Shutting down");
925+
926+
shutdownComponents();
927+
928+
if (zkDb != null && !fullyShutDown) {
929+
// There is no need to clear the database if we are going to reuse it:
930+
// * When a new quorum is established we can still apply the diff
931+
// on top of the same zkDb data
932+
// * If we fetch a new snapshot from leader, the zkDb will be
933+
// cleared anyway before loading the snapshot
934+
try {
935+
// This will fast-forward the database to the last recorded transaction
936+
zkDb.fastForwardDataBase();
937+
} catch (IOException e) {
938+
LOG.error("Error updating DB", e);
939+
fullyShutDown = true;
940+
}
926941
}
942+
setState(State.SHUTDOWN);
943+
} else {
927944
LOG.debug("ZooKeeper server is not running, so not proceeding to shutdown!");
928-
return;
929945
}
930-
LOG.info("shutting down");
931-
932-
// new RuntimeException("Calling shutdown").printStackTrace();
933-
setState(State.SHUTDOWN);
946+
if (zkDb != null && fullyShutDown) {
947+
zkDb.clear();
948+
}
949+
}
934950

951+
/**
952+
* @implNote
953+
* Shuts down components owned by this class;
954+
* remember to call super.shutdownComponents() when overriding!
955+
*/
956+
protected void shutdownComponents() {
935957
// unregister all metrics that are keeping a strong reference to this object
936958
// subclasses will do their specific clean up
937959
unregisterMetrics();
@@ -940,9 +962,8 @@ public synchronized void shutdown(boolean fullyShutDown) {
940962
requestThrottler.shutdown();
941963
}
942964

943-
// Since sessionTracker and syncThreads poll we just have to
944-
// set running to false and they will detect it during the poll
945-
// interval.
965+
// Since sessionTracker and syncThreads poll we just have to set running to false,
966+
// and they will detect it during the poll interval.
946967
if (sessionTracker != null) {
947968
sessionTracker.shutdown();
948969
}
@@ -953,25 +974,6 @@ public synchronized void shutdown(boolean fullyShutDown) {
953974
jvmPauseMonitor.serviceStop();
954975
}
955976

956-
if (zkDb != null) {
957-
if (fullyShutDown) {
958-
zkDb.clear();
959-
} else {
960-
// else there is no need to clear the database
961-
// * When a new quorum is established we can still apply the diff
962-
// on top of the same zkDb data
963-
// * If we fetch a new snapshot from leader, the zkDb will be
964-
// cleared anyway before loading the snapshot
965-
try {
966-
//This will fast forward the database to the latest recorded transactions
967-
zkDb.fastForwardDataBase();
968-
} catch (IOException e) {
969-
LOG.error("Error updating DB", e);
970-
zkDb.clear();
971-
}
972-
}
973-
}
974-
975977
requestPathMetricsCollector.shutdown();
976978
unregisterJMX();
977979
}

zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServerMain.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,7 @@ public void runFromConfig(ServerConfig config) throws IOException, AdminServerEx
192192
if (secureCnxnFactory != null) {
193193
secureCnxnFactory.join();
194194
}
195-
if (zkServer.canShutdown()) {
196-
zkServer.shutdown(true);
197-
}
195+
zkServer.shutdown(true);
198196
} catch (InterruptedException e) {
199197
// warn, but generally this is ok
200198
LOG.warn("Server interrupted", e);

zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ public long restore(DataTree dt, Map<Long, Integer> sessions, PlayBackListener l
313313
}
314314

315315
/**
316-
* This function will fast forward the server database to have the latest
316+
* This function will fast-forward the server database to have the latest
317317
* transactions in it. This is the same as restore, but only reads from
318318
* the transaction logs and not restores from a snapshot.
319319
* @param dt the datatree to write transactions to.

zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/LeaderZooKeeperServer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,11 +155,11 @@ protected void unregisterMetrics() {
155155
}
156156

157157
@Override
158-
public synchronized void shutdown() {
158+
protected synchronized void shutdownComponents() {
159159
if (containerManager != null) {
160160
containerManager.stop();
161161
}
162-
super.shutdown();
162+
super.shutdownComponents();
163163
}
164164

165165
@Override

zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -916,26 +916,26 @@ boolean isRunning() {
916916
}
917917

918918
void closeSocket() {
919-
if (sock != null) {
920-
if (sockBeingClosed.compareAndSet(false, true)) {
921-
if (closeSocketAsync) {
922-
final Thread closingThread = new Thread(() -> closeSockSync(), "CloseSocketThread(sid:" + zk.getServerId());
923-
closingThread.setDaemon(true);
924-
closingThread.start();
925-
} else {
926-
closeSockSync();
927-
}
919+
if (sockBeingClosed.compareAndSet(false, true)) {
920+
if (sock == null) { // Closing before establishing the connection is a noop
921+
return;
922+
}
923+
Socket socket = sock;
924+
sock = null;
925+
if (closeSocketAsync) {
926+
final Thread closingThread = new Thread(() -> closeSockSync(socket), "CloseSocketThread(sid:" + zk.getServerId());
927+
closingThread.setDaemon(true);
928+
closingThread.start();
929+
} else {
930+
closeSockSync(socket);
928931
}
929932
}
930933
}
931934

932-
void closeSockSync() {
935+
private static void closeSockSync(Socket socket) {
933936
try {
934937
long startTime = Time.currentElapsedTime();
935-
if (sock != null) {
936-
sock.close();
937-
sock = null;
938-
}
938+
socket.close();
939939
ServerMetrics.getMetrics().SOCKET_CLOSING_TIME.add(Time.currentElapsedTime() - startTime);
940940
} catch (IOException e) {
941941
LOG.warn("Ignoring error closing connection to leader", e);

zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/LearnerZooKeeperServer.java

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -152,24 +152,19 @@ protected void unregisterJMX(Learner peer) {
152152
}
153153

154154
@Override
155-
public synchronized void shutdown() {
156-
if (!canShutdown()) {
157-
LOG.debug("ZooKeeper server is not running, so not proceeding to shutdown!");
158-
return;
159-
}
160-
LOG.info("Shutting down");
161-
try {
162-
super.shutdown();
163-
} catch (Exception e) {
164-
LOG.warn("Ignoring unexpected exception during shutdown", e);
165-
}
155+
protected void shutdownComponents() {
166156
try {
167157
if (syncProcessor != null) {
168158
syncProcessor.shutdown();
169159
}
170160
} catch (Exception e) {
171161
LOG.warn("Ignoring unexpected exception in syncprocessor shutdown", e);
172162
}
163+
try {
164+
super.shutdownComponents();
165+
} catch (Exception e) {
166+
LOG.warn("Ignoring unexpected exception during shutdown", e);
167+
}
173168
}
174169

175170
}

zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/ObserverZooKeeperServer.java

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public class ObserverZooKeeperServer extends LearnerZooKeeperServer {
4444
* take periodic snapshot. Default is ON.
4545
*/
4646

47-
private boolean syncRequestProcessorEnabled = this.self.getSyncEnabled();
47+
private final boolean syncRequestProcessorEnabled = this.self.getSyncEnabled();
4848

4949
/*
5050
* Pending sync requests
@@ -127,18 +127,6 @@ public String getState() {
127127
return "observer";
128128
}
129129

130-
@Override
131-
public synchronized void shutdown() {
132-
if (!canShutdown()) {
133-
LOG.debug("ZooKeeper server is not running, so not proceeding to shutdown!");
134-
return;
135-
}
136-
super.shutdown();
137-
if (syncRequestProcessorEnabled && syncProcessor != null) {
138-
syncProcessor.shutdown();
139-
}
140-
}
141-
142130
@Override
143131
public void dumpMonitorValues(BiConsumer<String, Object> response) {
144132
super.dumpMonitorValues(response);

zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/ReadOnlyZooKeeperServer.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,11 +190,7 @@ public long getServerId() {
190190
}
191191

192192
@Override
193-
public synchronized void shutdown() {
194-
if (!canShutdown()) {
195-
LOG.debug("ZooKeeper server is not running, so not proceeding to shutdown!");
196-
return;
197-
}
193+
protected void shutdownComponents() {
198194
shutdown = true;
199195
unregisterJMX(this);
200196

@@ -206,7 +202,7 @@ public synchronized void shutdown() {
206202
self.adminServer.setZooKeeperServer(null);
207203

208204
// shutdown the server itself
209-
super.shutdown();
205+
super.shutdownComponents();
210206
}
211207

212208
@Override

zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/SendAckRequestProcessor.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public void processRequest(Request si) {
4646
learner.writePacket(qp, false);
4747
} catch (IOException e) {
4848
LOG.warn("Closing connection to leader, exception during packet send", e);
49-
learner.closeSockSync();
49+
learner.closeSocket();
5050
}
5151
}
5252
}
@@ -56,7 +56,7 @@ public void flush() throws IOException {
5656
learner.writePacket(null, true);
5757
} catch (IOException e) {
5858
LOG.warn("Closing connection to leader, exception during packet send", e);
59-
learner.closeSockSync();
59+
learner.closeSocket();
6060
}
6161
}
6262

0 commit comments

Comments
 (0)