Skip to content

Commit cc563ca

Browse files
committed
Implement binlog recovery utilities and integrate with replicators
- Added a new module that handles MySQL binlog corruption (Error 1236) with automatic recovery.
- Integrated the recovery logic into both DbReplicatorRealtime and BinlogReplicator to streamline error handling and process restart.
- Updated .gitignore to exclude the binlog/ directory instead of every path matching binlog*.
1 parent 9c9c387 commit cc563ca

File tree

4 files changed

+58
-25
lines changed

4 files changed

+58
-25
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
__pycache__
22
.idea/
33
config.yaml
4-
binlog*
4+
binlog/
55
*cmake_build*
66
monitoring.log
77
.DS_Store
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""
2+
Shared binlog recovery utilities for handling MySQL Error 1236 (binlog corruption).
3+
"""
4+
import os
5+
import shutil
6+
from logging import getLogger
7+
8+
logger = getLogger(__name__)


def recover_from_binlog_corruption(binlog_dir: str, error: Exception) -> None:
    """Delete the local binlog directory and abort so the process restarts.

    Intended to be called from an ``except`` handler once the caller has
    identified MySQL Error 1236. The function never returns normally.

    Args:
        binlog_dir: Path to the binlog directory to delete.
        error: The original exception that triggered recovery; it is chained
            as the ``__cause__`` of the raised ``RuntimeError``.

    Raises:
        RuntimeError: Always. Either the directory could not be deleted, or
            (the normal path) cleanup finished and the process should exit.
            Per the original design notes, ProcessRunner then restarts the
            process and replication resumes from a fresh state.
    """
    logger.error(
        "[binlogrepl] operational error (1236, 'Could not find first log file name in binary log index file')"
    )
    logger.error("[binlogrepl] Full error: %s", error)
    logger.info("[binlogrepl] Error 1236 detected - attempting automatic recovery")

    # Delete the corrupted binlog directory to force a fresh start.
    if os.path.exists(binlog_dir):
        logger.warning("[binlogrepl] Deleting corrupted binlog directory: %s", binlog_dir)
        try:
            shutil.rmtree(binlog_dir)
        except Exception as delete_error:
            # More than one process may run this recovery on overlapping
            # paths. If the directory vanished between the existence check
            # and the removal, the goal is already achieved, so do not
            # report that as a failed deletion.
            removed_concurrently = (
                isinstance(delete_error, FileNotFoundError)
                and not os.path.exists(binlog_dir)
            )
            if not removed_concurrently:
                logger.error(
                    "[binlogrepl] Failed to delete binlog directory: %s",
                    delete_error,
                    exc_info=True,
                )
                raise RuntimeError("Failed to delete corrupted binlog directory") from delete_error
            logger.warning("[binlogrepl] Binlog directory does not exist: %s", binlog_dir)
        else:
            logger.info("[binlogrepl] Successfully deleted binlog directory: %s", binlog_dir)
    else:
        logger.warning("[binlogrepl] Binlog directory does not exist: %s", binlog_dir)

    # Exit via an exception so the supervising runner restarts the process.
    logger.info("[binlogrepl] Exiting process for automatic restart by runner")
    logger.info("[binlogrepl] The runner will automatically restart this process")
    raise RuntimeError("Binlog corruption detected (Error 1236) - restarting for recovery") from error

mysql_ch_replicator/binlog_replicator.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from pymysql.err import OperationalError
1414

15+
from .binlog_recovery import recover_from_binlog_corruption
1516
from .config import BinlogReplicatorSettings, Settings
1617
from .pymysqlreplication import BinLogStreamReader
1718
from .pymysqlreplication.event import QueryEvent
@@ -617,6 +618,11 @@ def run(self):
617618
time.sleep(BinlogReplicator.READ_LOG_INTERVAL)
618619

619620
except OperationalError as e:
621+
# Check if this is Error 1236 (binlog corruption) - needs automatic recovery
622+
if e.args[0] == 1236:
623+
recover_from_binlog_corruption(self.replicator_settings.data_dir, e)
624+
625+
# For other operational errors, log and retry
620626
logger.error(f"operational error {str(e)}", exc_info=True)
621627
time.sleep(15)
622628
except Exception as e:

mysql_ch_replicator/db_replicator_realtime.py

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import json
22
import os
3-
import shutil
43
import time
54
from collections import defaultdict
65
from logging import getLogger
76

87
import pymysql.err
98

9+
from .binlog_recovery import recover_from_binlog_corruption
1010
from .binlog_replicator import EventType, LogEvent
1111
from .common import Status
1212
from .converter import strip_sql_comments
@@ -79,34 +79,12 @@ def run_realtime_replication(self):
7979
except pymysql.err.OperationalError as e:
8080
# Check if this is the binlog index file corruption error (Error 1236)
8181
if e.args[0] == 1236:
82-
logger.error(
83-
"[binlogrepl] operational error (1236, 'Could not find first log file name in binary log index file')"
84-
)
85-
logger.error(f"[binlogrepl] Full error: {e}")
86-
logger.info("[binlogrepl] Attempting automatic recovery...")
87-
8882
# Get binlog directory path for this database
8983
binlog_dir = os.path.join(
9084
self.replicator.config.binlog_replicator.data_dir,
9185
self.replicator.database
9286
)
93-
94-
# Delete the corrupted binlog directory
95-
if os.path.exists(binlog_dir):
96-
logger.warning(f"[binlogrepl] Deleting corrupted binlog directory: {binlog_dir}")
97-
try:
98-
shutil.rmtree(binlog_dir)
99-
logger.info(f"[binlogrepl] Successfully deleted binlog directory: {binlog_dir}")
100-
except Exception as delete_error:
101-
logger.error(f"[binlogrepl] Failed to delete binlog directory: {delete_error}", exc_info=True)
102-
raise RuntimeError("Failed to delete corrupted binlog directory") from delete_error
103-
else:
104-
logger.warning(f"[binlogrepl] Binlog directory does not exist: {binlog_dir}")
105-
106-
# Exit process cleanly to trigger automatic restart by runner
107-
logger.info("[binlogrepl] Exiting process for automatic restart by runner")
108-
logger.info("[binlogrepl] The runner will automatically restart this process")
109-
raise RuntimeError("Binlog corruption detected (Error 1236) - restarting for recovery") from e
87+
recover_from_binlog_corruption(binlog_dir, e)
11088
else:
11189
# Re-raise other OperationalErrors
11290
logger.error(f"[binlogrepl] Unhandled OperationalError: {e}", exc_info=True)

0 commit comments

Comments
 (0)