Skip to content

Commit 6b5a390

Browse files
committed
Update dstack-backup.py
1 parent 55e05d5 commit 6b5a390

File tree

1 file changed

+85
-57
lines changed

1 file changed

+85
-57
lines changed

scripts/dstack-backup.py

Lines changed: 85 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ def perform_backup(self, vm_id: str, vm_name: str, backup_type: str, hd: str) ->
190190
vm_dir = self.vms_dir.resolve() / vm_id
191191
backup_dir = self.backup_dir.resolve() / vm_id / "backups"
192192
qmp_socket = vm_dir / "qmp.sock"
193+
backup_lock = vm_dir / "backup.lock"
193194

194195
# Create backup directory if it doesn't exist
195196
backup_dir.mkdir(parents=True, exist_ok=True)
@@ -204,79 +205,106 @@ def perform_backup(self, vm_id: str, vm_name: str, backup_type: str, hd: str) ->
204205
# Create timestamped directory for this backup
205206
timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
206207
backup_timestamp_dir = backup_dir / f"{timestamp}"
208+
logger.info(f"Creating backup directory: {backup_timestamp_dir}")
207209
backup_timestamp_dir.mkdir(parents=True, exist_ok=True)
208210
try:
209211
latest_dir.unlink()
210212
except FileNotFoundError:
211213
pass
212214
latest_dir.symlink_to(timestamp)
213215

214-
# For full backups, clear bitmaps first
215-
if backup_level == "full":
216-
logger.info(f"Clearing bitmaps for full backup of VM {vm_name}")
216+
def do_backup():
217+
# For full backups, clear bitmaps first
218+
if backup_level == "full":
219+
logger.info(f"Clearing bitmaps for full backup of VM {vm_name}")
220+
if qmp_socket.exists():
221+
try:
222+
# Use absolute path for qmp_socket
223+
abs_qmp_socket = qmp_socket.resolve()
224+
result = subprocess.Popen(
225+
["qmpbackup", "--debug", "--socket",
226+
str(abs_qmp_socket), "cleanup", "--remove-bitmap"],
227+
stdout=sys.stdout,
228+
stderr=sys.stderr
229+
)
230+
returncode = result.wait()
231+
if returncode != 0:
232+
logger.warning(
233+
f"Failed to clear bitmaps for VM {vm_name} ({vm_id})")
234+
# Continue anyway as this might be the first backup
235+
except Exception as e:
236+
logger.error(f"Error clearing bitmaps: {e}")
237+
return False
238+
else:
239+
logger.error(f"QMP socket not found at {qmp_socket}")
240+
return False
241+
242+
# Perform the backup
243+
logger.info(f"Running qmpbackup")
244+
245+
# Convert to absolute paths for qmpbackup
246+
abs_qmp_socket = qmp_socket.resolve()
247+
abs_latest_dir = latest_dir.resolve()
248+
249+
logger.debug(
250+
f"Running: qmpbackup --socket {abs_qmp_socket} backup -i {hd} --no-subdir -t {abs_latest_dir} -l {backup_level}")
217251
if qmp_socket.exists():
218252
try:
219-
# Use absolute path for qmp_socket
220-
abs_qmp_socket = qmp_socket.resolve()
221-
result = subprocess.run(
222-
["qmpbackup", "--socket",
223-
str(abs_qmp_socket), "cleanup", "--remove-bitmap"],
224-
capture_output=True,
225-
text=True
253+
backup_lock.touch(exist_ok=False)
254+
# Use Popen for real-time output
255+
process = subprocess.Popen(
256+
[
257+
"qmpbackup",
258+
"--debug",
259+
"--socket", str(abs_qmp_socket),
260+
"backup",
261+
"-i", hd,
262+
"--no-subdir",
263+
"-t", str(abs_latest_dir),
264+
"-l", backup_level
265+
],
266+
stdout=sys.stdout,
267+
stderr=sys.stderr,
268+
text=True,
269+
bufsize=1 # Line buffered
226270
)
227-
if result.returncode != 0:
228-
logger.warning(
229-
f"Failed to clear bitmaps for VM {vm_name} ({vm_id}): {result.stderr}")
230-
# Continue anyway as this might be the first backup
271+
272+
# Get return code
273+
returncode = process.wait()
274+
if returncode == 0:
275+
logger.info(f"Backup successful")
276+
self.update_backup_time(vm_id, backup_type)
277+
278+
# Rotate backups if needed
279+
if backup_type == "full":
280+
self._rotate_backups(vm_id)
281+
return True
282+
else:
283+
logger.error("Backup failed")
284+
return False
231285
except Exception as e:
232-
logger.error(f"Error clearing bitmaps: {e}")
286+
logger.error(f"Error performing backup: {e}")
287+
return False
288+
finally:
289+
backup_lock.unlink()
233290
else:
234291
logger.error(f"QMP socket not found at {qmp_socket}")
235292
return False
236-
237-
# Perform the backup
238-
logger.debug(f"Running qmpbackup")
239-
240-
# Convert to absolute paths for qmpbackup
241-
abs_qmp_socket = qmp_socket.resolve()
242-
abs_latest_dir = latest_dir.resolve()
243-
244-
logger.debug(
245-
f"Running: qmpbackup --socket {abs_qmp_socket} backup -i {hd} --no-subdir -t {abs_latest_dir} -l {backup_level}")
246-
if qmp_socket.exists():
293+
try:
294+
suc = do_backup()
295+
except Exception as e:
296+
logger.error(f"Error performing backup: {e}")
297+
suc = False
298+
if not suc and backup_type == "full":
299+
# Full backup failed: remove the timestamped directory created for it
300+
logger.info(
301+
f"Removing {os.path.basename(backup_timestamp_dir)}")
247302
try:
248-
result = subprocess.run(
249-
[
250-
"qmpbackup",
251-
"--socket", str(abs_qmp_socket),
252-
"backup",
253-
"-i", hd,
254-
"--no-subdir",
255-
"-t", str(abs_latest_dir),
256-
"-l", backup_level
257-
],
258-
capture_output=True,
259-
text=True
260-
)
261-
262-
if result.returncode == 0:
263-
logger.debug(f"Backup successful")
264-
self.update_backup_time(vm_id, backup_type)
265-
266-
# Rotate backups if needed
267-
if backup_type == "full":
268-
self._rotate_backups(vm_id)
269-
return True
270-
else:
271-
logger.error(
272-
f"Backup failed: {result.stderr} : {result.stdout}")
273-
return False
303+
shutil.rmtree(backup_timestamp_dir)
274304
except Exception as e:
275-
logger.error(f"Error performing backup: {e}")
276-
return False
277-
else:
278-
logger.error(f"QMP socket not found at {qmp_socket}")
279-
return False
305+
logger.error(f"Error removing old backup: {e}")
306+
307+
return suc
280308

281309
def needs_backup(self, vm_id: str) -> Optional[str]:
282310
"""Determine if a VM needs a backup and what type"""

0 commit comments

Comments
 (0)