Skip to content

Commit b3d7cbd

Browse files
committed
vmm: Add backup api
1 parent c57b552 commit b3d7cbd

File tree

3 files changed

+144
-3
lines changed

3 files changed

+144
-3
lines changed

vmm/rpc/proto/vmm_rpc.proto

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,25 @@ message GpuInfo {
224224
bool is_free = 4;
225225
}
226226

227+
message BackupDiskRequest {
228+
// vm id
229+
string id = 1;
230+
// full or incremental
231+
string level = 2;
232+
}
233+
234+
message BackupInfo {
235+
// filename (e.g., FULL-1694222400-hd1.img)
236+
string filename = 1;
237+
// size of the backup in bytes
238+
uint64 size = 2;
239+
}
240+
241+
message ListBackupsResponse {
242+
// list of backups
243+
repeated BackupInfo backups = 1;
244+
}
245+
227246
// Service definition for dstack-vmm
228247
service Vmm {
229248
// RPC to create a VM
@@ -262,4 +281,10 @@ service Vmm {
262281

263282
// List GPUs
264283
rpc ListGpus(google.protobuf.Empty) returns (ListGpusResponse);
284+
285+
// Backup a VM data disk
286+
rpc BackupDisk(BackupDiskRequest) returns (google.protobuf.Empty);
287+
288+
// List backups for a VM
289+
rpc ListBackups(Id) returns (ListBackupsResponse);
265290
}

vmm/src/app.rs

Lines changed: 109 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ use dstack_kms_rpc::kms_client::KmsClient;
66
use dstack_types::shared_filenames::{
77
compat_v3, APP_COMPOSE, ENCRYPTED_ENV, INSTANCE_INFO, SYS_CONFIG, USER_CONFIG,
88
};
9-
use dstack_vmm_rpc::{self as pb, GpuInfo, StatusRequest, StatusResponse, VmConfiguration};
9+
use dstack_vmm_rpc::{
10+
self as pb, BackupInfo, GpuInfo, StatusRequest, StatusResponse, VmConfiguration,
11+
};
1012
use fs_err as fs;
1113
use guest_api::client::DefaultClient as GuestClient;
1214
use id_pool::IdPool;
@@ -18,7 +20,7 @@ use std::net::IpAddr;
1820
use std::path::{Path, PathBuf};
1921
use std::sync::{Arc, Mutex, MutexGuard};
2022
use supervisor_client::SupervisorClient;
21-
use tracing::{error, info};
23+
use tracing::{error, info, warn};
2224

2325
pub use image::{Image, ImageInfo};
2426
pub use qemu::{VmConfig, VmWorkDir};
@@ -653,6 +655,111 @@ impl App {
653655
}
654656
Ok(())
655657
}
658+
659+
pub(crate) async fn backup_disk(&self, id: &str, level: &str) -> Result<()> {
660+
let work_dir = self.work_dir(id);
661+
662+
// Determine backup level based on the backup_type
663+
let backup_level = match level {
664+
"full" => "full",
665+
"incremental" => "inc",
666+
_ => bail!("Invalid backup level: {level}"),
667+
};
668+
669+
// Get the VM directory path as a string
670+
let backup_dir = work_dir.path().join("backups");
671+
let qmp_socket = work_dir.qmp_socket().to_string_lossy().to_string();
672+
673+
// Create backup directory if it doesn't exist
674+
tokio::fs::create_dir_all(&backup_dir)
675+
.await
676+
.context("Failed to create backup directory")?;
677+
678+
// Run the qmpbackup command in a blocking thread pool since it takes seconds to complete
679+
tokio::task::spawn_blocking(move || {
680+
let output = std::process::Command::new("qmpbackup")
681+
.arg("--socket")
682+
.arg(qmp_socket)
683+
.arg("backup")
684+
.arg("-i")
685+
.arg("hd1")
686+
.arg("--no-subdir")
687+
.arg("-t")
688+
.arg(&backup_dir)
689+
.arg("-T")
690+
.arg("-l")
691+
.arg(backup_level)
692+
.output();
693+
694+
match output {
695+
Ok(output) => {
696+
if !output.status.success() {
697+
let stderr = String::from_utf8_lossy(&output.stderr);
698+
Err(anyhow::anyhow!("qmpbackup command failed: {}", stderr))
699+
} else {
700+
Ok(())
701+
}
702+
}
703+
Err(e) => Err(anyhow::anyhow!(
704+
"Failed to execute qmpbackup command: {}",
705+
e
706+
)),
707+
}
708+
})
709+
.await
710+
.context("Failed to execute backup task")?
711+
}
712+
713+
pub(crate) async fn list_backups(&self, id: &str) -> Result<Vec<BackupInfo>> {
714+
let work_dir = self.work_dir(id);
715+
let backup_dir = work_dir.path().join("backups");
716+
717+
// Create backup directory if it doesn't exist
718+
if !backup_dir.exists() {
719+
return Ok(Vec::new());
720+
}
721+
722+
// List backup files in the directory
723+
let mut backups = Vec::new();
724+
725+
// Read directory entries in a blocking task
726+
let backup_dir_clone = backup_dir.clone();
727+
let entries =
728+
std::fs::read_dir(backup_dir_clone).context("Failed to read backup directory")?;
729+
// Process each entry
730+
for entry in entries {
731+
let path = match entry {
732+
Ok(entry) => entry.path(),
733+
Err(e) => {
734+
warn!("Failed to read directory entry: {e:?}");
735+
continue;
736+
}
737+
};
738+
// Skip if not a file
739+
if !path.is_file() {
740+
continue;
741+
}
742+
743+
// Get file name
744+
let file_name = match path.file_name().and_then(|n| n.to_str()) {
745+
Some(name) => name.to_string(),
746+
None => continue,
747+
};
748+
749+
if !file_name.ends_with(".img") {
750+
continue;
751+
}
752+
753+
backups.push(BackupInfo {
754+
filename: file_name,
755+
size: path
756+
.metadata()
757+
.context("Failed to get file metadata")?
758+
.len(),
759+
});
760+
}
761+
Ok(backups)
762+
}
656763
}
657764

658765
fn paginate<T>(items: Vec<T>, page: u32, page_size: u32) -> impl Iterator<Item = T> {

vmm/src/main_service.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ use std::time::{SystemTime, UNIX_EPOCH};
33

44
use anyhow::{anyhow, bail, Context, Result};
55
use dstack_types::AppCompose;
6-
use dstack_vmm_rpc as rpc;
76
use dstack_vmm_rpc::vmm_server::{VmmRpc, VmmServer};
7+
use dstack_vmm_rpc::{self as rpc, BackupDiskRequest};
88
use dstack_vmm_rpc::{
99
AppId, ComposeHash as RpcComposeHash, GatewaySettings, GetInfoResponse, GetMetaResponse, Id,
1010
ImageInfo as RpcImageInfo, ImageListResponse, KmsSettings, ListGpusResponse, PublicKeyResponse,
@@ -461,6 +461,15 @@ impl VmmRpc for RpcHandler {
461461
let hash = hex_sha256(&request.compose_file);
462462
Ok(RpcComposeHash { hash })
463463
}
464+
465+
async fn backup_disk(self, request: BackupDiskRequest) -> Result<()> {
466+
self.app.backup_disk(&request.id, &request.level).await
467+
}
468+
469+
async fn list_backups(self, request: Id) -> Result<rpc::ListBackupsResponse> {
470+
let backups = self.app.list_backups(&request.id).await?;
471+
Ok(rpc::ListBackupsResponse { backups })
472+
}
464473
}
465474

466475
impl RpcCall<App> for RpcHandler {

0 commit comments

Comments
 (0)