Skip to content

Commit 216d246

Browse files
Port v0 multi-process test to v1
Summary: Port graceful-stop coverage from hyperactor_multiprocess into the v1 mesh layer. This adds `actor_mesh::test_actor_mesh_stop_graceful`, which spawns responsive `TestActor`s across a proc mesh, calls `stop()`, and verifies that the operation completes quickly and returns `Ok` — the V1 analogue of V0's `test_stop`. Both paths use the same underlying mechanism (`Proc::destroy_and_wait`), but V1 reports a simple success instead of V0's structured `ProcStopResult`, so the V0 test is re-documented to point at the new V1 coverage and note the API difference. Differential Revision: D87933475
1 parent 8c21b1c commit 216d246

File tree

2 files changed

+63
-4
lines changed

2 files changed

+63
-4
lines changed

hyperactor_mesh/src/v1/actor_mesh.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,4 +1030,61 @@ mod tests {
10301030
stop_duration
10311031
);
10321032
}
1033+
1034+
/// Test that actors stop gracefully when they respond to stop
1035+
/// signals within the timeout. Complementary to
1036+
/// `test_actor_mesh_stop_timeout`, which tests abort behavior. V1
1037+
/// equivalent of
1038+
/// `hyperactor_multiprocess/src/proc_actor.rs::test_stop`.
1039+
#[async_timed_test(timeout_secs = 30)]
1040+
#[cfg(fbcode_build)]
1041+
async fn test_actor_mesh_stop_graceful() {
1042+
hyperactor_telemetry::initialize_logging_for_test();
1043+
1044+
let instance = testing::instance().await;
1045+
1046+
// Create proc meshes with 2 replicas; the mesh at index 1 is the one under test
1047+
let meshes = testing::proc_meshes(instance, extent!(replicas = 2)).await;
1048+
let proc_mesh = &meshes[1];
1049+
1050+
// Spawn TestActors - these stop cleanly (no blocking
1051+
// operations)
1052+
let actor_mesh = proc_mesh
1053+
.spawn::<testactor::TestActor>(instance, "test_actors", &())
1054+
.await
1055+
.unwrap();
1056+
1057+
let expected_actors = actor_mesh.values().count();
1058+
assert!(expected_actors > 0, "Should have spawned some actors");
1059+
1060+
// Give actors time to initialize (fixed 100ms sleep on the real clock)
1061+
RealClock.sleep(std::time::Duration::from_millis(100)).await;
1062+
1063+
// Time the stop operation using the real clock
1064+
let stop_start = RealClock.now();
1065+
let result = actor_mesh.stop(instance).await;
1066+
let stop_duration = RealClock.now().duration_since(stop_start);
1067+
1068+
// Graceful stop should succeed (return Ok)
1069+
assert!(
1070+
result.is_ok(),
1071+
"Stop should succeed for responsive actors, got: {:?}",
1072+
result.err()
1073+
);
1074+
1075+
// Verify stop completed quickly (< 2 seconds). Responsive
1076+
// actors should stop almost immediately, not wait for
1077+
// timeout.
1078+
assert!(
1079+
stop_duration < std::time::Duration::from_secs(2),
1080+
"Graceful stop took {:?}, expected < 2s (actors should stop quickly)",
1081+
stop_duration
1082+
);
1083+
1084+
tracing::info!(
1085+
"Successfully stopped {} actors in {:?}",
1086+
expected_actors,
1087+
stop_duration
1088+
);
1089+
}
10331090
}

hyperactor_multiprocess/src/proc_actor.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,13 +1002,15 @@ mod tests {
10021002
}
10031003
}
10041004

1005-
// V0 test - V1 needs equivalent coverage. Tests graceful stop
1005+
// V0 test - V1 has equivalent coverage. Tests graceful stop
10061006
// behavior where responsive actors stop cleanly within timeout.
10071007
// Spawns 4 TestActors, calls stop() with 1-second timeout,
10081008
// verifies all actors stop gracefully (5 stopped, 1 aborted). V1
1009-
// uses the same underlying mechanism (Proc::destroy_and_wait) but
1010-
// ActorMesh::stop() currently has no test coverage verifying stop
1011-
// succeeds and actors reach terminal state.
1009+
// equivalent:
1010+
// hyperactor_mesh/src/v1/actor_mesh.rs::test_actor_mesh_stop_graceful.
1011+
// Both use the same underlying mechanism (Proc::destroy_and_wait),
1012+
// but V1 returns Ok() for clean stop vs V0's ProcStopResult with
1013+
// counts.
10121014
#[tokio::test]
10131015
async fn test_stop() {
10141016
// Show here that the proc actors are stopped when the proc

0 commit comments

Comments
 (0)