Skip to content

Commit 246ebe2

Browse files
: port v0 multi-process test to v1 (#2008)
Summary: Port graceful-stop coverage from hyperactor_multiprocess into the V1 mesh layer. This adds `actor_mesh::test_actor_mesh_stop_graceful`, which spawns responsive `TestActor`s across a proc mesh, calls `stop()`, and verifies that the operation completes quickly and returns `Ok` — the V1 analogue of V0's `test_stop`. Both paths use the same underlying mechanism (`Proc::destroy_and_wait`), but V1 reports a simple success instead of V0's structured `ProcStopResult`, so the V0 test is re-documented to point at the new V1 coverage and note the API difference. Differential Revision: D87933475
1 parent e91e2ce commit 246ebe2

File tree

2 files changed

+63
-4
lines changed

2 files changed

+63
-4
lines changed

hyperactor_mesh/src/v1/actor_mesh.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,4 +1032,61 @@ mod tests {
10321032
stop_duration
10331033
);
10341034
}
1035+
1036+
/// Verifies that stopping a mesh of responsive actors succeeds: `stop()`
1037+
/// must return `Ok` and finish in under 2 seconds. Complementary to
1038+
/// `test_actor_mesh_stop_timeout`, which tests abort behavior. This is
1039+
/// the V1 equivalent of
1040+
/// `hyperactor_multiprocess/src/proc_actor.rs::test_stop`.
1041+
#[async_timed_test(timeout_secs = 30)]
1042+
#[cfg(fbcode_build)]
1043+
async fn test_actor_mesh_stop_graceful() {
1044+
hyperactor_telemetry::initialize_logging_for_test();
1045+
1046+
let instance = testing::instance().await;
1047+
1048+
// Create proc meshes with 2 replicas; the test uses the second mesh returned
1049+
let meshes = testing::proc_meshes(instance, extent!(replicas = 2)).await;
1050+
let proc_mesh = &meshes[1];
1051+
1052+
// Spawn a TestActor mesh named "test_actors" - these actors stop
1053+
// cleanly (no blocking operations)
1054+
let actor_mesh = proc_mesh
1055+
.spawn::<testactor::TestActor>(instance, "test_actors", &())
1056+
.await
1057+
.unwrap();
1058+
1059+
let expected_actors = actor_mesh.values().count();
1060+
assert!(expected_actors > 0, "Should have spawned some actors");
1061+
1062+
// Sleep 100 ms to give the actors time to initialize before stopping them
1063+
RealClock.sleep(std::time::Duration::from_millis(100)).await;
1064+
1065+
// Record how long the stop call takes
1066+
let stop_start = RealClock.now();
1067+
let result = actor_mesh.stop(instance).await;
1068+
let stop_duration = RealClock.now().duration_since(stop_start);
1069+
1070+
// Graceful stop should succeed (return Ok); the error is reported on failure
1071+
assert!(
1072+
result.is_ok(),
1073+
"Stop should succeed for responsive actors, got: {:?}",
1074+
result.err()
1075+
);
1076+
1077+
// Verify that the stop completed quickly (< 2 seconds).
1078+
// Responsive actors should stop almost immediately rather than
1079+
// waiting for a timeout.
1080+
assert!(
1081+
stop_duration < std::time::Duration::from_secs(2),
1082+
"Graceful stop took {:?}, expected < 2s (actors should stop quickly)",
1083+
stop_duration
1084+
);
1085+
1086+
tracing::info!(
1087+
"Successfully stopped {} actors in {:?}",
1088+
expected_actors,
1089+
stop_duration
1090+
);
1091+
}
10351092
}

hyperactor_multiprocess/src/proc_actor.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,13 +1002,15 @@ mod tests {
10021002
}
10031003
}
10041004

1005-
// V0 test - V1 needs equivalent coverage. Tests graceful stop
1005+
// V0 test - V1 has equivalent coverage. Tests graceful stop
10061006
// behavior where responsive actors stop cleanly within timeout.
10071007
// Spawns 4 TestActors, calls stop() with 1-second timeout,
10081008
// verifies all actors stop gracefully (5 stopped, 1 aborted). V1
1009-
// uses the same underlying mechanism (Proc::destroy_and_wait) but
1010-
// ActorMesh::stop() currently has no test coverage verifying stop
1011-
// succeeds and actors reach terminal state.
1009+
// equivalent:
1010+
// hyperactor_mesh/src/v1/actor_mesh.rs::test_actor_mesh_stop_graceful.
1011+
// Both use the same underlying mechanism (Proc::destroy_and_wait),
1012+
// but V1 returns Ok() for clean stop vs V0's ProcStopResult with
1013+
// counts.
10121014
#[tokio::test]
10131015
async fn test_stop() {
10141016
// Show here that the proc actors are stopped when the proc

0 commit comments

Comments
 (0)