Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions hyperactor_mesh/src/v1/actor_mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1032,4 +1032,58 @@ mod tests {
stop_duration
);
}

/// Graceful-stop counterpart to test_actor_mesh_stop_timeout (which
/// exercises the abort path): when the actors react to the stop
/// signal before the timeout, stopping the mesh must report success
/// and finish promptly. V1 equivalent of
/// hyperactor_multiprocess/src/proc_actor.rs::test_stop
#[async_timed_test(timeout_secs = 30)]
#[cfg(fbcode_build)]
async fn test_actor_mesh_stop_graceful() {
    hyperactor_telemetry::initialize_logging_for_test();

    // Budget for a graceful stop. Responsive actors are expected to
    // finish well inside this window rather than run into the stop
    // timeout.
    let max_graceful_stop = std::time::Duration::from_secs(2);

    let instance = testing::instance().await;

    // Request proc meshes over a 2-replica extent and use the one at
    // index 1.
    let meshes = testing::proc_meshes(instance, extent!(replicas = 2)).await;
    let target_mesh = &meshes[1];

    // TestActor does no blocking work, so it is expected to stop
    // cleanly.
    let actor_mesh = target_mesh
        .spawn::<testactor::TestActor>(instance, "test_actors", &())
        .await
        .unwrap();

    // Guard against a vacuous pass: there must be something to stop.
    let spawned_count = actor_mesh.values().count();
    assert!(spawned_count > 0, "Should have spawned some actors");

    // Measure the wall-clock time taken by the stop call.
    let started_at = RealClock.now();
    let outcome = actor_mesh.stop(instance).await;
    let elapsed = RealClock.now().duration_since(started_at);

    // A graceful stop is reported as Ok.
    assert!(
        outcome.is_ok(),
        "Stop should succeed for responsive actors, got: {:?}",
        outcome.err()
    );

    // The stop must also complete within the budget set above.
    assert!(
        elapsed < max_graceful_stop,
        "Graceful stop took {:?}, expected < 2s (actors should stop quickly)",
        elapsed
    );

    tracing::info!(
        "Successfully stopped {} actors in {:?}",
        spawned_count,
        elapsed
    );
}
}
10 changes: 6 additions & 4 deletions hyperactor_multiprocess/src/proc_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1002,13 +1002,15 @@ mod tests {
}
}

// V0 test - V1 needs equivalent coverage. Tests graceful stop
// V0 test - V1 has equivalent coverage. Tests graceful stop
// behavior where responsive actors stop cleanly within timeout.
// Spawns 4 TestActors, calls stop() with 1-second timeout,
// verifies all actors stop gracefully (5 stopped, 1 aborted). V1
// uses the same underlying mechanism (Proc::destroy_and_wait) but
// ActorMesh::stop() currently has no test coverage verifying stop
// succeeds and actors reach terminal state.
// equivalent:
// hyperactor_mesh/src/v1/actor_mesh.rs::test_actor_mesh_stop_graceful.
// Both use the same underlying mechanism (Proc::destroy_and_wait),
// but V1 returns Ok() for clean stop vs V0's ProcStopResult with
// counts.
#[tokio::test]
async fn test_stop() {
// Show here that the proc actors are stopped when the proc
Expand Down