@@ -768,6 +768,8 @@ static int semu_init(emu_state_t *emu, int argc, char **argv)
768768
769769 /* Set up peripherals */
770770 emu -> uart .in_fd = 0 , emu -> uart .out_fd = 1 ;
771+ emu -> uart .waiting_hart_id = UINT32_MAX ;
772+ emu -> uart .has_waiting_hart = false;
771773 capture_keyboard_input (); /* set up uart */
772774#if SEMU_HAS (VIRTIONET )
773775 /* Always set ram pointer, even if netdev is not configured.
@@ -809,7 +811,7 @@ static int semu_init(emu_state_t *emu, int argc, char **argv)
809811 emu -> peripheral_update_ctr = 0 ;
810812 emu -> debug = debug ;
811813
812- /* Initialize coroutine system for SMP mode (n_hart > 1) */
814+ /* Initialize coroutine system for multi-hart mode (SMP > 1) */
813815 if (vm -> n_hart > 1 ) {
814816 uint32_t total_slots = vm -> n_hart ;
815817#if SEMU_HAS (VIRTIONET )
@@ -853,17 +855,29 @@ static int semu_init(emu_state_t *emu, int argc, char **argv)
853855 */
854856static void wfi_handler (hart_t * hart )
855857{
856- vm_t * vm = hart -> vm ;
857- /* Only yield in SMP mode (n_hart > 1) */
858- if (vm -> n_hart > 1 ) {
859- /* Per RISC-V spec: WFI returns immediately if interrupt is pending.
860- * Only yield to scheduler if no interrupt is currently pending.
858+ /* Per RISC-V spec: WFI returns immediately if interrupt is pending.
859+ * We check if any interrupt is actually pending (sip & sie != 0).
860+ */
861+ bool interrupt_pending = (hart -> sip & hart -> sie ) != 0 ;
862+
863+ if (!interrupt_pending ) {
864+ emu_state_t * emu = PRIV (hart );
865+ vm_t * vm = & emu -> vm ;
866+
867+ /* Only use coroutine yielding in multi-hart mode where the coroutine
868+ * scheduler loop is active. In single-hart mode, WFI is a no-op since
869+ * there's no scheduler to resume execution after yield.
861870 */
862- if (!(hart -> sip & hart -> sie )) {
863- hart -> in_wfi = true; /* Mark as waiting for interrupt */
864- coro_yield (); /* Suspend until scheduler resumes us */
865- hart -> in_wfi = false; /* Resumed - no longer waiting */
871+ if (vm -> n_hart > 1 ) {
872+ hart -> in_wfi = true; /* Mark as waiting for interrupt */
873+ coro_yield (); /* Suspend until scheduler resumes us */
874+ /* NOTE: Do NOT clear in_wfi here to avoid race condition.
875+ * The scheduler needs to see this flag to detect idle state.
876+ * The flag will be cleared when an interrupt is actually injected.
877+ */
866878 }
879+ } else {
880+ hart -> in_wfi = false; /* Clear if interrupt already pending */
867881 }
868882}
869883
@@ -1150,87 +1164,150 @@ static int semu_run(emu_state_t *emu)
11501164 poll_capacity = needed ;
11511165 }
11521166
1167+ /* Determine poll timeout based on hart states BEFORE setting up
1168+ * poll fds. This check must happen before coro_resume_hart()
1169+ * modifies flags.
1170+ *
1171+ * - If no harts are STARTED, block indefinitely (wait for IPI)
1172+ * - If all STARTED harts are idle (WFI or UART waiting), block
1173+ * - Otherwise, use non-blocking poll (timeout=0)
1174+ */
1175+ int poll_timeout = 0 ;
1176+ uint32_t started_harts = 0 ;
1177+ uint32_t idle_harts = 0 ;
1178+ for (uint32_t i = 0 ; i < vm -> n_hart ; i ++ ) {
1179+ if (vm -> hart [i ]-> hsm_status == SBI_HSM_STATE_STARTED ) {
1180+ started_harts ++ ;
1181+ /* Count hart as idle if it's in WFI or waiting for UART */
1182+ if (vm -> hart [i ]-> in_wfi ||
1183+ (emu -> uart .has_waiting_hart &&
1184+ emu -> uart .waiting_hart_id == i )) {
1185+ idle_harts ++ ;
1186+ }
1187+ }
1188+ }
1189+
11531190 /* Collect file descriptors for poll() */
11541191 size_t pfd_count = 0 ;
11551192 int timer_index = -1 ;
11561193
1157- /* Add periodic timer fd (1ms interval for guest timer emulation) */
1194+ /* Add periodic timer fd (1ms interval for guest timer emulation).
1195+ * Only add timer when ALL harts are active (none idle) to allow
1196+ * poll() to sleep when any harts are in WFI. When harts are idle,
1197+ * timer updates can be deferred until they wake up.
1198+ *
1199+ * During SMP boot (started_harts < vm->n_hart), always include the
1200+ * timer to ensure secondary harts can complete initialization. Only
1201+ * apply conditional exclusion after all harts have started.
1202+ *
1203+ * For single-hart configurations (n_hart == 1), disable
1204+ * optimization entirely to avoid boot issues, as the first hart
1205+ * starts immediately.
1206+ */
1207+ bool all_harts_started = (started_harts >= vm -> n_hart );
1208+ bool harts_active =
1209+ (vm -> n_hart == 1 ) || !all_harts_started || (idle_harts == 0 );
11581210#ifdef __APPLE__
11591211 /* macOS: use kqueue with EVFILT_TIMER */
1160- if (kq >= 0 && pfd_count < poll_capacity ) {
1212+ if (kq >= 0 && pfd_count < poll_capacity && harts_active ) {
11611213 pfds [pfd_count ] = (struct pollfd ){kq , POLLIN , 0 };
11621214 timer_index = (int ) pfd_count ;
11631215 pfd_count ++ ;
11641216 }
11651217#else
11661218 /* Linux: use timerfd */
1167- if (wfi_timer_fd >= 0 && pfd_count < poll_capacity ) {
1219+ if (wfi_timer_fd >= 0 && pfd_count < poll_capacity &&
1220+ harts_active ) {
11681221 pfds [pfd_count ] = (struct pollfd ){wfi_timer_fd , POLLIN , 0 };
11691222 timer_index = (int ) pfd_count ;
11701223 pfd_count ++ ;
11711224 }
11721225#endif
11731226
1174- /* Add UART input fd (stdin for keyboard input) */
1175- if (emu -> uart .in_fd >= 0 && pfd_count < poll_capacity ) {
1227+ /* Add UART input fd (stdin for keyboard input).
1228+ * Only add UART when:
1229+ * 1. Single-hart configuration (n_hart == 1), OR
1230+ * 2. Not all harts started (!all_harts_started), OR
1231+ * 3. All harts are active (idle_harts == 0), OR
1232+ * 4. A hart is actively waiting for UART input
1233+ *
1234+ * This prevents UART (which is always "readable" on TTY) from
1235+ * preventing poll() sleep when harts are idle. Trade-off: user
1236+ * input (Ctrl+A x) may be delayed by up to poll_timeout (10ms)
1237+ * when harts are idle, which is acceptable for an emulator.
1238+ */
1239+ bool need_uart = (vm -> n_hart == 1 ) || !all_harts_started ||
1240+ (idle_harts == 0 ) || emu -> uart .has_waiting_hart ;
1241+ if (emu -> uart .in_fd >= 0 && pfd_count < poll_capacity &&
1242+ need_uart ) {
11761243 pfds [pfd_count ] = (struct pollfd ){emu -> uart .in_fd , POLLIN , 0 };
11771244 pfd_count ++ ;
11781245 }
11791246
1180- /* Determine poll timeout based on hart WFI states:
1181- * - If no harts are STARTED, block indefinitely (wait for IPI)
1182- * - If all STARTED harts are in WFI, block indefinitely
1183- * - Otherwise, use non-blocking poll (timeout=0)
1247+ /* Set poll timeout based on current idle state (adaptive timeout).
1248+ * Three-tier strategy:
1249+ * 1. Blocking (-1): All harts idle + have fds → wait for events
1250+ * 2. Short sleep (10ms): Some harts idle → reduce CPU usage
1251+ * 3. Non-blocking (0): All harts active → maximum responsiveness
1252+ *
1253+ * SAFETY: Never use blocking timeout when pfd_count==0, as
1254+ * poll(0,-1) would hang indefinitely. Always use 10ms timeout as
1255+ * fallback.
11841256 */
1185- int poll_timeout = 0 ;
1186- uint32_t started_harts = 0 ;
1187- uint32_t wfi_harts = 0 ;
1188- for (uint32_t i = 0 ; i < vm -> n_hart ; i ++ ) {
1189- if (vm -> hart [i ]-> hsm_status == SBI_HSM_STATE_STARTED ) {
1190- started_harts ++ ;
1191- if (vm -> hart [i ]-> in_wfi )
1192- wfi_harts ++ ;
1193- }
1194- }
1195- /* Block if no harts running or all running harts are waiting */
11961257 if (pfd_count > 0 &&
1197- (started_harts == 0 || wfi_harts == started_harts ))
1258+ (started_harts == 0 || idle_harts == started_harts )) {
1259+ /* All harts idle + have fds: block until event */
11981260 poll_timeout = -1 ;
1261+ } else if (idle_harts > 0 ) {
1262+ /* Some/all harts idle (or all idle but no fds): 10ms sleep */
1263+ poll_timeout = 10 ;
1264+ } else {
1265+ /* All harts active: non-blocking */
1266+ poll_timeout = 0 ;
1267+ }
11991268
12001269 /* Execute poll() to wait for I/O events.
1201- * - timeout=0: non-blocking poll when harts are running
1202- * - timeout=-1: blocking poll when all harts in WFI (idle state)
1270+ * - timeout=0: non-blocking poll when harts are active
1271+ * - timeout=10: short sleep when some harts idle
1272+ * - timeout=-1: blocking poll when all harts idle (WFI or UART
1273+ * wait)
1274+ *
1275+ * When pfd_count==0, poll() acts as a pure sleep mechanism.
12031276 */
1204- if (pfd_count > 0 ) {
1205- int nevents = poll (pfds , pfd_count , poll_timeout );
1206- if (nevents > 0 ) {
1207- /* Consume timer expiration events to prevent fd staying
1208- * readable
1209- */
1210- if (timer_index >= 0 &&
1211- (pfds [timer_index ].revents & POLLIN )) {
1277+ int nevents = poll (pfds , pfd_count , poll_timeout );
1278+
1279+ if (pfd_count > 0 && nevents > 0 ) {
1280+ /* Consume timer expiration events to prevent fd staying
1281+ * readable
1282+ */
1283+ if (timer_index >= 0 && (pfds [timer_index ].revents & POLLIN )) {
12121284#ifdef __APPLE__
1213- /* drain kqueue events with non-blocking kevent */
1214- struct kevent events [32 ];
1215- struct timespec timeout_zero = {0 , 0 };
1216- kevent (kq , NULL , 0 , events , 32 , & timeout_zero );
1285+ /* drain kqueue events with non-blocking kevent */
1286+ struct kevent events [32 ];
1287+ struct timespec timeout_zero = {0 , 0 };
1288+ kevent (kq , NULL , 0 , events , 32 , & timeout_zero );
12171289#else
1218- /* Linux: read timerfd to consume expiration count */
1219- uint64_t expirations ;
1220- ssize_t ret_read = read ( wfi_timer_fd , & expirations ,
1221- sizeof (expirations ));
1222- (void ) ret_read ;
1290+ /* Linux: read timerfd to consume expiration count */
1291+ uint64_t expirations ;
1292+ ssize_t ret_read =
1293+ read ( wfi_timer_fd , & expirations , sizeof (expirations ));
1294+ (void ) ret_read ;
12231295#endif
1224- }
1225- } else if (nevents < 0 && errno != EINTR ) {
1226- perror ("poll" );
12271296 }
1297+ } else if (nevents < 0 && errno != EINTR ) {
1298+ perror ("poll" );
12281299 }
12291300
12301301 /* Resume all hart coroutines (round-robin scheduling).
12311302 * Each hart executes a batch of instructions, then yields back.
1232- * Harts in WFI will clear their in_wfi flag when resuming from
1233- * coro_yield() in wfi_handler().
1303+ * Harts in WFI will have their in_wfi flag cleared by interrupt
1304+ * handlers (ACLINT, PLIC, UART) when interrupts are injected.
1305+ *
1306+ * Note: We must always resume harts after poll() returns, even if
1307+ * all harts appear idle. The in_wfi flag is only cleared when
1308+ * interrupt sources inject interrupts, so skipping resume would
1309+ * cause a deadlock where harts remain stuck waiting even after
1310+ * events arrive.
12341311 */
12351312 for (uint32_t i = 0 ; i < vm -> n_hart ; i ++ ) {
12361313 coro_resume_hart (i );
0 commit comments