Skip to content

Commit a56f5bd

Browse files
authored
Merge pull request #110 from sysprog21/coro-uart
Implement event-driven UART coroutine with CPU optimization
2 parents 4552c62 + 1aab2cb commit a56f5bd

File tree

6 files changed

+195
-65
lines changed

6 files changed

+195
-65
lines changed

aclint.c

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,13 @@
66
/* ACLINT MTIMER */
77
void aclint_mtimer_update_interrupts(hart_t *hart, mtimer_state_t *mtimer)
88
{
9-
if (semu_timer_get(&mtimer->mtime) >= mtimer->mtimecmp[hart->mhartid])
9+
if (semu_timer_get(&mtimer->mtime) >= mtimer->mtimecmp[hart->mhartid]) {
1010
hart->sip |= RV_INT_STI_BIT; /* Set Supervisor Timer Interrupt */
11-
else
11+
/* Clear WFI flag when interrupt is injected - wakes the hart */
12+
hart->in_wfi = false;
13+
} else {
1214
hart->sip &= ~RV_INT_STI_BIT; /* Clear Supervisor Timer Interrupt */
15+
}
1316
}
1417

1518
static bool aclint_mtimer_reg_read(mtimer_state_t *mtimer,
@@ -106,10 +109,13 @@ void aclint_mtimer_write(hart_t *hart,
106109
/* ACLINT MSWI */
107110
void aclint_mswi_update_interrupts(hart_t *hart, mswi_state_t *mswi)
108111
{
109-
if (mswi->msip[hart->mhartid])
112+
if (mswi->msip[hart->mhartid]) {
110113
hart->sip |= RV_INT_SSI_BIT; /* Set Machine Software Interrupt */
111-
else
114+
/* Clear WFI flag when interrupt is injected */
115+
hart->in_wfi = false;
116+
} else {
112117
hart->sip &= ~RV_INT_SSI_BIT; /* Clear Machine Software Interrupt */
118+
}
113119
}
114120

115121
static bool aclint_mswi_reg_read(mswi_state_t *mswi,
@@ -165,10 +171,13 @@ void aclint_mswi_write(hart_t *hart,
165171
/* ACLINT SSWI */
166172
void aclint_sswi_update_interrupts(hart_t *hart, sswi_state_t *sswi)
167173
{
168-
if (sswi->ssip[hart->mhartid])
174+
if (sswi->ssip[hart->mhartid]) {
169175
hart->sip |= RV_INT_SSI_BIT; /* Set Supervisor Software Interrupt */
170-
else
176+
/* Clear WFI flag when interrupt is injected */
177+
hart->in_wfi = false;
178+
} else {
171179
hart->sip &= ~RV_INT_SSI_BIT; /* Clear Supervisor Software Interrupt */
180+
}
172181
}
173182

174183
static bool aclint_sswi_reg_read(__attribute__((unused)) sswi_state_t *sswi,

coro.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,5 +607,9 @@ bool coro_is_suspended(uint32_t slot_id)
607607

608608
uint32_t coro_current_hart_id(void)
609609
{
610+
/* Return sentinel value if coroutine subsystem not initialized */
611+
if (!coro_state.initialized)
612+
return UINT32_MAX;
613+
610614
return coro_state.current_hart;
611615
}

device.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ typedef struct {
6060
/* I/O handling */
6161
int in_fd, out_fd;
6262
bool in_ready;
63+
/* Coroutine support for input waiting (SMP mode) */
64+
uint32_t waiting_hart_id; /**< Hart ID waiting for input */
65+
bool has_waiting_hart; /**< true if a hart is yielding for input */
6366
} u8250_state_t;
6467

6568
void u8250_update_interrupts(u8250_state_t *uart);

main.c

Lines changed: 132 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,8 @@ static int semu_init(emu_state_t *emu, int argc, char **argv)
768768

769769
/* Set up peripherals */
770770
emu->uart.in_fd = 0, emu->uart.out_fd = 1;
771+
emu->uart.waiting_hart_id = UINT32_MAX;
772+
emu->uart.has_waiting_hart = false;
771773
capture_keyboard_input(); /* set up uart */
772774
#if SEMU_HAS(VIRTIONET)
773775
/* Always set ram pointer, even if netdev is not configured.
@@ -809,7 +811,7 @@ static int semu_init(emu_state_t *emu, int argc, char **argv)
809811
emu->peripheral_update_ctr = 0;
810812
emu->debug = debug;
811813

812-
/* Initialize coroutine system for SMP mode (n_hart > 1) */
814+
/* Initialize coroutine system for multi-hart mode (SMP > 1) */
813815
if (vm->n_hart > 1) {
814816
uint32_t total_slots = vm->n_hart;
815817
#if SEMU_HAS(VIRTIONET)
@@ -853,17 +855,29 @@ static int semu_init(emu_state_t *emu, int argc, char **argv)
853855
*/
854856
static void wfi_handler(hart_t *hart)
855857
{
856-
vm_t *vm = hart->vm;
857-
/* Only yield in SMP mode (n_hart > 1) */
858-
if (vm->n_hart > 1) {
859-
/* Per RISC-V spec: WFI returns immediately if interrupt is pending.
860-
* Only yield to scheduler if no interrupt is currently pending.
858+
/* Per RISC-V spec: WFI returns immediately if interrupt is pending.
859+
* We check if any interrupt is actually pending (sip & sie != 0).
860+
*/
861+
bool interrupt_pending = (hart->sip & hart->sie) != 0;
862+
863+
if (!interrupt_pending) {
864+
emu_state_t *emu = PRIV(hart);
865+
vm_t *vm = &emu->vm;
866+
867+
/* Only use coroutine yielding in multi-hart mode where the coroutine
868+
* scheduler loop is active. In single-hart mode, WFI is a no-op since
869+
* there's no scheduler to resume execution after yield.
861870
*/
862-
if (!(hart->sip & hart->sie)) {
863-
hart->in_wfi = true; /* Mark as waiting for interrupt */
864-
coro_yield(); /* Suspend until scheduler resumes us */
865-
hart->in_wfi = false; /* Resumed - no longer waiting */
871+
if (vm->n_hart > 1) {
872+
hart->in_wfi = true; /* Mark as waiting for interrupt */
873+
coro_yield(); /* Suspend until scheduler resumes us */
874+
/* NOTE: Do NOT clear in_wfi here to avoid race condition.
875+
* The scheduler needs to see this flag to detect idle state.
876+
* The flag will be cleared when an interrupt is actually injected.
877+
*/
866878
}
879+
} else {
880+
hart->in_wfi = false; /* Clear if interrupt already pending */
867881
}
868882
}
869883

@@ -1150,87 +1164,150 @@ static int semu_run(emu_state_t *emu)
11501164
poll_capacity = needed;
11511165
}
11521166

1167+
/* Determine poll timeout based on hart states BEFORE setting up
1168+
* poll fds. This check must happen before coro_resume_hart()
1169+
* modifies flags.
1170+
*
1171+
* - If no harts are STARTED, block indefinitely (wait for IPI)
1172+
* - If all STARTED harts are idle (WFI or UART waiting), block
1173+
* - Otherwise, use non-blocking poll (timeout=0)
1174+
*/
1175+
int poll_timeout = 0;
1176+
uint32_t started_harts = 0;
1177+
uint32_t idle_harts = 0;
1178+
for (uint32_t i = 0; i < vm->n_hart; i++) {
1179+
if (vm->hart[i]->hsm_status == SBI_HSM_STATE_STARTED) {
1180+
started_harts++;
1181+
/* Count hart as idle if it's in WFI or waiting for UART */
1182+
if (vm->hart[i]->in_wfi ||
1183+
(emu->uart.has_waiting_hart &&
1184+
emu->uart.waiting_hart_id == i)) {
1185+
idle_harts++;
1186+
}
1187+
}
1188+
}
1189+
11531190
/* Collect file descriptors for poll() */
11541191
size_t pfd_count = 0;
11551192
int timer_index = -1;
11561193

1157-
/* Add periodic timer fd (1ms interval for guest timer emulation) */
1194+
/* Add periodic timer fd (1ms interval for guest timer emulation).
1195+
* Only add timer when ALL harts are active (none idle) to allow
1196+
* poll() to sleep when any harts are in WFI. When harts are idle,
1197+
* timer updates can be deferred until they wake up.
1198+
*
1199+
* During SMP boot (started_harts < vm->n_hart), always include the
1200+
* timer to ensure secondary harts can complete initialization. Only
1201+
* apply conditional exclusion after all harts have started.
1202+
*
1203+
* For single-hart configurations (n_hart == 1), disable
1204+
* optimization entirely to avoid boot issues, as the first hart
1205+
* starts immediately.
1206+
*/
1207+
bool all_harts_started = (started_harts >= vm->n_hart);
1208+
bool harts_active =
1209+
(vm->n_hart == 1) || !all_harts_started || (idle_harts == 0);
11581210
#ifdef __APPLE__
11591211
/* macOS: use kqueue with EVFILT_TIMER */
1160-
if (kq >= 0 && pfd_count < poll_capacity) {
1212+
if (kq >= 0 && pfd_count < poll_capacity && harts_active) {
11611213
pfds[pfd_count] = (struct pollfd){kq, POLLIN, 0};
11621214
timer_index = (int) pfd_count;
11631215
pfd_count++;
11641216
}
11651217
#else
11661218
/* Linux: use timerfd */
1167-
if (wfi_timer_fd >= 0 && pfd_count < poll_capacity) {
1219+
if (wfi_timer_fd >= 0 && pfd_count < poll_capacity &&
1220+
harts_active) {
11681221
pfds[pfd_count] = (struct pollfd){wfi_timer_fd, POLLIN, 0};
11691222
timer_index = (int) pfd_count;
11701223
pfd_count++;
11711224
}
11721225
#endif
11731226

1174-
/* Add UART input fd (stdin for keyboard input) */
1175-
if (emu->uart.in_fd >= 0 && pfd_count < poll_capacity) {
1227+
/* Add UART input fd (stdin for keyboard input).
1228+
* Only add UART when:
1229+
* 1. Single-hart configuration (n_hart == 1), OR
1230+
* 2. Not all harts started (!all_harts_started), OR
1231+
* 3. All harts are active (idle_harts == 0), OR
1232+
* 4. A hart is actively waiting for UART input
1233+
*
1234+
* This prevents UART (which is always "readable" on TTY) from
1235+
* preventing poll() sleep when harts are idle. Trade-off: user
1236+
* input (Ctrl+A x) may be delayed by up to poll_timeout (10ms)
1237+
* when harts are idle, which is acceptable for an emulator.
1238+
*/
1239+
bool need_uart = (vm->n_hart == 1) || !all_harts_started ||
1240+
(idle_harts == 0) || emu->uart.has_waiting_hart;
1241+
if (emu->uart.in_fd >= 0 && pfd_count < poll_capacity &&
1242+
need_uart) {
11761243
pfds[pfd_count] = (struct pollfd){emu->uart.in_fd, POLLIN, 0};
11771244
pfd_count++;
11781245
}
11791246

1180-
/* Determine poll timeout based on hart WFI states:
1181-
* - If no harts are STARTED, block indefinitely (wait for IPI)
1182-
* - If all STARTED harts are in WFI, block indefinitely
1183-
* - Otherwise, use non-blocking poll (timeout=0)
1247+
/* Set poll timeout based on current idle state (adaptive timeout).
1248+
* Three-tier strategy:
1249+
* 1. Blocking (-1): All harts idle + have fds → wait for events
1250+
* 2. Short sleep (10ms): Some harts idle → reduce CPU usage
1251+
* 3. Non-blocking (0): All harts active → maximum responsiveness
1252+
*
1253+
* SAFETY: Never use blocking timeout when pfd_count==0, as
1254+
* poll(0,-1) would hang indefinitely. Always use 10ms timeout as
1255+
* fallback.
11841256
*/
1185-
int poll_timeout = 0;
1186-
uint32_t started_harts = 0;
1187-
uint32_t wfi_harts = 0;
1188-
for (uint32_t i = 0; i < vm->n_hart; i++) {
1189-
if (vm->hart[i]->hsm_status == SBI_HSM_STATE_STARTED) {
1190-
started_harts++;
1191-
if (vm->hart[i]->in_wfi)
1192-
wfi_harts++;
1193-
}
1194-
}
1195-
/* Block if no harts running or all running harts are waiting */
11961257
if (pfd_count > 0 &&
1197-
(started_harts == 0 || wfi_harts == started_harts))
1258+
(started_harts == 0 || idle_harts == started_harts)) {
1259+
/* All harts idle + have fds: block until event */
11981260
poll_timeout = -1;
1261+
} else if (idle_harts > 0) {
1262+
/* Some/all harts idle (or all idle but no fds): 10ms sleep */
1263+
poll_timeout = 10;
1264+
} else {
1265+
/* All harts active: non-blocking */
1266+
poll_timeout = 0;
1267+
}
11991268

12001269
/* Execute poll() to wait for I/O events.
1201-
* - timeout=0: non-blocking poll when harts are running
1202-
* - timeout=-1: blocking poll when all harts in WFI (idle state)
1270+
* - timeout=0: non-blocking poll when harts are active
1271+
* - timeout=10: short sleep when some harts idle
1272+
* - timeout=-1: blocking poll when all harts idle (WFI or UART
1273+
* wait)
1274+
*
1275+
* When pfd_count==0, poll() acts as a pure sleep mechanism.
12031276
*/
1204-
if (pfd_count > 0) {
1205-
int nevents = poll(pfds, pfd_count, poll_timeout);
1206-
if (nevents > 0) {
1207-
/* Consume timer expiration events to prevent fd staying
1208-
* readable
1209-
*/
1210-
if (timer_index >= 0 &&
1211-
(pfds[timer_index].revents & POLLIN)) {
1277+
int nevents = poll(pfds, pfd_count, poll_timeout);
1278+
1279+
if (pfd_count > 0 && nevents > 0) {
1280+
/* Consume timer expiration events to prevent fd staying
1281+
* readable
1282+
*/
1283+
if (timer_index >= 0 && (pfds[timer_index].revents & POLLIN)) {
12121284
#ifdef __APPLE__
1213-
/* drain kqueue events with non-blocking kevent */
1214-
struct kevent events[32];
1215-
struct timespec timeout_zero = {0, 0};
1216-
kevent(kq, NULL, 0, events, 32, &timeout_zero);
1285+
/* drain kqueue events with non-blocking kevent */
1286+
struct kevent events[32];
1287+
struct timespec timeout_zero = {0, 0};
1288+
kevent(kq, NULL, 0, events, 32, &timeout_zero);
12171289
#else
1218-
/* Linux: read timerfd to consume expiration count */
1219-
uint64_t expirations;
1220-
ssize_t ret_read = read(wfi_timer_fd, &expirations,
1221-
sizeof(expirations));
1222-
(void) ret_read;
1290+
/* Linux: read timerfd to consume expiration count */
1291+
uint64_t expirations;
1292+
ssize_t ret_read =
1293+
read(wfi_timer_fd, &expirations, sizeof(expirations));
1294+
(void) ret_read;
12231295
#endif
1224-
}
1225-
} else if (nevents < 0 && errno != EINTR) {
1226-
perror("poll");
12271296
}
1297+
} else if (nevents < 0 && errno != EINTR) {
1298+
perror("poll");
12281299
}
12291300

12301301
/* Resume all hart coroutines (round-robin scheduling).
12311302
* Each hart executes a batch of instructions, then yields back.
1232-
* Harts in WFI will clear their in_wfi flag when resuming from
1233-
* coro_yield() in wfi_handler().
1303+
* Harts in WFI will have their in_wfi flag cleared by interrupt
1304+
* handlers (ACLINT, PLIC, UART) when interrupts are injected.
1305+
*
1306+
* Note: We must always resume harts after poll() returns, even if
1307+
* all harts appear idle. The in_wfi flag is only cleared when
1308+
* interrupt sources inject interrupts, so skipping resume would
1309+
* cause a deadlock where harts remain stuck waiting even after
1310+
* events arrive.
12341311
*/
12351312
for (uint32_t i = 0; i < vm->n_hart; i++) {
12361313
coro_resume_hart(i);

plic.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,13 @@ void plic_update_interrupts(vm_t *vm, plic_state_t *plic)
1111
plic->masked |= plic->active;
1212
/* Send interrupt to target */
1313
for (uint32_t i = 0; i < vm->n_hart; i++) {
14-
if (plic->ip & plic->ie[i])
14+
if (plic->ip & plic->ie[i]) {
1515
vm->hart[i]->sip |= RV_INT_SEI_BIT;
16-
else
16+
/* Clear WFI flag when external interrupt is injected */
17+
vm->hart[i]->in_wfi = false;
18+
} else {
1719
vm->hart[i]->sip &= ~RV_INT_SEI_BIT;
20+
}
1821
}
1922
}
2023

uart.c

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <termios.h>
77
#include <unistd.h>
88

9+
#include "coro.h"
910
#include "device.h"
1011
#include "riscv.h"
1112
#include "riscv_private.h"
@@ -86,12 +87,45 @@ static void u8250_handle_out(u8250_state_t *uart, uint8_t value)
8687
fprintf(stderr, "failed to write UART output: %s\n", strerror(errno));
8788
}
8889

90+
/* Wait for UART input using coroutine yield (SMP mode only)
91+
* This function allows a hart to yield when no UART input is available,
92+
* preventing CPU spinning when waiting for stdin. The hart will be resumed
93+
* by the event loop when stdin becomes readable.
94+
*/
95+
static void u8250_wait_for_input(u8250_state_t *uart)
96+
{
97+
/* Only yield in SMP mode - single-core mode doesn't use coroutines */
98+
uint32_t hart_id = coro_current_hart_id();
99+
if (hart_id == UINT32_MAX)
100+
return; /* Not in a coroutine, skip yielding */
101+
102+
/* Mark this hart as waiting for UART input */
103+
uart->waiting_hart_id = hart_id;
104+
uart->has_waiting_hart = true;
105+
106+
/* Yield until stdin has data available. The event loop will resume this
107+
* hart when poll() detects POLLIN on stdin fd.
108+
*/
109+
coro_yield();
110+
111+
/* Resumed - clear waiting state */
112+
uart->has_waiting_hart = false;
113+
uart->waiting_hart_id = UINT32_MAX;
114+
}
115+
89116
static uint8_t u8250_handle_in(u8250_state_t *uart)
90117
{
91118
uint8_t value = 0;
92119
u8250_check_ready(uart);
93-
if (!uart->in_ready)
94-
return value;
120+
121+
/* If no data available, yield and wait for stdin to become readable */
122+
if (!uart->in_ready) {
123+
u8250_wait_for_input(uart);
124+
/* After resume, re-check if data is now available */
125+
u8250_check_ready(uart);
126+
if (!uart->in_ready)
127+
return value; /* Spurious wakeup - still no data */
128+
}
95129

96130
if (read(uart->in_fd, &value, 1) < 0)
97131
fprintf(stderr, "failed to read UART input: %s\n", strerror(errno));

0 commit comments

Comments
 (0)