From 2a2a5c4ad3d73887c04c4f7a8dbcadda987448aa Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 5 Oct 2025 15:50:43 +0800 Subject: [PATCH 1/7] Enable TSAN with FULL4G and T2C support ThreadSanitizer (TSAN) can now detect race conditions across the entire multi-threaded JIT pipeline with full 4GB address space emulation. This enables testing of the tier-2 LLVM compilation thread while maintaining production memory layout. Memory Layout (TSAN-compatible): - Main memory: MAP_FIXED at 0x7d0000000000 (4GB) - JIT buffer: MAP_FIXED at 0x7d1000000000 - Both allocations within TSAN app range (0x7cf000000000 - 0x7ffffffff000) - Prevents conflicts with TSAN shadow memory (0x02a000000000 - 0x7cefffffffff) ASLR Mitigation: - Added setarch -R wrapper for TSAN test execution - Disables ASLR to prevent random allocations in shadow memory - Only affects test runs, not production builds SDL Conflict Resolution: - SDL (uninstrumented system library) creates threads TSAN cannot track - Disabled SDL when TSAN enabled to focus on built-in race detection - Production builds still fully support SDL --- Makefile | 30 ++++++++++++++++++++++- src/emulate.c | 25 +++++++++++++++---- src/io.c | 21 ++++++++++++++++ src/jit.c | 22 +++++++++++++++-- src/main.c | 22 +++++++++++++++++ src/riscv.c | 59 ++++++++++++++++++++++++++++++++++++--------- src/riscv_private.h | 3 ++- src/t2c.c | 4 ++- 8 files changed, 165 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 4481ed3a..5a34979b 100644 --- a/Makefile +++ b/Makefile @@ -80,6 +80,28 @@ endif ENABLE_ARCH_TEST ?= 0 $(call set-feature, ARCH_TEST) +# ThreadSanitizer support +# TSAN on x86-64 memory layout: +# Shadow: 0x02a000000000 - 0x7cefffffffff (reserved by TSAN) +# App: 0x7cf000000000 - 0x7ffffffff000 (usable by application) +# +# We use MAP_FIXED to allocate FULL4G's 4GB memory at a fixed address +# (0x7d0000000000) within TSAN's app range, ensuring compatibility. 
+# +# IMPORTANT: TSAN requires ASLR (Address Space Layout Randomization) to be +# disabled to prevent system allocations from landing in TSAN's shadow memory. +# Tests are run with 'setarch $(uname -m) -R' to disable ASLR. +ENABLE_TSAN ?= 0 +ifeq ("$(ENABLE_TSAN)", "1") +override ENABLE_SDL := 0 # SDL (uninstrumented system lib) creates threads TSAN cannot track +override ENABLE_LTO := 0 # LTO interferes with TSAN instrumentation +CFLAGS += -DTSAN_ENABLED # Signal code to use TSAN-compatible allocations +# Disable ASLR for TSAN tests to prevent allocations in TSAN shadow memory +BIN_WRAPPER = setarch $(shell uname -m) -R +else +BIN_WRAPPER = +endif + # Enable link-time optimization (LTO) ENABLE_LTO ?= 1 ifeq ($(call has, LTO), 1) @@ -332,6 +354,12 @@ CFLAGS += -fsanitize=undefined -fno-sanitize=alignment -fno-sanitize-recover=all LDFLAGS += -fsanitize=undefined -fno-sanitize=alignment -fno-sanitize-recover=all endif +# ThreadSanitizer flags (ENABLE_TSAN is set earlier to override SDL/LTO) +ifeq ("$(ENABLE_TSAN)", "1") +CFLAGS += -fsanitize=thread -g +LDFLAGS += -fsanitize=thread +endif + $(OUT)/emulate.o: CFLAGS += -foptimize-sibling-calls -fomit-frame-pointer -fno-stack-check -fno-stack-protector # .DEFAULT_GOAL should be set to all since the very first target is not all @@ -445,7 +473,7 @@ define check-test $(Q)true; \ $(PRINTF) "Running $(3) ... 
"; \ OUTPUT_FILE="$$(mktemp)"; \ -if (LC_ALL=C $(BIN) $(1) $(2) > "$$OUTPUT_FILE") && \ +if (LC_ALL=C $(BIN_WRAPPER) $(BIN) $(1) $(2) > "$$OUTPUT_FILE") && \ [ "$$(cat "$$OUTPUT_FILE" | $(LOG_FILTER) | $(4))" = "$(5)" ]; then \ $(call notice, [OK]); \ else \ diff --git a/src/emulate.c b/src/emulate.c index e5e4cddf..b97c9493 100644 --- a/src/emulate.c +++ b/src/emulate.c @@ -304,6 +304,7 @@ static block_t *block_alloc(riscv_t *rv) block->hot2 = false; block->has_loops = false; block->n_invoke = 0; + block->func = NULL; INIT_LIST_HEAD(&block->list); #if RV32_HAS(T2C) block->compiled = false; @@ -1176,22 +1177,32 @@ void rv_step(void *arg) #if RV32_HAS(JIT) #if RV32_HAS(T2C) /* executed through the tier-2 JIT compiler */ - if (block->hot2) { + /* Use acquire semantics to ensure we see func write before using it */ + if (__atomic_load_n(&block->hot2, __ATOMIC_ACQUIRE)) { ((exec_t2c_func_t) block->func)(rv); prev = NULL; continue; } /* check if invoking times of t1 generated code exceed threshold */ - else if (!block->compiled && block->n_invoke >= THRESHOLD) { - block->compiled = true; + else if (!__atomic_load_n(&block->compiled, __ATOMIC_RELAXED) && + __atomic_load_n(&block->n_invoke, __ATOMIC_RELAXED) >= + THRESHOLD) { + __atomic_store_n(&block->compiled, true, __ATOMIC_RELAXED); queue_entry_t *entry = malloc(sizeof(queue_entry_t)); if (unlikely(!entry)) { /* Malloc failed - reset compiled flag to allow retry later */ - block->compiled = false; + __atomic_store_n(&block->compiled, false, __ATOMIC_RELAXED); continue; } - entry->block = block; + /* Store cache key instead of pointer to prevent use-after-free */ +#if RV32_HAS(SYSTEM) + entry->key = + (uint64_t) block->pc_start | ((uint64_t) block->satp << 32); +#else + entry->key = (uint64_t) block->pc_start; +#endif pthread_mutex_lock(&rv->wait_queue_lock); list_add(&entry->list, &rv->wait_queue); + pthread_cond_signal(&rv->wait_queue_cond); pthread_mutex_unlock(&rv->wait_queue_lock); } #endif @@ -1203,7 +1214,11 @@ 
void rv_step(void *arg) * entry in compiled binary buffer. */ if (block->hot) { +#if RV32_HAS(T2C) + __atomic_fetch_add(&block->n_invoke, 1, __ATOMIC_RELAXED); +#else block->n_invoke++; +#endif ((exec_block_func_t) state->buf)( rv, (uintptr_t) (state->buf + block->offset)); prev = NULL; diff --git a/src/io.c b/src/io.c index 4ff325d3..1e5b73b9 100644 --- a/src/io.c +++ b/src/io.c @@ -27,12 +27,33 @@ memory_t *memory_new(uint32_t size) return NULL; assert(mem); #if HAVE_MMAP +#if defined(TSAN_ENABLED) && defined(__x86_64__) + /* ThreadSanitizer compatibility: Use MAP_FIXED to allocate at a specific + * address within TSAN's app range (0x7cf000000000 - 0x7ffffffff000). + * + * Fixed address: 0x7d0000000000 + * Size: up to 4GB (0x100000000) + * End: 0x7d0100000000 (well within app range) + * + * This guarantees the allocation won't land in TSAN's shadow memory, + * preventing "unexpected memory mapping" errors. + */ + void *fixed_addr = (void *) 0x7d0000000000UL; + data_memory_base = mmap(fixed_addr, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (data_memory_base == MAP_FAILED) { + free(mem); + return NULL; + } +#else + /* Standard allocation without TSAN */ data_memory_base = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (data_memory_base == MAP_FAILED) { free(mem); return NULL; } +#endif #else data_memory_base = malloc(size); if (!data_memory_base) { diff --git a/src/jit.c b/src/jit.c index a6dfdb70..631b1554 100644 --- a/src/jit.c +++ b/src/jit.c @@ -2336,6 +2336,25 @@ struct jit_state *jit_state_init(size_t size) state->offset = 0; state->size = size; +#if defined(TSAN_ENABLED) && defined(__x86_64__) + /* ThreadSanitizer compatibility: Allocate JIT code buffer at a fixed + * address above the main memory region to avoid conflicts. 
+ * + * Main memory: 0x7d0000000000 - 0x7d0100000000 (4GB for FULL4G) + * JIT buffer: 0x7d1000000000 + size + * + * This keeps both allocations in TSAN's app range (0x7cf000000000 - + * 0x7ffffffff000) and prevents overlap with main memory or TSAN shadow. + */ + void *jit_addr = (void *) 0x7d1000000000UL; + state->buf = mmap(jit_addr, size, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (state->buf == MAP_FAILED) { + free(state); + return NULL; + } +#else + /* Standard allocation without TSAN */ state->buf = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS #if defined(__APPLE__) @@ -2347,8 +2366,7 @@ struct jit_state *jit_state_init(size_t size) free(state); return NULL; } - assert(state->buf != MAP_FAILED); - +#endif state->n_blocks = 0; set_reset(&state->set); reset_reg(); diff --git a/src/main.c b/src/main.c index 4c851edd..45374bb1 100644 --- a/src/main.c +++ b/src/main.c @@ -19,6 +19,28 @@ #include "riscv.h" #include "utils.h" +/* ThreadSanitizer configuration for FULL4G compatibility + * + * We use MAP_FIXED to allocate emulated memory at 0x7d0000000000, which is + * within TSAN's application memory range (0x7cf000000000 - 0x7ffffffff000). + * This avoids conflicts with TSAN's shadow memory and allows race detection + * to work with FULL4G's 4GB address space. + * + * Configuration optimizes for race detection with minimal overhead. 
+ */ +#if defined(__SANITIZE_THREAD__) +const char *__tsan_default_options() +{ + return "halt_on_error=0" /* Continue after errors */ + ":report_bugs=1" /* Report data races */ + ":second_deadlock_stack=1" /* Full deadlock info */ + ":verbosity=0" /* Reduce noise */ + ":memory_limit_mb=0" /* No memory limit */ + ":history_size=7" /* Larger race detection window */ + ":io_sync=0"; /* Don't sync on I/O */ +} +#endif + /* enable program trace mode */ #if !RV32_HAS(SYSTEM) || (RV32_HAS(SYSTEM) && RV32_HAS(ELF_LOADER)) static bool opt_trace = false; diff --git a/src/riscv.c b/src/riscv.c index b892cf27..e500aa67 100644 --- a/src/riscv.c +++ b/src/riscv.c @@ -206,19 +206,41 @@ static pthread_t t2c_thread; static void *t2c_runloop(void *arg) { riscv_t *rv = (riscv_t *) arg; + pthread_mutex_lock(&rv->wait_queue_lock); while (!rv->quit) { - if (!list_empty(&rv->wait_queue)) { - queue_entry_t *entry = - list_last_entry(&rv->wait_queue, queue_entry_t, list); - pthread_mutex_lock(&rv->wait_queue_lock); - list_del_init(&entry->list); - pthread_mutex_unlock(&rv->wait_queue_lock); - pthread_mutex_lock(&rv->cache_lock); - t2c_compile(rv, entry->block); - pthread_mutex_unlock(&rv->cache_lock); - free(entry); - } + /* Wait for work or quit signal */ + while (list_empty(&rv->wait_queue) && !rv->quit) + pthread_cond_wait(&rv->wait_queue_cond, &rv->wait_queue_lock); + + if (rv->quit) + break; + + /* Extract work item while holding the lock */ + queue_entry_t *entry = + list_last_entry(&rv->wait_queue, queue_entry_t, list); + list_del_init(&entry->list); + pthread_mutex_unlock(&rv->wait_queue_lock); + + /* Perform compilation with cache lock */ + pthread_mutex_lock(&rv->cache_lock); + /* Look up block from cache using the key (might have been evicted) */ + uint32_t pc = (uint32_t) entry->key; + block_t *block = (block_t *) cache_get(rv->block_cache, pc, false); +#if RV32_HAS(SYSTEM) + /* Verify SATP matches (for system mode) */ + uint32_t satp = (uint32_t) (entry->key >> 32); + if 
(block && block->satp != satp) + block = NULL; +#endif + /* Compile only if block still exists in cache */ + if (block) + t2c_compile(rv, block); + pthread_mutex_unlock(&rv->cache_lock); + free(entry); + + pthread_mutex_lock(&rv->wait_queue_lock); } + pthread_mutex_unlock(&rv->wait_queue_lock); return NULL; } #endif @@ -777,6 +799,7 @@ riscv_t *rv_create(riscv_user_t rv_attr) /* prepare wait queue. */ pthread_mutex_init(&rv->wait_queue_lock, NULL); pthread_mutex_init(&rv->cache_lock, NULL); + pthread_cond_init(&rv->wait_queue_cond, NULL); INIT_LIST_HEAD(&rv->wait_queue); /* activate the background compilation thread. */ pthread_create(&t2c_thread, NULL, t2c_runloop, rv); @@ -910,10 +933,24 @@ void rv_delete(riscv_t *rv) block_map_destroy(rv); #else #if RV32_HAS(T2C) + /* Signal the thread to quit */ + pthread_mutex_lock(&rv->wait_queue_lock); rv->quit = true; + pthread_cond_signal(&rv->wait_queue_cond); + pthread_mutex_unlock(&rv->wait_queue_lock); + pthread_join(t2c_thread, NULL); + + /* Clean up any remaining entries in wait queue */ + queue_entry_t *entry, *safe; + list_for_each_entry_safe (entry, safe, &rv->wait_queue, list) { + list_del(&entry->list); + free(entry); + } + pthread_mutex_destroy(&rv->wait_queue_lock); pthread_mutex_destroy(&rv->cache_lock); + pthread_cond_destroy(&rv->wait_queue_cond); jit_cache_exit(rv->jit_cache); #endif jit_state_exit(rv->jit_state); diff --git a/src/riscv_private.h b/src/riscv_private.h index 12a3bfd0..89165011 100644 --- a/src/riscv_private.h +++ b/src/riscv_private.h @@ -105,7 +105,7 @@ typedef struct block { #if RV32_HAS(JIT) && RV32_HAS(T2C) typedef struct { - block_t *block; + uint64_t key; /**< cache key (PC or PC|SATP) to look up block */ struct list_head list; } queue_entry_t; #endif @@ -197,6 +197,7 @@ struct riscv_internal { #if RV32_HAS(T2C) struct list_head wait_queue; pthread_mutex_t wait_queue_lock, cache_lock; + pthread_cond_t wait_queue_cond; volatile bool quit; /**< Determine the main thread is terminated or 
not */ #endif void *jit_state; diff --git a/src/t2c.c b/src/t2c.c index 343b85e6..2115adaf 100644 --- a/src/t2c.c +++ b/src/t2c.c @@ -346,7 +346,9 @@ void t2c_compile(riscv_t *rv, block_t *block) jit_cache_update(rv->jit_cache, key, block->func); - block->hot2 = true; + /* Use release semantics to ensure func write is visible before hot2 is set + */ + __atomic_store_n(&block->hot2, true, __ATOMIC_RELEASE); } struct jit_cache *jit_cache_init() From f1b685e64e45e14f7f87654a3471be8b616841da Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 5 Oct 2025 16:02:50 +0800 Subject: [PATCH 2/7] Add Arm64 TSAN support and fix JIT cache coherency This commit adds ThreadSanitizer (TSAN) support for ARM64/Apple Silicon and fixes critical JIT instruction cache coherency issues. ARM64 TSAN Support: - Extended TSAN-compatible memory allocation to ARM64 architecture - Main memory allocated at fixed address 0x150000000000 (21TB) - JIT buffer allocated at 0x151000000000 with MAP_JIT for Apple Silicon - Both allocations avoid TSAN shadow memory and enable race detection - Note: Requires ASLR disabled on macOS (SIP restrictions may apply) JIT Cache Coherency Fixes: 1. Fixed pthread_jit_write_protect_np() ordering in update_branch_imm 2. Added sys_icache_invalidate() in update_branch_imm 3. Added cache invalidation in resolve_jumps() for x86_64 Fix JIT regalloc conflicts in memory load After reset_reg() clears the register allocator state, load instructions (lb/lh/lw/lbu/lhu) could reallocate the same host register for both the address and destination, causing data corruption. This commit uses map_vm_reg_reserved() to prevent reusing the address register. 
--- src/emulate.c | 19 ++++++++++++---- src/io.c | 24 +++++++++++++++----- src/jit.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++--- src/main.c | 2 +- src/rv32_jit.c | 10 ++++----- 5 files changed, 98 insertions(+), 18 deletions(-) diff --git a/src/emulate.c b/src/emulate.c index b97c9493..67d26d86 100644 --- a/src/emulate.c +++ b/src/emulate.c @@ -24,6 +24,7 @@ extern struct target_ops gdbstub_ops; #endif #include "decode.h" +#include "log.h" #include "mpool.h" #include "riscv.h" #include "riscv_private.h" @@ -1230,10 +1231,20 @@ void rv_step(void *arg) #endif ) { jit_translate(rv, block); - ((exec_block_func_t) state->buf)( - rv, (uintptr_t) (state->buf + block->offset)); - prev = NULL; - continue; + /* Only execute if translation succeeded (block is hot) */ + if (block->hot) { + rv_log_debug("JIT: Executing block pc=0x%08x, offset=%u", + block->pc_start, block->offset); + ((exec_block_func_t) state->buf)( + rv, (uintptr_t) (state->buf + block->offset)); + prev = NULL; + continue; + } + /* Fall through to interpreter if translation failed */ + rv_log_debug( + "JIT: Translation failed for block pc=0x%08x, using " + "interpreter", + block->pc_start); } set_reset(&pc_set); has_loops = false; diff --git a/src/io.c b/src/io.c index 1e5b73b9..975013ee 100644 --- a/src/io.c +++ b/src/io.c @@ -27,18 +27,32 @@ memory_t *memory_new(uint32_t size) return NULL; assert(mem); #if HAVE_MMAP -#if defined(TSAN_ENABLED) && defined(__x86_64__) +#if defined(TSAN_ENABLED) /* ThreadSanitizer compatibility: Use MAP_FIXED to allocate at a specific - * address within TSAN's app range (0x7cf000000000 - 0x7ffffffff000). + * address to avoid conflicts with TSAN's shadow memory. + */ +#if defined(__x86_64__) + /* x86_64: Allocate within TSAN's range (0x7cf000000000 - 0x7ffffffff000). 
* * Fixed address: 0x7d0000000000 * Size: up to 4GB (0x100000000) * End: 0x7d0100000000 (well within app range) - * - * This guarantees the allocation won't land in TSAN's shadow memory, - * preventing "unexpected memory mapping" errors. */ void *fixed_addr = (void *) 0x7d0000000000UL; +#elif defined(__aarch64__) + /* ARM64 (macOS/Apple Silicon): Use higher address range. + * + * Fixed address: 0x150000000000 (21TB) + * Size: up to 4GB (0x100000000) + * End: 0x150100000000 + * + * This avoids TSAN's shadow memory and typical process allocations. + * Requires ASLR disabled via: setarch $(uname -m) -R + */ + void *fixed_addr = (void *) 0x150000000000UL; +#else +#error "TSAN is only supported on x86_64 and aarch64" +#endif data_memory_base = mmap(fixed_addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); if (data_memory_base == MAP_FAILED) { diff --git a/src/jit.c b/src/jit.c index 631b1554..5fa1f32f 100644 --- a/src/jit.c +++ b/src/jit.c @@ -42,6 +42,7 @@ #include "decode.h" #include "io.h" #include "jit.h" +#include "log.h" #include "riscv.h" #include "riscv_private.h" #include "utils.h" @@ -593,24 +594,30 @@ static void update_branch_imm(struct jit_state *state, assert((imm & 3) == 0); uint32_t insn; imm >>= 2; + rv_log_debug("JIT: Patching branch at offset=%u, imm=%d", offset, imm * 4); + /* Read instruction while in execute mode (MAP_JIT requirement) */ memcpy(&insn, state->buf + offset, sizeof(uint32_t)); if ((insn & 0xfe000000U) == 0x54000000U /* Conditional branch immediate. */ || (insn & 0x7e000000U) == 0x34000000U) { /* Compare and branch immediate. */ assert((imm >> 19) == INT64_C(-1) || (imm >> 19) == 0); + insn &= ~(0x7ffffU << 5); /* Clear old offset bits */ insn |= (imm & 0x7ffff) << 5; } else if ((insn & 0x7c000000U) == 0x14000000U) { /* Unconditional branch immediate. 
*/ assert((imm >> 26) == INT64_C(-1) || (imm >> 26) == 0); + insn &= ~0x03ffffffU; /* Clear old offset bits */ insn |= (imm & 0x03ffffffU) << 0; } else { assert(false); insn = BAD_OPCODE; } #if defined(__APPLE__) && defined(__aarch64__) + /* Switch to write mode only for writing */ pthread_jit_write_protect_np(false); #endif memcpy(state->buf + offset, &insn, sizeof(uint32_t)); + sys_icache_invalidate(state->buf + offset, sizeof(uint32_t)); #if defined(__APPLE__) && defined(__aarch64__) pthread_jit_write_protect_np(true); #endif @@ -2164,9 +2171,12 @@ void clear_hot(block_t *block) static void code_cache_flush(struct jit_state *state, riscv_t *rv) { + rv_log_info("JIT: Flushing code cache (n_blocks=%d, n_jumps=%d, offset=%u)", + state->n_blocks, state->n_jumps, state->offset); should_flush = false; state->offset = state->org_size; state->n_blocks = 0; + state->n_jumps = 0; /* Reset jump count when flushing */ set_reset(&state->set); clear_cache_hot(rv->block_cache, (clear_func_t) clear_hot); #if RV32_HAS(T2C) @@ -2196,6 +2206,7 @@ static void translate(struct jit_state *state, riscv_t *rv, block_t *block) static void resolve_jumps(struct jit_state *state) { + rv_log_debug("JIT: Resolving %d jumps", state->n_jumps); for (int i = 0; i < state->n_jumps; i++) { struct jump jump = state->jumps[i]; int target_loc; @@ -2218,6 +2229,10 @@ static void resolve_jumps(struct jit_state *state) (if (jump.target_satp == state->offset_map[i].satp), ) { target_loc = state->offset_map[i].offset; + rv_log_debug( + "JIT: Jump %d resolved to block pc=0x%08x, " + "offset=%d", + i, jump.target_pc, target_loc); break; } } @@ -2229,6 +2244,7 @@ static void resolve_jumps(struct jit_state *state) uint8_t *offset_ptr = &state->buf[jump.offset_loc]; memcpy(offset_ptr, &rel, sizeof(uint32_t)); + sys_icache_invalidate(offset_ptr, sizeof(uint32_t)); #elif defined(__aarch64__) int32_t rel = target_loc - jump.offset_loc; update_branch_imm(state, jump.offset_loc, rel); @@ -2308,23 +2324,35 @@ void 
jit_translate(riscv_t *rv, block_t *block) ) { block->offset = state->offset_map[i].offset; block->hot = true; + rv_log_debug("JIT: Cache hit for block pc=0x%08x, offset=%u", + block->pc_start, block->offset); return; } } assert(NULL); __UNREACHABLE; } + rv_log_debug("JIT: Starting translation for block pc=0x%08x", + block->pc_start); restart: memset(state->jumps, 0, MAX_JUMPS * sizeof(struct jump)); state->n_jumps = 0; block->offset = state->offset; translate_chained_block(state, rv, block); if (unlikely(should_flush)) { + /* Mark block as not translated since translation was incomplete */ + block->hot = false; + /* Don't reset offset - it will be set correctly on restart */ + rv_log_debug("JIT: Translation triggered flush for block pc=0x%08x", + block->pc_start); code_cache_flush(state, rv); goto restart; } resolve_jumps(state); block->hot = true; + rv_log_debug( + "JIT: Translation completed for block pc=0x%08x, offset=%u, size=%u", + block->pc_start, block->offset, state->offset - block->offset); } struct jit_state *jit_state_init(size_t size) @@ -2336,10 +2364,12 @@ struct jit_state *jit_state_init(size_t size) state->offset = 0; state->size = size; -#if defined(TSAN_ENABLED) && defined(__x86_64__) +#if defined(TSAN_ENABLED) /* ThreadSanitizer compatibility: Allocate JIT code buffer at a fixed * address above the main memory region to avoid conflicts. 
- * + */ +#if defined(__x86_64__) + /* x86_64 memory layout: * Main memory: 0x7d0000000000 - 0x7d0100000000 (4GB for FULL4G) * JIT buffer: 0x7d1000000000 + size * @@ -2348,7 +2378,32 @@ struct jit_state *jit_state_init(size_t size) */ void *jit_addr = (void *) 0x7d1000000000UL; state->buf = mmap(jit_addr, size, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED +#if defined(__APPLE__) + | MAP_JIT +#endif + , + -1, 0); +#elif defined(__aarch64__) + /* ARM64 memory layout (macOS/Apple Silicon): + * Main memory: 0x150000000000 - 0x150100000000 (4GB for FULL4G) + * JIT buffer: 0x151000000000 + size + * + * Apple Silicon requires MAP_JIT for executable memory. The fixed + * address is chosen to avoid TSAN's shadow memory and typical process + * allocations. Requires ASLR disabled via: setarch $(uname -m) -R + */ + void *jit_addr = (void *) 0x151000000000UL; + state->buf = mmap(jit_addr, size, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED +#if defined(__APPLE__) + | MAP_JIT +#endif + , + -1, 0); +#else +#error "TSAN is only supported on x86_64 and aarch64" +#endif if (state->buf == MAP_FAILED) { free(state); return NULL; diff --git a/src/main.c b/src/main.c index 45374bb1..a2f67d6d 100644 --- a/src/main.c +++ b/src/main.c @@ -304,7 +304,7 @@ int main(int argc, char **args) .args_offset_size = ARGS_OFFSET_SIZE, .argc = prog_argc, .argv = prog_args, - .log_level = LOG_TRACE, + .log_level = LOG_INFO, .run_flag = run_flag, .profile_output_file = prof_out_file, .cycle_per_step = CYCLE_PER_STEP, diff --git a/src/rv32_jit.c b/src/rv32_jit.c index 8e084f62..6ea9c8e9 100644 --- a/src/rv32_jit.c +++ b/src/rv32_jit.c @@ -180,7 +180,7 @@ GEN(lb, { emit_cmp_imm32(state, temp_reg, 0); uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); - vm_reg[1] = map_vm_reg(state, ir->rd); + vm_reg[1] = map_vm_reg_reserved(state, ir->rd, vm_reg[0]); emit_load(state, S32, 
parameter_reg[0], vm_reg[1], offsetof(riscv_t, X) + 4 * ir->rd); @@ -232,7 +232,7 @@ GEN(lh, { emit_cmp_imm32(state, temp_reg, 0); uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); - vm_reg[1] = map_vm_reg(state, ir->rd); + vm_reg[1] = map_vm_reg_reserved(state, ir->rd, vm_reg[0]); emit_load(state, S32, parameter_reg[0], vm_reg[1], offsetof(riscv_t, X) + 4 * ir->rd); @@ -284,7 +284,7 @@ GEN(lw, { emit_cmp_imm32(state, temp_reg, 0); uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); - vm_reg[1] = map_vm_reg(state, ir->rd); + vm_reg[1] = map_vm_reg_reserved(state, ir->rd, vm_reg[0]); emit_load(state, S32, parameter_reg[0], vm_reg[1], offsetof(riscv_t, X) + 4 * ir->rd); @@ -336,7 +336,7 @@ GEN(lbu, { emit_cmp_imm32(state, temp_reg, 0); uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); - vm_reg[1] = map_vm_reg(state, ir->rd); + vm_reg[1] = map_vm_reg_reserved(state, ir->rd, vm_reg[0]); emit_load(state, S32, parameter_reg[0], vm_reg[1], offsetof(riscv_t, X) + 4 * ir->rd); @@ -388,7 +388,7 @@ GEN(lhu, { emit_cmp_imm32(state, temp_reg, 0); uint32_t jump_loc_0 = state->offset; emit_jcc_offset(state, 0x84); - vm_reg[1] = map_vm_reg(state, ir->rd); + vm_reg[1] = map_vm_reg_reserved(state, ir->rd, vm_reg[0]); emit_load(state, S32, parameter_reg[0], vm_reg[1], offsetof(riscv_t, X) + 4 * ir->rd); From f6b94f1a287d00cf03edca649cbee9623d62c5a6 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Wed, 8 Oct 2025 21:13:02 +0800 Subject: [PATCH 3/7] Detect early JIT compilation issues in CI/CD This commit introduces a comprehensive JIT debugging infrastructure to catch register allocation conflicts and cache coherency issues before they cause subtle runtime failures in production. 
--- .ci/jit-debug-test.sh | 53 ++++++++++++++++++++++ .github/workflows/main.yml | 9 ++++ Makefile | 5 ++ src/jit.c | 93 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+) create mode 100755 .ci/jit-debug-test.sh diff --git a/.ci/jit-debug-test.sh b/.ci/jit-debug-test.sh new file mode 100755 index 00000000..964efdfc --- /dev/null +++ b/.ci/jit-debug-test.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +# JIT Debug Test Script +# This script tests JIT compiler with debug mode enabled to catch issues early + +set -e + +PARALLEL="${PARALLEL:--j$(nproc 2> /dev/null || sysctl -n hw.ncpu 2> /dev/null || echo 4)}" + +echo "======================================" +echo "JIT Debug Mode Test" +echo "======================================" + +# Test 1: Standard JIT with debug +echo "" +echo "Test 1: Building with ENABLE_JIT_DEBUG=1..." +make distclean +make ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 $PARALLEL + +echo "" +echo "Running basic tests with JIT debug..." +make ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check + +# Test 2: JIT with EXT_C=0 and debug (regression test) +echo "" +echo "Test 2: Building with ENABLE_EXT_C=0 ENABLE_JIT_DEBUG=1..." +make distclean +make ENABLE_EXT_C=0 ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 $PARALLEL + +echo "" +echo "Running tests with EXT_C=0 and JIT debug..." +make ENABLE_EXT_C=0 ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check + +# Test 3: JIT with various extension combinations +echo "" +echo "Test 3: Testing multiple JIT configurations with debug..." +for config in \ + "ENABLE_EXT_A=0" \ + "ENABLE_EXT_F=0" \ + "ENABLE_EXT_M=0" \ + "ENABLE_Zba=0" \ + "ENABLE_Zbb=0"; do + echo "" + echo "Testing: $config with JIT debug" + make distclean + make $config ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 $PARALLEL + make $config ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check +done + +echo "" +echo "======================================" +echo "All JIT debug tests passed!" 
+echo "======================================" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fb64fefb..703f410f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -499,6 +499,15 @@ jobs: fi done + - name: JIT debug test + env: + CC: ${{ steps.install_cc.outputs.cc }} + run: | + # Run JIT tests with debug mode to catch register allocation and cache coherency issues + make distclean && make ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check $PARALLEL + make distclean && make ENABLE_EXT_C=0 ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check $PARALLEL + if: ${{ always() }} + - name: undefined behavior test if: (success() || failure()) && steps.install_cc.outputs.cc == 'clang' # gcc on macOS/arm64 does not support sanitizers env: diff --git a/Makefile b/Makefile index 5a34979b..b1640df1 100644 --- a/Makefile +++ b/Makefile @@ -302,6 +302,11 @@ ENABLE_JIT ?= 0 $(call set-feature, JIT) ifeq ($(call has, JIT), 1) OBJS_EXT += jit.o + # JIT debug mode for early issue detection in CI/CD + ENABLE_JIT_DEBUG ?= 0 + ifeq ("$(ENABLE_JIT_DEBUG)", "1") + CFLAGS += -DENABLE_JIT_DEBUG=1 + endif ENABLE_T2C ?= 1 $(call set-feature, T2C) ifeq ($(call has, T2C), 1) diff --git a/src/jit.c b/src/jit.c index 5fa1f32f..61ee6ee5 100644 --- a/src/jit.c +++ b/src/jit.c @@ -299,6 +299,89 @@ static inline void offset_map_insert(struct jit_state *state, block_t *block) __builtin___clear_cache((char *) (addr), (char *) (addr) + (size)); #endif +/* JIT debug helpers - enable with ENABLE_JIT_DEBUG=1 to detect issues early */ +#ifndef ENABLE_JIT_DEBUG +#define ENABLE_JIT_DEBUG 0 +#endif + +#if ENABLE_JIT_DEBUG +static void jit_dump_regmap(const char *ctx) +{ + rv_log_debug("JIT RegMap [%s]:", ctx); + for (int i = 0; i < n_host_regs; i++) { + if (register_map[i].vm_reg_idx >= 0) { + rv_log_debug(" Host R%d -> VM x%d (dirty=%d)", + register_map[i].reg_idx, register_map[i].vm_reg_idx, + register_map[i].dirty); + } + } +} + +static void jit_check_regmap_conflict(int vm_reg, + 
int host_reg, + const char *insn) +{ + int found_idx = -1; + /* Check if VM register is already mapped */ + for (int i = 0; i < n_host_regs; i++) { + if (register_map[i].vm_reg_idx == vm_reg) { + if (found_idx >= 0 && found_idx != i) { + /* VM register mapped to multiple host registers */ + rv_log_error( + "JIT RegMap CONFLICT in %s: VM x%d mapped to " + "Host R%d (idx %d) and R%d (idx %d)", + insn, vm_reg, register_map[found_idx].reg_idx, found_idx, + register_map[i].reg_idx, i); + jit_dump_regmap("CONFLICT"); + assert(false); + } + found_idx = i; + /* Verify the found mapping is correct */ + if (register_map[i].reg_idx != host_reg) { + rv_log_error( + "JIT RegMap CONFLICT in %s: VM x%d expected at " + "Host R%d but found at R%d", + insn, vm_reg, host_reg, register_map[i].reg_idx); + jit_dump_regmap("CONFLICT"); + assert(false); + } + } else if (register_map[i].reg_idx == host_reg && + register_map[i].vm_reg_idx >= 0) { + /* Host register holds different VM register */ + rv_log_error( + "JIT RegMap CONFLICT in %s: Host R%d already holds " + "VM x%d, cannot map VM x%d", + insn, host_reg, register_map[i].vm_reg_idx, vm_reg); + jit_dump_regmap("CONFLICT"); + assert(false); + } + } +} + +static void jit_verify_cache_coherency(struct jit_state *state, uint32_t pc) + UNUSED; +static void jit_verify_cache_coherency(struct jit_state *state, uint32_t pc) +{ + /* On ARM64, verify instruction cache was properly invalidated */ +#if defined(__aarch64__) + if (state->offset > 0) { + rv_log_debug("JIT: Cache coherency check at PC=0x%08x, offset=%u", pc, + state->offset); + } +#endif +} +#else +#define jit_dump_regmap(ctx) \ + do { \ + } while (0) +#define jit_check_regmap_conflict(vm_reg, host_reg, insn) \ + do { \ + } while (0) +#define jit_verify_cache_coherency(state, pc) \ + do { \ + } while (0) +#endif + static bool should_flush = false; static void emit_bytes(struct jit_state *state, void *data, uint32_t len) { @@ -1890,6 +1973,7 @@ static inline int map_vm_reg(struct 
jit_state *state, int vm_reg_idx) save_reg(state, idx); unmap_vm_reg(idx); set_vm_reg(idx, vm_reg_idx); + jit_check_regmap_conflict(vm_reg_idx, target_reg, "map_vm_reg"); return target_reg; } @@ -1933,6 +2017,15 @@ static inline int map_vm_reg_reserved(struct jit_state *state, save_reg(state, idx); unmap_vm_reg(idx); set_vm_reg(idx, vm_reg_idx); + jit_check_regmap_conflict(vm_reg_idx, target_reg, "map_vm_reg_reserved"); + /* Additional check: ensure we didn't allocate the reserved register */ + if (target_reg == reserved_reg_idx) { + rv_log_error( + "JIT RegMap ERROR: map_vm_reg_reserved allocated reserved " + "register R%d for VM x%d", + reserved_reg_idx, vm_reg_idx); + assert(false); + } return target_reg; } From 2cc7b01a14377e74c9888eecddeff185e6b4feb4 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Wed, 8 Oct 2025 22:14:58 +0800 Subject: [PATCH 4/7] Fix user-space emulation requiring ELF loader User-space emulation tests were failing because ENABLE_ELF_LOADER defaulted to 0, preventing ELF file loading. The fix automatically enables ELF_LOADER when SYSTEM=0 (except for architecture-test builds with ENABLE_ARCH_TEST=1, which keep the default of 0), as user-space mode otherwise always requires it to load test ELF files. --- Makefile | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index b1640df1..c3a5afa5 100644 --- a/Makefile +++ b/Makefile @@ -20,13 +20,6 @@ CFLAGS += -include src/common.h -Isrc/ OBJS_EXT := -# In the system test suite, the executable is an ELF file (e.g., MMU). -# However, the Linux kernel emulation includes the Image, DT, and -# root filesystem (rootfs). Therefore, the test suite needs this -# flag to load the ELF and differentiate it from the kernel emulation. -ENABLE_ELF_LOADER ?= 0 -$(call set-feature, ELF_LOADER) - # Enable MOP fusion, easier for ablation study ENABLE_MOP_FUSION ?= 1 $(call set-feature, MOP_FUSION) @@ -80,6 +73,22 @@ endif ENABLE_ARCH_TEST ?= 0 $(call set-feature, ARCH_TEST) +# In the system test suite, the executable is an ELF file (e.g., MMU). 
+# However, the Linux kernel emulation includes the Image, DT, and +# root filesystem (rootfs). Therefore, the test suite needs this +# flag to load the ELF and differentiate it from the kernel emulation. +# User-space emulation (SYSTEM=0) always needs ELF loader, except for architecture tests. +ifeq ($(ENABLE_SYSTEM), 0) + ifneq ($(ENABLE_ARCH_TEST), 1) + override ENABLE_ELF_LOADER := 1 + else + ENABLE_ELF_LOADER ?= 0 + endif +else + ENABLE_ELF_LOADER ?= 0 +endif +$(call set-feature, ELF_LOADER) + # ThreadSanitizer support # TSAN on x86-64 memory layout: # Shadow: 0x02a000000000 - 0x7cefffffffff (reserved by TSAN) From d1beb5a0a262e44a2012ee697023580b9e2377f8 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sat, 8 Nov 2025 00:25:05 +0800 Subject: [PATCH 5/7] Fix build system regressions This commit addresses multiple regressions introduced in recent changes: 1. DTB compilation regression - DTB dependencies moved outside CC_IS_EMCC conditional - Ensures DTB builds for system mode regardless of compiler - Fixes mk/wasm.mk structure for cross-platform consistency 2. Makefile syntax error in mk/toolchain.mk - Fixed TAB characters before $(warning) on lines 25, 28 - Changed to spaces for proper control flow - This was blocking all Makefile parsing 3. emcc configuration pollution - Added 'make distclean' before emcc builds in workflow - Prevents ENABLE_SYSTEM=1 from leaking between builds - Fixes "build/minimal.dtb does not exist" errors 4. Ubuntu ARM64 apt-get failures - Implemented linear backoff retry mechanism (30s, 60s delays) - Added pipefail to preserve apt exit codes through tee - Explicit APT_EXIT capture to detect masked failures - Added InRelease to failure pattern (modern combined Release+GPG) - Ignore non-critical dep11 metadata failures - Focus on core package indices (Packages/Sources/Release/InRelease) 5. 
TSAN cross-compiler compatibility (fixed __has_feature issue) - Changed from defined(__has_feature) to defined(__clang__) - GCC doesn't support __has_feature, causing preprocessor errors - __has_feature only works when __clang__ is defined - Ensures __tsan_default_options() works with both GCC and clang 6. TSAN cross-platform compatibility - Guarded setarch with ifeq ($(UNAME_S),Linux) in Makefile - setarch doesn't exist on macOS, now conditionally applied - macOS TSAN builds require SIP disabled for ASLR control 7. Trace functionality regression - Reverted .log_level from LOG_INFO back to LOG_TRACE - LOG_INFO suppressed rv_log_trace() stream used by -t flag - Restores instruction trace output for debugging --- .github/workflows/main.yml | 69 ++++++++++++++++++++++++++++++-------- Makefile | 9 +++-- mk/toolchain.mk | 4 +-- mk/wasm.mk | 18 ++++++++++ src/main.c | 16 ++++++++- 5 files changed, 97 insertions(+), 19 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 703f410f..1b1b2cb8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -135,6 +135,7 @@ jobs: - name: default build using emcc if: success() run: | + make distclean make CC=emcc ENABLE_JIT=0 $PARALLEL - name: default build for system emulation using emcc @@ -142,7 +143,7 @@ jobs: run: | make distclean make CC=emcc ENABLE_SYSTEM=1 ENABLE_JIT=0 $PARALLEL - make distclean ENABLE_SYSTEM=1 + make distclean - name: Build with various optimization levels if: success() @@ -301,9 +302,40 @@ jobs: githubToken: ${{ github.token }} # No 'sudo' is available install: | - apt update -qq - apt install -yqq make git curl wget clang libsdl2-dev libsdl2-mixer-dev lsb-release software-properties-common gnupg bc - which wget || echo "WARNING: wget not found after installation" + # Retry apt update with exponential backoff for mirror sync issues + # Note: dep11 (AppStream metadata) failures are non-critical for build tools + set -o pipefail + for i in 1 2 3; do + if 
apt update -qq --allow-releaseinfo-change 2>&1 | tee /tmp/apt-update.log; then + APT_EXIT=0 + else + APT_EXIT=$? + fi + # Check for critical failures (package indices), ignore dep11 metadata + # Include InRelease which is the combined Release+Release.gpg file + if [ $APT_EXIT -eq 0 ] && ! grep -E "Failed to fetch.*/(Packages|Sources|Release|InRelease)" /tmp/apt-update.log; then + echo "apt update succeeded (core package lists available)" + break + fi + if [ $i -lt 3 ]; then + delay=$((i * 30)) + echo "apt update attempt $i: errors detected (exit=$APT_EXIT), waiting ${delay}s..." + sleep $delay + else + echo "Warning: Proceeding after 3 attempts - some package lists may be incomplete" + fi + done + # Install packages - exit 0 even if dep11 metadata is incomplete + apt install -yqq make git curl wget clang libsdl2-dev libsdl2-mixer-dev lsb-release software-properties-common gnupg bc 2>&1 | tee /tmp/apt-install.log || true + # Verify critical packages were installed + for pkg in make git curl clang bc; do + if ! command -v $pkg >/dev/null 2>&1; then + echo "ERROR: Critical package $pkg failed to install!" + cat /tmp/apt-install.log + exit 1 + fi + done + echo "All critical build tools installed successfully" # FIXME: gcc build fails on Aarch64/Linux hosts env: | CC: clang-18 @@ -311,7 +343,15 @@ jobs: run: | # Verify and install wget if needed (workaround for install step issues) if ! command -v wget > /dev/null 2>&1; then - apt update -qq && apt install -yqq wget + echo "wget not found, attempting to install..." + apt update -qq --allow-releaseinfo-change 2>&1 | tee /tmp/apt-update-wget.log || true + apt install -yqq wget 2>&1 | tee /tmp/wget-install.log || true + if ! command -v wget > /dev/null 2>&1; then + echo "ERROR: wget installation failed!" 
+ cat /tmp/wget-install.log + exit 1 + fi + echo "wget installed successfully" fi git config --global --add safe.directory ${{ github.workspace }} git config --global --add safe.directory ${{ github.workspace }}/src/softfloat @@ -435,6 +475,7 @@ jobs: - name: default build using emcc if: success() run: | + make distclean make CC=emcc ENABLE_JIT=0 $PARALLEL - name: default build for system emulation using emcc @@ -442,7 +483,7 @@ jobs: run: | make distclean make CC=emcc ENABLE_SYSTEM=1 ENABLE_JIT=0 $PARALLEL - make distclean ENABLE_SYSTEM=1 + make distclean - name: check + tests if: success() @@ -499,14 +540,14 @@ jobs: fi done - - name: JIT debug test - env: - CC: ${{ steps.install_cc.outputs.cc }} - run: | - # Run JIT tests with debug mode to catch register allocation and cache coherency issues - make distclean && make ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check $PARALLEL - make distclean && make ENABLE_EXT_C=0 ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check $PARALLEL - if: ${{ always() }} + - name: JIT debug test + env: + CC: ${{ steps.install_cc.outputs.cc }} + run: | + # Run JIT tests with debug mode to catch register allocation and cache coherency issues + make distclean && make ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check $PARALLEL + make distclean && make ENABLE_EXT_C=0 ENABLE_JIT=1 ENABLE_JIT_DEBUG=1 check $PARALLEL + if: ${{ always() }} - name: undefined behavior test if: (success() || failure()) && steps.install_cc.outputs.cc == 'clang' # gcc on macOS/arm64 does not support sanitizers diff --git a/Makefile b/Makefile index c3a5afa5..2aeada06 100644 --- a/Makefile +++ b/Makefile @@ -106,10 +106,15 @@ override ENABLE_SDL := 0 # SDL (uninstrumented system lib) creates threads override ENABLE_LTO := 0 # LTO interferes with TSAN instrumentation CFLAGS += -DTSAN_ENABLED # Signal code to use TSAN-compatible allocations # Disable ASLR for TSAN tests to prevent allocations in TSAN shadow memory +# Note: setarch is Linux-only; macOS requires different approach (SIP disable) +ifeq 
($(UNAME_S),Linux) BIN_WRAPPER = setarch $(shell uname -m) -R else BIN_WRAPPER = endif +else +BIN_WRAPPER = +endif # Enable link-time optimization (LTO) ENABLE_LTO ?= 1 @@ -392,7 +397,7 @@ DTB_DEPS := $(BUILD_DTB) $(BUILD_DTB2C) endif endif -all: config $(DTB_DEPS) $(BUILD_DTB) $(BUILD_DTB2C) $(BIN) +all: config $(DTB_DEPS) $(BIN) OBJS := \ map.o \ @@ -437,7 +442,7 @@ $(OUT): $(BIN): $(OBJS) $(DEV_OBJS) | $(OUT) $(VECHO) " LD\t$@\n" - $(Q)$(CC) -o $@ $(CFLAGS_emcc) $^ $(LDFLAGS) + $(Q)$(CC) -o $@ $(CFLAGS_emcc) $(filter-out %.dtb %.h,$^) $(LDFLAGS) $(CONFIG_FILE): FORCE $(Q)mkdir -p $(OUT) diff --git a/mk/toolchain.mk b/mk/toolchain.mk index eef08de3..e576ecf7 100644 --- a/mk/toolchain.mk +++ b/mk/toolchain.mk @@ -22,10 +22,10 @@ ifneq ($(shell $(CC) --version | head -n 1 | grep emcc),) $(warning $(SDL_MUSIC_CANNOT_PLAY_WARNING)) endif else - $(warning $(SDL_MUSIC_CANNOT_PLAY_WARNING)) + $(warning $(SDL_MUSIC_CANNOT_PLAY_WARNING)) endif else - $(warning $(SDL_MUSIC_CANNOT_PLAY_WARNING)) + $(warning $(SDL_MUSIC_CANNOT_PLAY_WARNING)) endif # see commit 165c1a3 of emscripten diff --git a/mk/wasm.mk b/mk/wasm.mk index 78b818cd..a7597ce1 100644 --- a/mk/wasm.mk +++ b/mk/wasm.mk @@ -166,3 +166,21 @@ start-web: $(start_web_deps) .PHONY: check-demo-dir-exist start-web endif + +# For SYSTEM mode, DTB needs to be built regardless of whether we're using emcc +# DTB is only built when SYSTEM=1 and ELF_LOADER=0 +ifeq ($(call has, SYSTEM), 1) +ifeq ($(call has, ELF_LOADER), 0) +# Add DTB as dependency for compilation stages +# This is used by mk/system.mk for device object files +deps_emcc += $(BUILD_DTB) $(BUILD_DTB2C) + +# For emcc builds: ensure DTB exists before emcc embeds it +# Make BIN directly depend on DTB files as regular prerequisites +# This will cause them to be built, but they'll also be passed to the linker +# We need to filter them out in the linker command +ifeq ("$(CC_IS_EMCC)", "1") +$(BIN): $(BUILD_DTB) $(BUILD_DTB2C) +endif +endif +endif diff --git 
a/src/main.c b/src/main.c index a2f67d6d..cae03d10 100644 --- a/src/main.c +++ b/src/main.c @@ -28,6 +28,7 @@ * * Configuration optimizes for race detection with minimal overhead. */ +/* GCC uses __SANITIZE_THREAD__, clang uses __has_feature(thread_sanitizer) */ #if defined(__SANITIZE_THREAD__) const char *__tsan_default_options() { @@ -39,6 +40,19 @@ const char *__tsan_default_options() ":history_size=7" /* Larger race detection window */ ":io_sync=0"; /* Don't sync on I/O */ } +#elif defined(__clang__) +#if __has_feature(thread_sanitizer) +const char *__tsan_default_options() +{ + return "halt_on_error=0" /* Continue after errors */ + ":report_bugs=1" /* Report data races */ + ":second_deadlock_stack=1" /* Full deadlock info */ + ":verbosity=0" /* Reduce noise */ + ":memory_limit_mb=0" /* No memory limit */ + ":history_size=7" /* Larger race detection window */ + ":io_sync=0"; /* Don't sync on I/O */ +} +#endif #endif /* enable program trace mode */ @@ -304,7 +318,7 @@ int main(int argc, char **args) .args_offset_size = ARGS_OFFSET_SIZE, .argc = prog_argc, .argv = prog_args, - .log_level = LOG_INFO, + .log_level = LOG_TRACE, .run_flag = run_flag, .profile_output_file = prof_out_file, .cycle_per_step = CYCLE_PER_STEP, From b7dd6a78be37fbf02cdc7341140d52c9a201c3c3 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Sun, 9 Nov 2025 16:23:17 +0800 Subject: [PATCH 6/7] CI: Add explicit TSAN race detection validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comprehensive ThreadSanitizer output validation to prevent silent test failures. TSAN tests now explicitly check for data races and fail immediately with diagnostic output instead of masking errors. 
Implementation: - Capture TSAN stderr/stdout to log files for analysis - Pattern match race indicators: "ThreadSanitizer: data race", "ThreadSanitizer: race on", "WARNING: ThreadSanitizer:" - Exit with status 1 immediately upon race detection - Display race context (10 lines) for debugging - Progress indicators for 3-tier validation (Interpreter, JIT-T1, JIT-T2) Platform-Specific Handling: - Linux (x64/ARM64): Use setarch -R for ASLR mitigation (already gated in Makefile with ifeq ($(UNAME_S),Linux)) - macOS: NO setarch (not available), rely on MAP_FIXED allocations at 0x150000000000; gracefully handle SIP restrictions by distinguishing MAP_FAILED errors from actual race conditions Race Detection Patterns: ThreadSanitizer: data race # Standard race report ThreadSanitizer: race on # Race on specific object WARNING: ThreadSanitizer: # General TSAN warnings Error Handling (macOS): MAP_FAILED # mmap failure unexpected memory mapping # TSAN shadow conflict FATAL: ThreadSanitizer # Initialization failure → Skip test with warning (SIP restriction) → Still fail hard on actual races Benefits: - Immediate failure on race detection (fail-fast principle) - Clear diagnostic output in CI logs with race location/context - Platform-aware: Linux uses setarch -R, macOS handles SIP gracefully - No silent failures: Previously masked errors now cause test failure - Debugging support: Log files preserved for post-mortem analysis Validates race condition fixes from: - 2a2a5c4: TSAN with FULL4G and T2C support - f1b685e: ARM64 TSAN support and JIT cache coherency - 669efc1: Build system regressions (setarch gating, TSAN compatibility) --- .github/workflows/main.yml | 134 +++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1b1b2cb8..5308a35b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -250,6 +250,46 @@ jobs: make distclean && make ENABLE_UBSAN=1 check $PARALLEL 
make ENABLE_JIT=1 clean && make ENABLE_JIT=1 ENABLE_UBSAN=1 check $PARALLEL + - name: ThreadSanitizer race detection test + if: success() || failure() + env: + CC: ${{ steps.install_cc.outputs.cc }} + run: | + set -o pipefail + + # TSAN requires ASLR disabled to prevent allocations in shadow memory + # Interpreter with FULL4G: Basic race detection across emulation core + echo "=== TSAN Test 1/3: Interpreter + FULL4G ===" + make distclean && setarch -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 check $PARALLEL 2>&1 | tee tsan-interpreter.log + if grep -q "ThreadSanitizer: data race\|ThreadSanitizer: race on\|WARNING: ThreadSanitizer:" tsan-interpreter.log; then + echo "ERROR: Data race detected in interpreter mode!" + grep -A 10 "ThreadSanitizer:" tsan-interpreter.log + exit 1 + fi + echo "✓ No races detected in interpreter mode" + + # JIT tier-1: Race detection in template-based JIT compilation + echo "=== TSAN Test 2/3: JIT Tier-1 ===" + make ENABLE_JIT=1 clean && setarch -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 check $PARALLEL 2>&1 | tee tsan-jit.log + if grep -q "ThreadSanitizer: data race\|ThreadSanitizer: race on\|WARNING: ThreadSanitizer:" tsan-jit.log; then + echo "ERROR: Data race detected in JIT tier-1 mode!" + grep -A 10 "ThreadSanitizer:" tsan-jit.log + exit 1 + fi + echo "✓ No races detected in JIT tier-1 mode" + + # JIT tier-2 (T2C): Race detection across LLVM compilation thread + echo "=== TSAN Test 3/3: JIT Tier-2 (T2C) ===" + make ENABLE_JIT=1 clean && setarch -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 ENABLE_T2C=1 check $PARALLEL 2>&1 | tee tsan-t2c.log + if grep -q "ThreadSanitizer: data race\|ThreadSanitizer: race on\|WARNING: ThreadSanitizer:" tsan-t2c.log; then + echo "ERROR: Data race detected in JIT tier-2 (T2C) mode!" 
+ grep -A 10 "ThreadSanitizer:" tsan-t2c.log + exit 1 + fi + echo "✓ No races detected in JIT tier-2 (T2C) mode" + + echo "=== All TSAN tests passed ===" + - name: boot Linux kernel test if: success() env: @@ -368,6 +408,33 @@ jobs: make ENABLE_JIT=1 clean && make ENABLE_EXT_A=0 ENABLE_JIT=1 check $PARALLEL make ENABLE_JIT=1 clean && make ENABLE_EXT_F=0 ENABLE_JIT=1 check $PARALLEL make ENABLE_JIT=1 clean && make ENABLE_EXT_C=0 ENABLE_JIT=1 check $PARALLEL + # TSAN on ARM64: Fixed memory layout (0x150000000000 for main, 0x151000000000 for JIT) + set -o pipefail + echo "=== TSAN Test 1/3: Interpreter + FULL4G (ARM64) ===" + make distclean && setarch -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 check $PARALLEL 2>&1 | tee tsan-interpreter.log + if grep -q "ThreadSanitizer: data race\|ThreadSanitizer: race on\|WARNING: ThreadSanitizer:" tsan-interpreter.log; then + echo "ERROR: Data race detected in interpreter mode!" + grep -A 10 "ThreadSanitizer:" tsan-interpreter.log + exit 1 + fi + echo "✓ No races detected in interpreter mode" + echo "=== TSAN Test 2/3: JIT Tier-1 (ARM64) ===" + make ENABLE_JIT=1 clean && setarch -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 check $PARALLEL 2>&1 | tee tsan-jit.log + if grep -q "ThreadSanitizer: data race\|ThreadSanitizer: race on\|WARNING: ThreadSanitizer:" tsan-jit.log; then + echo "ERROR: Data race detected in JIT tier-1 mode!" + grep -A 10 "ThreadSanitizer:" tsan-jit.log + exit 1 + fi + echo "✓ No races detected in JIT tier-1 mode" + echo "=== TSAN Test 3/3: JIT Tier-2 (T2C) (ARM64) ===" + make ENABLE_JIT=1 clean && setarch -R make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 ENABLE_T2C=1 check $PARALLEL 2>&1 | tee tsan-t2c.log + if grep -q "ThreadSanitizer: data race\|ThreadSanitizer: race on\|WARNING: ThreadSanitizer:" tsan-t2c.log; then + echo "ERROR: Data race detected in JIT tier-2 (T2C) mode!" 
+ grep -A 10 "ThreadSanitizer:" tsan-t2c.log + exit 1 + fi + echo "✓ No races detected in JIT tier-2 (T2C) mode" + echo "=== All TSAN tests passed (ARM64) ===" macOS-arm64: needs: [detect-code-related-file-changes] @@ -557,6 +624,73 @@ jobs: make distclean && make ENABLE_UBSAN=1 check $PARALLEL make ENABLE_JIT=1 clean && make ENABLE_JIT=1 ENABLE_UBSAN=1 check $PARALLEL + - name: ThreadSanitizer race detection test + if: (success() || failure()) && steps.install_cc.outputs.cc == 'clang' # Only clang supports TSAN on macOS + env: + CC: ${{ steps.install_cc.outputs.cc }} + run: | + set -o pipefail + + # macOS TSAN: Fixed memory at 0x150000000000 (main) and 0x151000000000 (JIT) + # Note: ASLR disabled via mmap(MAP_FIXED), but SIP may restrict full ASLR control on GitHub runners + + # Test 1: Interpreter + FULL4G + echo "=== TSAN Test 1/3: Interpreter + FULL4G (macOS ARM64) ===" + make distclean && make ENABLE_TSAN=1 ENABLE_FULL4G=1 check $PARALLEL 2>&1 | tee tsan-interpreter.log || { + # Check if failure is due to MAP_FIXED restriction vs actual race + if grep -q "MAP_FAILED\|unexpected memory mapping\|FATAL: ThreadSanitizer" tsan-interpreter.log; then + echo "⚠️ TSAN memory allocation failed (SIP/ASLR restriction) - test skipped" + else + echo "ERROR: Test execution failed" + cat tsan-interpreter.log + exit 1 + fi + } + if [ -f tsan-interpreter.log ] && grep -q "ThreadSanitizer: data race\|ThreadSanitizer: race on\|WARNING: ThreadSanitizer:" tsan-interpreter.log; then + echo "ERROR: Data race detected in interpreter mode!" 
+ grep -A 10 "ThreadSanitizer:" tsan-interpreter.log + exit 1 + fi + echo "✓ No races detected in interpreter mode" + + # Test 2: JIT tier-1 + echo "=== TSAN Test 2/3: JIT Tier-1 (macOS ARM64) ===" + make ENABLE_JIT=1 clean && make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 check $PARALLEL 2>&1 | tee tsan-jit.log || { + if grep -q "MAP_FAILED\|unexpected memory mapping\|FATAL: ThreadSanitizer" tsan-jit.log; then + echo "⚠️ TSAN memory allocation failed (SIP/ASLR restriction) - test skipped" + else + echo "ERROR: Test execution failed" + cat tsan-jit.log + exit 1 + fi + } + if [ -f tsan-jit.log ] && grep -q "ThreadSanitizer: data race\|ThreadSanitizer: race on\|WARNING: ThreadSanitizer:" tsan-jit.log; then + echo "ERROR: Data race detected in JIT tier-1 mode!" + grep -A 10 "ThreadSanitizer:" tsan-jit.log + exit 1 + fi + echo "✓ No races detected in JIT tier-1 mode" + + # Test 3: JIT tier-2 (T2C) + echo "=== TSAN Test 3/3: JIT Tier-2 (T2C) (macOS ARM64) ===" + make ENABLE_JIT=1 clean && make ENABLE_TSAN=1 ENABLE_FULL4G=1 ENABLE_JIT=1 ENABLE_T2C=1 check $PARALLEL 2>&1 | tee tsan-t2c.log || { + if grep -q "MAP_FAILED\|unexpected memory mapping\|FATAL: ThreadSanitizer" tsan-t2c.log; then + echo "⚠️ TSAN memory allocation failed (SIP/ASLR restriction) - test skipped" + else + echo "ERROR: Test execution failed" + cat tsan-t2c.log + exit 1 + fi + } + if [ -f tsan-t2c.log ] && grep -q "ThreadSanitizer: data race\|ThreadSanitizer: race on\|WARNING: ThreadSanitizer:" tsan-t2c.log; then + echo "ERROR: Data race detected in JIT tier-2 (T2C) mode!" 
+ grep -A 10 "ThreadSanitizer:" tsan-t2c.log + exit 1 + fi + echo "✓ No races detected in JIT tier-2 (T2C) mode" + + echo "=== All TSAN tests completed (macOS ARM64) ===" + - name: boot Linux kernel test if: success() env: From 061612c83e20a868914b4cec5cb64372b87a5436 Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Mon, 10 Nov 2025 00:43:58 +0800 Subject: [PATCH 7/7] CI: Fix ARM64 apt-get dep11 metadata failures ARM64 CI builds fail when apt update returns exit code 100 due to dep11 metadata size mismatches during Ubuntu mirror synchronization. The fix uses '|| true' to prevent the exit code from failing the action, then manually checks for critical package index failures (Packages/Sources/ Release/InRelease). Non-critical dep11 metadata failures are ignored. Package installation uses apt-get with --fix-missing for robustness. Also fixes stale build configuration bug where 'make clean' doesn't regenerate build/.config, causing JIT extension tests to fail with 'map_file() (null) failed' errors. Changed to 'make distclean' for proper configuration reset between test runs. --- .github/workflows/main.yml | 96 ++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 29 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5308a35b..3b3a7c3c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -238,7 +238,7 @@ jobs: ENABLE_Zicsr ENABLE_Zifencei \ ENABLE_MOP_FUSION ENABLE_BLOCK_CHAINING; do echo "JIT test with ${ext}=0" - if ! (make ENABLE_JIT=1 clean && make ${ext}=0 ENABLE_JIT=1 check $PARALLEL); then + if ! 
(make distclean && make ${ext}=0 ENABLE_JIT=1 check $PARALLEL); then echo "ERROR: JIT test failed with ${ext}=0" exit 1 fi @@ -343,39 +343,77 @@ jobs: # No 'sudo' is available install: | # Retry apt update with exponential backoff for mirror sync issues - # Note: dep11 (AppStream metadata) failures are non-critical for build tools - set -o pipefail + # dep11 = AppStream metadata (GUI app discovery, non-critical for CLI builds) + # Critical files: Packages, Sources, Release, InRelease (binary/source indices) + set +e # Don't exit on apt update failure, we'll handle it manually + APT_SUCCESS=0 for i in 1 2 3; do - if apt update -qq --allow-releaseinfo-change 2>&1 | tee /tmp/apt-update.log; then - APT_EXIT=0 + echo "=== apt update attempt $i/3 ===" + # Force success even with dep11 failures (we check for critical failures below) + apt update --allow-releaseinfo-change 2>&1 | tee /tmp/apt-update.log || true + APT_EXIT=${PIPESTATUS[0]:-$?} # Capture apt update exit code, not tee + + # Check for critical package index failures (ignore dep11 metadata) + # dep11 files like Components-arm64.yml.gz are non-critical (AppStream metadata) + # Core package indices (Packages/Sources/Release/InRelease) MUST succeed + if grep -q -E "Failed to fetch.*/(Packages|Sources|Release|InRelease)" /tmp/apt-update.log 2>/dev/null; then + # Critical failure detected + echo "ERROR: Critical package index files failed to download" + grep -E "Failed to fetch.*/(Packages|Sources|Release|InRelease)" /tmp/apt-update.log | head -5 + if [ $i -lt 3 ]; then + delay=$((i * 30)) + echo "Retrying in ${delay}s... (attempt $((i + 1))/3)" + sleep $delay + else + echo "FATAL: Core package indices unavailable after 3 attempts" + cat /tmp/apt-update.log + exit 1 + fi else - APT_EXIT=$? - fi - # Check for critical failures (package indices), ignore dep11 metadata - # Include InRelease which is the combined Release+Release.gpg file - if [ $APT_EXIT -eq 0 ] && ! 
grep -E "Failed to fetch.*/(Packages|Sources|Release|InRelease)" /tmp/apt-update.log; then - echo "apt update succeeded (core package lists available)" + # Success: core package indices available (dep11 failures OK) + APT_SUCCESS=1 + if [ $APT_EXIT -eq 0 ]; then + echo "✓ apt update succeeded (all package lists available)" + else + echo "✓ apt update completed with warnings (exit=$APT_EXIT)" + echo " Core package indices: AVAILABLE" + if grep -q "dep11" /tmp/apt-update.log 2>/dev/null; then + echo " dep11 metadata: INCOMPLETE (non-critical, GUI app metadata)" + echo " Ignoring dep11 failures - build dependencies will install correctly" + fi + fi break fi - if [ $i -lt 3 ]; then - delay=$((i * 30)) - echo "apt update attempt $i: errors detected (exit=$APT_EXIT), waiting ${delay}s..." - sleep $delay - else - echo "Warning: Proceeding after 3 attempts - some package lists may be incomplete" - fi done - # Install packages - exit 0 even if dep11 metadata is incomplete - apt install -yqq make git curl wget clang libsdl2-dev libsdl2-mixer-dev lsb-release software-properties-common gnupg bc 2>&1 | tee /tmp/apt-install.log || true - # Verify critical packages were installed + + # Verify we succeeded in at least one attempt + if [ $APT_SUCCESS -ne 1 ]; then + echo "FATAL: apt update failed after all retry attempts" + exit 1 + fi + + # Install packages (dep11 metadata failures are benign) + echo "=== Installing build dependencies ===" + # Note: apt-get may still exit 100 due to dep11, but packages install correctly + # We verify installation success below, so force success here + apt-get install -yqq --fix-missing \ + make git curl wget clang libsdl2-dev libsdl2-mixer-dev lsb-release software-properties-common gnupg bc || true + + # Verify critical packages were installed successfully + echo "=== Verifying critical build tools ===" + MISSING_PKGS="" for pkg in make git curl clang bc; do if ! 
command -v $pkg >/dev/null 2>&1; then - echo "ERROR: Critical package $pkg failed to install!" - cat /tmp/apt-install.log - exit 1 + MISSING_PKGS="$MISSING_PKGS $pkg" fi done - echo "All critical build tools installed successfully" + + if [ -n "$MISSING_PKGS" ]; then + echo "ERROR: Critical packages failed to install:$MISSING_PKGS" + exit 1 + fi + + echo "✓ All critical build tools installed successfully" # FIXME: gcc build fails on Aarch64/Linux hosts env: | CC: clang-18 @@ -405,9 +443,9 @@ jobs: make $PARALLEL make check $PARALLEL make ENABLE_JIT=1 clean && make ENABLE_JIT=1 check $PARALLEL - make ENABLE_JIT=1 clean && make ENABLE_EXT_A=0 ENABLE_JIT=1 check $PARALLEL - make ENABLE_JIT=1 clean && make ENABLE_EXT_F=0 ENABLE_JIT=1 check $PARALLEL - make ENABLE_JIT=1 clean && make ENABLE_EXT_C=0 ENABLE_JIT=1 check $PARALLEL + make distclean && make ENABLE_EXT_A=0 ENABLE_JIT=1 check $PARALLEL + make distclean && make ENABLE_EXT_F=0 ENABLE_JIT=1 check $PARALLEL + make distclean && make ENABLE_EXT_C=0 ENABLE_JIT=1 check $PARALLEL # TSAN on ARM64: Fixed memory layout (0x150000000000 for main, 0x151000000000 for JIT) set -o pipefail echo "=== TSAN Test 1/3: Interpreter + FULL4G (ARM64) ===" @@ -601,7 +639,7 @@ jobs: ENABLE_Zicsr ENABLE_Zifencei \ ENABLE_MOP_FUSION ENABLE_BLOCK_CHAINING; do echo "JIT test with ${ext}=0" - if ! (make ENABLE_JIT=1 clean && make ${ext}=0 ENABLE_JIT=1 check $PARALLEL); then + if ! (make distclean && make ${ext}=0 ENABLE_JIT=1 check $PARALLEL); then echo "ERROR: JIT test failed with ${ext}=0" exit 1 fi