13 changes: 8 additions & 5 deletions main.c
@@ -1024,8 +1024,11 @@ static void print_mmu_cache_stats(vm_t *vm)
     fprintf(stderr, "\n=== MMU Cache Statistics ===\n");
     for (uint32_t i = 0; i < vm->n_hart; i++) {
         hart_t *hart = vm->hart[i];
-        uint64_t fetch_total =
-            hart->cache_fetch.hits + hart->cache_fetch.misses;
+        uint64_t fetch_hits = 0, fetch_misses = 0;
+        fetch_hits = hart->cache_fetch[1].hits + hart->cache_fetch[2].hits;
@cubic-dev-ai cubic-dev-ai bot Nov 11, 2025
cache_fetch only has two entries (indices 0 and 1), so reading index 2 here is undefined behavior. Please sum entries 0 and 1 instead.

Suggested change:
-        fetch_hits = hart->cache_fetch[1].hits + hart->cache_fetch[2].hits;
+        fetch_hits = hart->cache_fetch[0].hits + hart->cache_fetch[1].hits;
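For reference, the fetch_misses sum where the diff resumes below has the same out-of-range index. A corrected pair, under the bot's reading that cache_fetch has exactly two entries (indices 0 and 1), would be:

/* Valid indices for the 2-entry fetch cache are 0 and 1 */
fetch_hits = hart->cache_fetch[0].hits + hart->cache_fetch[1].hits;
fetch_misses = hart->cache_fetch[0].misses + hart->cache_fetch[1].misses;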
+        fetch_misses =
+            hart->cache_fetch[1].misses + hart->cache_fetch[2].misses;
+        uint64_t fetch_total = fetch_hits + fetch_misses;

         /* Combine 8-set × 2-way load cache statistics */
         uint64_t load_hits = 0, load_misses = 0;
@@ -1048,11 +1051,11 @@ static void print_mmu_cache_stats(vm_t *vm)
         uint64_t store_total = store_hits + store_misses;

         fprintf(stderr, "\nHart %u:\n", i);
-        fprintf(stderr, "  Fetch: %12llu hits, %12llu misses",
-                hart->cache_fetch.hits, hart->cache_fetch.misses);
+        fprintf(stderr, "  Fetch: %12llu hits, %12llu misses", fetch_hits,
+                fetch_misses);
         if (fetch_total > 0)
             fprintf(stderr, " (%.2f%% hit rate)",
-                    100.0 * hart->cache_fetch.hits / fetch_total);
+                    100.0 * fetch_hits / fetch_total);
         fprintf(stderr, "\n");

         fprintf(stderr, "  Load:  %12llu hits, %12llu misses (8x2)", load_hits,
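As an aside, aggregating with a loop would avoid hard-coded entry indices in print_mmu_cache_stats altogether. A minimal sketch, assuming the two-entry cache_fetch[2] layout declared in riscv.h below (not part of this PR):

uint64_t fetch_hits = 0, fetch_misses = 0;
/* Sum statistics across every fetch-cache entry; the bound (2) matches
 * the cache_fetch[2] declaration in riscv.h */
for (int e = 0; e < 2; e++) {
    fetch_hits += hart->cache_fetch[e].hits;
    fetch_misses += hart->cache_fetch[e].misses;
}
uint64_t fetch_total = fetch_hits + fetch_misses;

This keeps the summation correct in one place if the entry count ever changes.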
84 changes: 71 additions & 13 deletions riscv.c
@@ -1,4 +1,5 @@
 #include <stdio.h>
+#include <string.h>

 #include "common.h"
 #include "device.h"
@@ -180,11 +181,17 @@ static inline uint32_t read_rs2(const hart_t *vm, uint32_t insn)
     return vm->x_regs[decode_rs2(insn)];
 }

+static inline void icache_invalidate_all(hart_t *vm)
+{
+    memset(&vm->icache, 0, sizeof(vm->icache));
+}
+
 /* virtual addressing */

 void mmu_invalidate(hart_t *vm)
 {
-    vm->cache_fetch.n_pages = 0xFFFFFFFF;
+    vm->cache_fetch[0].n_pages = 0xFFFFFFFF;
+    vm->cache_fetch[1].n_pages = 0xFFFFFFFF;
     /* Invalidate all 8 sets × 2 ways for load cache */
     for (int set = 0; set < 8; set++) {
         for (int way = 0; way < 2; way++)
@@ -197,6 +204,7 @@ void mmu_invalidate(hart_t *vm)
             vm->cache_store[set].ways[way].n_pages = 0xFFFFFFFF;
         vm->cache_store[set].lru = 0; /* Reset LRU to way 0 */
     }
+    icache_invalidate_all(vm);
 }

 /* Invalidate MMU caches for a specific virtual address range.
@@ -227,9 +235,11 @@ void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size)
     uint32_t end_vpn = (uint32_t) end_addr >> RV_PAGE_SHIFT;

     /* Cache invalidation for fetch cache */
-    if (vm->cache_fetch.n_pages >= start_vpn &&
-        vm->cache_fetch.n_pages <= end_vpn)
-        vm->cache_fetch.n_pages = 0xFFFFFFFF;
+    for (int i = 0; i < 2; i++) {
@cubic-dev-ai cubic-dev-ai bot Nov 11, 2025
mmu_invalidate_range now needs to flush the new instruction cache as well; otherwise pages whose mappings change keep returning stale instructions from vm->icache.


Author
I’m a bit confused about this.
The current implementation is as follows:

/* Cache invalidation for fetch cache */
for (int i = 0; i < 2; i++) {
    if (vm->cache_fetch[i].n_pages >= start_vpn &&
        vm->cache_fetch[i].n_pages <= end_vpn)
        vm->cache_fetch[i].n_pages = 0xFFFFFFFF;
}

@yy214123 You're right to be confused. The snippet you quoted only invalidates the page-translation entries (cache_fetch); the comment is pointing at a bigger gap: the actual instruction-cache blocks (vm->icache.i_block and vm->icache.v_block) aren't touched at all.

Right now, mmu_invalidate_range only invalidates the 2-entry page-cache entries that fall in your VPN range. But if the instruction cache already holds cached instruction blocks from those pages (which it does after fetches), they keep serving stale data after the page mappings change, until those specific lines happen to be evicted by future misses.

To fix it properly, mmu_invalidate_range needs to scan the I-cache and the victim cache for blocks whose tags fall in the invalidated address range and mark them valid = false. Something like:

/* Invalidate I-cache blocks in the address range */
uint32_t start_block_addr =
    (start_addr >> ICACHE_OFFSET_BITS) << ICACHE_OFFSET_BITS;
uint32_t end_block_addr =
    ((end_addr + ICACHE_BLOCK_SIZE - 1) >> ICACHE_OFFSET_BITS)
    << ICACHE_OFFSET_BITS;

for (uint32_t block_idx = 0; block_idx < ICACHE_BLOCKS; block_idx++) {
    uint32_t block_base =
        (block_idx << ICACHE_OFFSET_BITS) |
        (vm->icache.i_block[block_idx].tag
         << (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS));
    if (block_base >= start_block_addr && block_base <= end_block_addr &&
        vm->icache.i_block[block_idx].valid) {
        vm->icache.i_block[block_idx].valid = false;
    }
}

/* Similar loop for the victim cache: check whether v_block[i].tag maps to
 * any address in the range */
for (int i = 0; i < VCACHE_BLOCKS; i++) {
    if (vm->icache.v_block[i].valid) {
        uint32_t v_block_addr = vm->icache.v_block[i].tag << ICACHE_OFFSET_BITS;
        if (v_block_addr >= start_block_addr &&
            v_block_addr <= end_block_addr) {
            vm->icache.v_block[i].valid = false;
        }
    }
}

This is a bit more involved than the full icache_invalidate_all (which you already call in mmu_invalidate), but it's necessary for range-based invalidation to work correctly without flushing the entire cache unnecessarily. The tag math might need tweaking depending on whether the blocks store virtual or physical addresses; double-check that the reconstructed address comparison works as expected.
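If the per-block tag math proves fragile, a coarser fallback is to reuse the icache_invalidate_all helper this PR already introduces and flush everything on any range invalidation. A minimal sketch (the wrapper name is hypothetical, and it assumes the helper defined in riscv.c is visible here):

/* Conservative range invalidation: ignore the range and drop every cached
 * instruction block. The memset inside icache_invalidate_all clears all
 * valid flags and v_next, so no stale block can survive. */
static void icache_invalidate_range_conservative(hart_t *vm,
                                                 uint32_t start_addr,
                                                 uint32_t size)
{
    (void) start_addr; /* unused: the whole cache is flushed */
    (void) size;
    icache_invalidate_all(vm);
}

The trade-off is extra refill misses after each call, in exchange for correctness that does not depend on how the tags are stored.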

+        if (vm->cache_fetch[i].n_pages >= start_vpn &&
+            vm->cache_fetch[i].n_pages <= end_vpn)
+            vm->cache_fetch[i].n_pages = 0xFFFFFFFF;
+    }

     /* Invalidate load cache: 8 sets × 2 ways */
     for (int set = 0; set < 8; set++) {
@@ -361,11 +371,47 @@ static void mmu_fence(hart_t *vm, uint32_t insn UNUSED)

 static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
 {
-    /* cache hit */
+    uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK;
+    uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS);
+    icache_block_t *blk = &vm->icache.i_block[idx];
     uint32_t vpn = addr >> RV_PAGE_SHIFT;
-    if (unlikely(vpn != vm->cache_fetch.n_pages)) {
+    uint32_t index = __builtin_parity(vpn) & 0x1;
+
+    if (likely(blk->valid && blk->tag == tag)) {
+#ifdef MMU_CACHE_STATS
+        vm->cache_fetch[index].hits++;
+#endif
+        uint32_t ofs = addr & ICACHE_BLOCK_MASK;
+        *value = *(const uint32_t *) (blk->base + ofs);
+        return;
+    }
+
+    /* search the victim cache */
+    uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS;
+    for (int i = 0; i < VCACHE_BLOCKS; i++) {
+        victim_cache_block_t *vblk = &vm->icache.v_block[i];
+
+        /* victim cache hit, swap blocks */
+        if (vblk->valid && vblk->tag == vcache_key) {
+            icache_block_t tmp = *blk;
+            *blk = *vblk;
+            *vblk = tmp;
+            blk->tag = tag;
Collaborator
This code looks suspicious to me.

When you move the evicted I-cache block (tmp) back into the victim cache, you are setting vblk->tag to tmp.tag, which is the 16-bit I-cache tag.

Won't this corrupt the victim cache entry? The VC search logic requires a 24-bit tag ([ICache Tag | ICache Index]) to function. Because you're only storing the 16-bit tag, this VCache entry will never be hit again.

Author
> Won't this corrupt the victim cache entry? The VC search logic requires a 24-bit tag ([ICache Tag | ICache Index]) to function. Because you're only storing the 16-bit tag, this VCache entry will never be hit again.

Thank you for pointing that out. I’ve added the following expression to ensure correctness:

+   vblk->tag = (tmp.tag << ICACHE_INDEX_BITS) | idx;
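A quick bit-level sanity check of that fix, using the ICACHE_OFFSET_BITS = ICACHE_INDEX_BITS = 8 values from riscv.h (a standalone sketch, not part of the PR):

#include <assert.h>
#include <stdint.h>

#define ICACHE_OFFSET_BITS 8
#define ICACHE_INDEX_BITS 8
#define ICACHE_INDEX_MASK 0xFF

int main(void)
{
    uint32_t addr = 0x12345678;
    /* I-cache fields: 16-bit tag, 8-bit index */
    uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK; /* 0x56 */
    uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS); /* 0x1234 */
    /* Victim-cache key: the 24-bit [ICache Tag | ICache Index] */
    uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS; /* 0x123456 */
    /* The fix rebuilds the 24-bit key from the 16-bit tag plus the index */
    assert(((tag << ICACHE_INDEX_BITS) | idx) == vcache_key);
    return 0;
}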

+            vblk->tag = (tmp.tag << ICACHE_INDEX_BITS) | idx;
+
+            uint32_t ofs = addr & ICACHE_BLOCK_MASK;
+            *value = *(const uint32_t *) (blk->base + ofs);
+            return;
+        }
+    }

 #ifdef MMU_CACHE_STATS
-    vm->cache_fetch.misses++;
+    vm->cache_fetch[index].misses++;
 #endif

+    /* cache miss, continue using the original va->pa translation */
+    if (unlikely(vpn != vm->cache_fetch[index].n_pages)) {
         mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT,
                       RV_EXC_FETCH_PFAULT);
         if (vm->error)
@@ -374,15 +420,27 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
         vm->mem_fetch(vm, addr >> RV_PAGE_SHIFT, &page_addr);
         if (vm->error)
             return;
-        vm->cache_fetch.n_pages = vpn;
-        vm->cache_fetch.page_addr = page_addr;
+        vm->cache_fetch[index].n_pages = vpn;
+        vm->cache_fetch[index].page_addr = page_addr;
     }
-#ifdef MMU_CACHE_STATS
-    else {
-        vm->cache_fetch.hits++;
-    }
-#endif
-    *value = vm->cache_fetch.page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
+
+    *value =
+        vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
+
+    /* Move the current icache block into the victim cache before replacement */
+    if (blk->valid) {
+        victim_cache_block_t *vblk = &vm->icache.v_block[vm->icache.v_next];
+        *vblk = *blk;
+        vblk->tag = (blk->tag << ICACHE_INDEX_BITS) | idx;
+        vblk->valid = true;
+        vm->icache.v_next = (vm->icache.v_next + 1) % VCACHE_BLOCKS;
+    }
+
+    /* fill into the icache */
+    uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK;
+    blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off;
+    blk->tag = tag;
+    blk->valid = true;
 }

 static void mmu_load(hart_t *vm,
54 changes: 53 additions & 1 deletion riscv.h
@@ -75,7 +75,58 @@ typedef struct {
 typedef struct __hart_internal hart_t;
 typedef struct __vm_internel vm_t;

+/* ICACHE_BLOCKS_SIZE: Size of one instruction-cache block (line).
+ * ICACHE_BLOCKS: Number of blocks (lines) in the instruction cache.
+ *
+ * The cache address is decomposed into [ tag | index | offset ] fields:
+ *   - block-offset bits = log2(ICACHE_BLOCKS_SIZE)
+ *   - index bits = log2(ICACHE_BLOCKS)
+ */
+#define ICACHE_BLOCKS_SIZE 256
+#define ICACHE_BLOCKS 256
+#define ICACHE_OFFSET_BITS 8
+#define ICACHE_INDEX_BITS 8

+/* VCACHE_BLOCK_SIZE: Size of one victim-cache block (line).
+ * VCACHE_BLOCKS: Number of blocks (lines) in the victim cache.
+ *
+ * The victim cache is a small, fully associative cache that buffers
+ * instruction-cache blocks recently evicted from the instruction cache.
+ *
+ * On an instruction-cache miss, the victim cache is searched first. On a
+ * victim-cache hit, the instruction-cache block and the victim-cache block
+ * are swapped. Conversely, when the instruction cache is filled with new
+ * data, the old block evicted from the instruction cache is placed into
+ * the victim cache.
+ */
+#define VCACHE_BLOCK_SIZE ICACHE_BLOCKS_SIZE
+#define VCACHE_BLOCKS 16

+/* For power-of-two sizes, (size - 1) sets all low bits to 1, so the
+ * corresponding address field can be extracted with a single bitwise AND.
+ */
+#define ICACHE_INDEX_MASK (ICACHE_BLOCKS - 1)
+#define ICACHE_BLOCK_MASK (ICACHE_BLOCKS_SIZE - 1)
+#define RV_PAGE_MASK (RV_PAGE_SIZE - 1)

+typedef struct {
+    uint32_t tag;
+    const uint8_t *base;
+    bool valid;
+} icache_block_t;
+
+typedef icache_block_t victim_cache_block_t;
+
+typedef struct {
+    icache_block_t i_block[ICACHE_BLOCKS];
+    victim_cache_block_t v_block[VCACHE_BLOCKS];
+    uint32_t v_next;
+} icache_t;

 struct __hart_internal {
+    icache_t icache;
     uint32_t x_regs[32];

     /* LR reservation virtual address. last bit is 1 if valid */
@@ -106,7 +157,8 @@ struct __hart_internal {
      */
     uint32_t exc_cause, exc_val;

-    mmu_fetch_cache_t cache_fetch;
+    /* 2-entry direct-mapped with hash-based indexing */
+    mmu_fetch_cache_t cache_fetch[2];
     /* 8-set × 2-way set-associative cache with 3-bit parity hash indexing */
     mmu_cache_set_t cache_load[8];
     /* 8-set × 2-way set-associative cache for store operations */
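To make the field decomposition concrete, here is a standalone sketch (not part of the PR) that splits an address using the constants above, plus the parity hash mmu_fetch uses to pick one of the two cache_fetch entries; __builtin_parity is a GCC/Clang builtin:

#include <stdint.h>
#include <stdio.h>

#define ICACHE_BLOCKS_SIZE 256
#define ICACHE_BLOCKS 256
#define ICACHE_OFFSET_BITS 8
#define ICACHE_INDEX_BITS 8
#define ICACHE_INDEX_MASK (ICACHE_BLOCKS - 1)
#define ICACHE_BLOCK_MASK (ICACHE_BLOCKS_SIZE - 1)
#define RV_PAGE_SHIFT 12

int main(void)
{
    uint32_t addr = 0x12345678;
    uint32_t ofs = addr & ICACHE_BLOCK_MASK;                         /* 0x78 */
    uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK; /* 0x56 */
    uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS); /* 0x1234 */
    /* Fetch-cache entry selection: parity of the VPN bits, 0 or 1 */
    uint32_t vpn = addr >> RV_PAGE_SHIFT; /* 0x12345 */
    uint32_t entry = __builtin_parity(vpn) & 0x1;
    printf("tag=%#x idx=%#x ofs=%#x entry=%u\n", tag, idx, ofs, entry);
    return 0;
}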