diff --git a/main.c b/main.c index a1d7b7f..15f23b1 100644 --- a/main.c +++ b/main.c @@ -1024,8 +1024,30 @@ static void print_mmu_cache_stats(vm_t *vm) fprintf(stderr, "\n=== MMU Cache Statistics ===\n"); for (uint32_t i = 0; i < vm->n_hart; i++) { hart_t *hart = vm->hart[i]; - uint64_t fetch_total = - hart->cache_fetch.hits + hart->cache_fetch.misses; + + /* Combine 2-entry tlb statistics */ + uint64_t fetch_hits_tlb = 0, fetch_misses_tlb = 0; + fetch_hits_tlb = + hart->cache_fetch[0].tlb_hits + hart->cache_fetch[1].tlb_hits; + fetch_misses_tlb = + hart->cache_fetch[0].tlb_misses + hart->cache_fetch[1].tlb_misses; + + /* Combine icache statistics */ + uint64_t fetch_hits_icache = 0, fetch_misses_icache = 0; + fetch_hits_icache = + hart->cache_fetch[0].icache_hits + hart->cache_fetch[1].icache_hits; + fetch_misses_icache = hart->cache_fetch[0].icache_misses + + hart->cache_fetch[1].icache_misses; + + /* Combine victim cache statistics */ + uint64_t fetch_hits_vcache = 0, fetch_misses_vcache = 0; + fetch_hits_vcache = + hart->cache_fetch[0].vcache_hits + hart->cache_fetch[1].vcache_hits; + fetch_misses_vcache = hart->cache_fetch[0].vcache_misses + + hart->cache_fetch[1].vcache_misses; + + uint64_t access_total = + hart->cache_fetch[0].total_fetch + hart->cache_fetch[1].total_fetch; /* Combine 8-set × 2-way load cache statistics */ uint64_t load_hits = 0, load_misses = 0; @@ -1047,14 +1069,32 @@ } uint64_t store_total = store_hits + store_misses; - fprintf(stderr, "\nHart %u:\n", i); - fprintf(stderr, " Fetch: %12llu hits, %12llu misses", - hart->cache_fetch.hits, hart->cache_fetch.misses); - if (fetch_total > 0) - fprintf(stderr, " (%.2f%% hit rate)", - 100.0 * hart->cache_fetch.hits / fetch_total); - fprintf(stderr, "\n"); + fprintf(stderr, "\n=== Instruction Cache Statistics ===\n"); + fprintf(stderr, " Total access: %12llu\n", access_total); + fprintf(stderr, " Icache hits: %12llu (%.2f%%)\n", fetch_hits_icache, 
access_total ? (fetch_hits_icache * 100.0) / access_total : 0.0); + fprintf(stderr, " Icache misses: %12llu (%.2f%%)\n", + fetch_misses_icache, + access_total ? (fetch_misses_icache * 100.0) / access_total : 0.0); + fprintf(stderr, + " ├ Vcache hits: %8llu (%.2f%% of Icache misses, %.2f%% of total)\n", + fetch_hits_vcache, + fetch_misses_icache ? (fetch_hits_vcache * 100.0) / fetch_misses_icache : 0.0, + access_total ? (fetch_hits_vcache * 100.0) / access_total : 0.0); + fprintf(stderr, + " └ Vcache misses: %8llu (%.2f%% of Icache misses, %.2f%% of total)\n", + fetch_misses_vcache, + fetch_misses_icache ? (fetch_misses_vcache * 100.0) / fetch_misses_icache : 0.0, + access_total ? (fetch_misses_vcache * 100.0) / access_total : 0.0); + fprintf(stderr, " ├ TLB hits: %4llu (%.2f%%)\n", + fetch_hits_tlb, + (fetch_hits_tlb + fetch_misses_tlb) ? (fetch_hits_tlb * 100.0) / (fetch_hits_tlb + fetch_misses_tlb) : 0.0); + fprintf( + stderr, " └ TLB misses: %4llu (%.2f%%)\n", fetch_misses_tlb, + (fetch_hits_tlb + fetch_misses_tlb) ? (fetch_misses_tlb * 100.0) / (fetch_hits_tlb + fetch_misses_tlb) : 0.0); + + fprintf(stderr, "\n=== Data Cache Statistics ===\n"); fprintf(stderr, " Load: %12llu hits, %12llu misses (8x2)", load_hits, load_misses); if (load_total > 0) diff --git a/riscv.c b/riscv.c index 3e00751..dbae36c 100644 --- a/riscv.c +++ b/riscv.c @@ -1,4 +1,5 @@ #include <stdint.h> +#include <string.h> #include "common.h" #include "device.h" @@ -180,11 +181,17 @@ static inline uint32_t read_rs2(const hart_t *vm, uint32_t insn) return vm->x_regs[decode_rs2(insn)]; } +static inline void icache_invalidate_all(hart_t *vm) +{ + memset(&vm->icache, 0, sizeof(vm->icache)); +} + /* virtual addressing */ void mmu_invalidate(hart_t *vm) { - vm->cache_fetch.n_pages = 0xFFFFFFFF; + vm->cache_fetch[0].n_pages = 0xFFFFFFFF; + vm->cache_fetch[1].n_pages = 0xFFFFFFFF; /* Invalidate all 8 sets × 2 ways for load cache */ for (int set = 0; set < 8; set++) { for (int way = 0; way < 2; way++) @@ -197,6 +204,7 @@ void mmu_invalidate(hart_t *vm) vm->cache_store[set].ways[way].n_pages = 0xFFFFFFFF; vm->cache_store[set].lru = 0; /* Reset LRU to way 0 */ } + icache_invalidate_all(vm); } /* Invalidate MMU caches for a specific virtual address range. 
@@ -227,9 +235,11 @@ void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size) uint32_t end_vpn = (uint32_t) end_addr >> RV_PAGE_SHIFT; /* Cache invalidation for fetch cache */ - if (vm->cache_fetch.n_pages >= start_vpn && - vm->cache_fetch.n_pages <= end_vpn) - vm->cache_fetch.n_pages = 0xFFFFFFFF; + for (int i = 0; i < 2; i++) { + if (vm->cache_fetch[i].n_pages >= start_vpn && + vm->cache_fetch[i].n_pages <= end_vpn) + vm->cache_fetch[i].n_pages = 0xFFFFFFFF; + } /* Invalidate load cache: 8 sets × 2 ways */ for (int set = 0; set < 8; set++) { @@ -361,10 +371,61 @@ static void mmu_fence(hart_t *vm, uint32_t insn UNUSED) static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value) { + uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK; + uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS); + icache_block_t *blk = &vm->icache.i_block[idx]; uint32_t vpn = addr >> RV_PAGE_SHIFT; - if (unlikely(vpn != vm->cache_fetch.n_pages)) { + uint32_t index = __builtin_parity(vpn) & 0x1; + +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].total_fetch++; +#endif + + /* icache lookup */ + if (likely(blk->valid && blk->tag == tag)) { +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].icache_hits++; +#endif + uint32_t ofs = addr & ICACHE_BLOCK_MASK; + *value = *(const uint32_t *) (blk->base + ofs); + return; + } + + /* icache miss, try victim cache */ +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].icache_misses++; +#endif + + uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS; + for (int i = 0; i < VCACHE_BLOCKS; i++) { + victim_cache_block_t *vblk = &vm->icache.v_block[i]; + + if (vblk->valid && vblk->tag == vcache_key) { + /* victim cache hit, swap blocks */ #ifdef MMU_CACHE_STATS - vm->cache_fetch.misses++; + vm->cache_fetch[index].vcache_hits++; +#endif + icache_block_t tmp = *blk; + *blk = *vblk; + *vblk = tmp; + blk->tag = tag; + vblk->tag = (tmp.tag << ICACHE_INDEX_BITS) | idx; + + uint32_t ofs = addr & ICACHE_BLOCK_MASK; + *value = 
*(const uint32_t *) (blk->base + ofs); + return; + } + } + +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].vcache_misses++; +#endif + + /* TLB lookup */ + if (unlikely(vpn != vm->cache_fetch[index].n_pages)) { + /*TLB miss: need to translate VA to PA*/ +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].tlb_misses++; #endif mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT, RV_EXC_FETCH_PFAULT); @@ -374,15 +435,33 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value) vm->mem_fetch(vm, addr >> RV_PAGE_SHIFT, &page_addr); if (vm->error) return; - vm->cache_fetch.n_pages = vpn; - vm->cache_fetch.page_addr = page_addr; + vm->cache_fetch[index].n_pages = vpn; + vm->cache_fetch[index].page_addr = page_addr; } -#ifdef MMU_CACHE_STATS + /*TLB hit*/ else { - vm->cache_fetch.hits++; - } +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].tlb_hits++; #endif - *value = vm->cache_fetch.page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)]; + } + + *value = + vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)]; + + /* Move the current icache block into the victim cache before replacement */ + if (blk->valid) { + victim_cache_block_t *vblk = &vm->icache.v_block[vm->icache.v_next]; + *vblk = *blk; + vblk->tag = (blk->tag << ICACHE_INDEX_BITS) | idx; + vblk->valid = true; + vm->icache.v_next = (vm->icache.v_next + 1) % VCACHE_BLOCKS; + } + + /* fill into the icache */ + uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK; + blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off; + blk->tag = tag; + blk->valid = true; } static void mmu_load(hart_t *vm, diff --git a/riscv.h b/riscv.h index 86619a1..6b39905 100644 --- a/riscv.h +++ b/riscv.h @@ -36,8 +36,13 @@ typedef struct { uint32_t n_pages; uint32_t *page_addr; #ifdef MMU_CACHE_STATS - uint64_t hits; - uint64_t misses; + uint64_t total_fetch; + uint64_t tlb_hits; + uint64_t tlb_misses; + uint64_t icache_hits; + uint64_t icache_misses; + uint64_t 
vcache_hits; + uint64_t vcache_misses; #endif } mmu_fetch_cache_t; @@ -75,7 +80,58 @@ typedef struct { typedef struct __hart_internal hart_t; typedef struct __vm_internel vm_t; +/* ICACHE_BLOCKS_SIZE: Size of one instruction-cache block (line). + * ICACHE_BLOCKS: Number of blocks (lines) in the instruction cache. + * + * The cache address is decomposed into [ tag | index | offset ] fields: + * - block-offset bits = log2(ICACHE_BLOCKS_SIZE) + * - index bits = log2(ICACHE_BLOCKS) + */ +#define ICACHE_BLOCKS_SIZE 256 +#define ICACHE_BLOCKS 256 +#define ICACHE_OFFSET_BITS 8 +#define ICACHE_INDEX_BITS 8 + +/* VCACHE_BLOCKS_SIZE: Size of one victim-cache block (line). + * VCACHE_BLOCKS: Number of blocks (lines) in the victim cache. + * + * The victim cache is implemented as a small, fully associative cache. + * It is designed to serve as a temporary buffer for instruction cache blocks + * that were recently evicted from the instruction cache. + * + * Upon an instruction cache miss, the system first checks the victim cache + * for the corresponding data. If the data is found (a victim cache hit), + * the instruction cache block and the victim cache block are swapped. + * Conversely, when the instruction cache is being filled with new data, + * the evicted old data from the instruction cache block is simultaneously + * placed into the victim cache. + */ +#define VCACHE_BLOCK_SIZE ICACHE_BLOCKS_SIZE +#define VCACHE_BLOCKS 16 + +/* For power-of-two sizes, (size - 1) sets all low bits to 1, + * allowing fast extraction of an address. 
+ */ +#define ICACHE_INDEX_MASK (ICACHE_BLOCKS - 1) +#define ICACHE_BLOCK_MASK (ICACHE_BLOCKS_SIZE - 1) +#define RV_PAGE_MASK (RV_PAGE_SIZE - 1) + +typedef struct { + uint32_t tag; + const uint8_t *base; + bool valid; +} icache_block_t; + +typedef icache_block_t victim_cache_block_t; + +typedef struct { + icache_block_t i_block[ICACHE_BLOCKS]; + victim_cache_block_t v_block[VCACHE_BLOCKS]; + uint32_t v_next; +} icache_t; + struct __hart_internal { + icache_t icache; uint32_t x_regs[32]; /* LR reservation virtual address. last bit is 1 if valid */ @@ -106,7 +162,8 @@ struct __hart_internal { */ uint32_t exc_cause, exc_val; - mmu_fetch_cache_t cache_fetch; + /* 2-entry direct-mapped with hash-based indexing */ + mmu_fetch_cache_t cache_fetch[2]; /* 8-set × 2-way set-associative cache with 3-bit parity hash indexing */ mmu_cache_set_t cache_load[8]; /* 8-set × 2-way set-associative cache for store operations */