diff --git a/main.c b/main.c index a1d7b7f..15f23b1 100644 --- a/main.c +++ b/main.c @@ -1024,8 +1024,30 @@ static void print_mmu_cache_stats(vm_t *vm) fprintf(stderr, "\n=== MMU Cache Statistics ===\n"); for (uint32_t i = 0; i < vm->n_hart; i++) { hart_t *hart = vm->hart[i]; - uint64_t fetch_total = - hart->cache_fetch.hits + hart->cache_fetch.misses; + + /* Combine 2-entry tlb statistics */ + uint64_t fetch_hits_tlb = 0, fetch_misses_tlb = 0; + fetch_hits_tlb = + hart->cache_fetch[0].tlb_hits + hart->cache_fetch[1].tlb_hits; + fetch_misses_tlb = + hart->cache_fetch[0].tlb_misses + hart->cache_fetch[1].tlb_misses; + + /* Combine icache statistics */ + uint64_t fetch_hits_icache = 0, fetch_misses_icache = 0; + fetch_hits_icache = + hart->cache_fetch[0].icache_hits + hart->cache_fetch[1].icache_hits; + fetch_misses_icache = hart->cache_fetch[0].icache_misses + + hart->cache_fetch[1].icache_misses; + + /* Combine victim cache statistics */ + uint64_t fetch_hits_vcache = 0, fetch_misses_vcache = 0; + fetch_hits_vcache = + hart->cache_fetch[0].vcache_hits + hart->cache_fetch[1].vcache_hits; + fetch_misses_vcache = hart->cache_fetch[0].vcache_misses + + hart->cache_fetch[1].vcache_misses; + + uint64_t access_total = + hart->cache_fetch[0].total_fetch + hart->cache_fetch[1].total_fetch; /* Combine 8-set × 2-way load cache statistics */ uint64_t load_hits = 0, load_misses = 0; @@ -1047,14 +1069,32 @@ } uint64_t store_total = store_hits + store_misses; - fprintf(stderr, "\nHart %u:\n", i); - fprintf(stderr, " Fetch: %12llu hits, %12llu misses", - hart->cache_fetch.hits, hart->cache_fetch.misses); - if (fetch_total > 0) - fprintf(stderr, " (%.2f%% hit rate)", - 100.0 * hart->cache_fetch.hits / fetch_total); - fprintf(stderr, "\n"); + fprintf(stderr, "\n=== Instruction Cache Statistics ===\n"); + fprintf(stderr, " Total access: %12llu\n", access_total); + fprintf(stderr, " Icache hits: %12llu (%.2f%%)\n", fetch_hits_icache, 
access_total ? (fetch_hits_icache * 100.0) / access_total : 0.0); + fprintf(stderr, " Icache misses: %12llu (%.2f%%)\n", + fetch_misses_icache, + access_total ? (fetch_misses_icache * 100.0) / access_total : 0.0); + fprintf(stderr, + " ├ Vcache hits: %8llu (%.2f%% of Icache misses, %.2f%% of total)\n", + fetch_hits_vcache, + fetch_misses_icache ? (fetch_hits_vcache * 100.0) / fetch_misses_icache : 0.0, + access_total ? (fetch_hits_vcache * 100.0) / access_total : 0.0); + fprintf(stderr, + " └ Vcache misses: %8llu (%.2f%% of Icache misses, %.2f%% of total)\n", + fetch_misses_vcache, + fetch_misses_icache ? (fetch_misses_vcache * 100.0) / fetch_misses_icache : 0.0, + access_total ? (fetch_misses_vcache * 100.0) / access_total : 0.0); + fprintf(stderr, " ├ TLB hits: %4llu (%.2f%%)\n", + fetch_hits_tlb, + (fetch_hits_tlb + fetch_misses_tlb) ? (fetch_hits_tlb * 100.0) / (fetch_hits_tlb + fetch_misses_tlb) : 0.0); + fprintf( + stderr, " └ TLB misses: %4llu (%.2f%%)\n", fetch_misses_tlb, + (fetch_hits_tlb + fetch_misses_tlb) ? (fetch_misses_tlb * 100.0) / (fetch_hits_tlb + fetch_misses_tlb) : 0.0); + + fprintf(stderr, "\n=== Data Cache Statistics ===\n"); fprintf(stderr, " Load: %12llu hits, %12llu misses (8x2)", load_hits, load_misses); if (load_total > 0) diff --git a/riscv.c b/riscv.c index 3e00751..dbae36c 100644 --- a/riscv.c +++ b/riscv.c @@ -1,4 +1,5 @@ #include <stdint.h> +#include <string.h> #include "common.h" #include "device.h" @@ -180,11 +181,17 @@ static inline uint32_t read_rs2(const hart_t *vm, uint32_t insn) return vm->x_regs[decode_rs2(insn)]; } +static inline void icache_invalidate_all(hart_t *vm) +{ + memset(&vm->icache, 0, sizeof(vm->icache)); +} + /* virtual addressing */ void mmu_invalidate(hart_t *vm) { - vm->cache_fetch.n_pages = 0xFFFFFFFF; + vm->cache_fetch[0].n_pages = 0xFFFFFFFF; + vm->cache_fetch[1].n_pages = 0xFFFFFFFF; /* Invalidate all 8 sets × 2 ways for load cache */ for (int set = 0; set < 8; set++) { for (int way = 0; way < 2; way++) @@ -197,6 +204,7 @@ void mmu_invalidate(hart_t *vm) vm->cache_store[set].ways[way].n_pages = 0xFFFFFFFF; vm->cache_store[set].lru = 0; /* Reset LRU to way 0 */ } + icache_invalidate_all(vm); } /* Invalidate MMU caches for a specific virtual address range. 
@@ -227,9 +235,11 @@ void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size) uint32_t end_vpn = (uint32_t) end_addr >> RV_PAGE_SHIFT; /* Cache invalidation for fetch cache */ - if (vm->cache_fetch.n_pages >= start_vpn && - vm->cache_fetch.n_pages <= end_vpn) - vm->cache_fetch.n_pages = 0xFFFFFFFF; + for (int i = 0; i < 2; i++) { + if (vm->cache_fetch[i].n_pages >= start_vpn && + vm->cache_fetch[i].n_pages <= end_vpn) + vm->cache_fetch[i].n_pages = 0xFFFFFFFF; + } /* Invalidate load cache: 8 sets × 2 ways */ for (int set = 0; set < 8; set++) { @@ -361,10 +371,61 @@ static void mmu_fence(hart_t *vm, uint32_t insn UNUSED) static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value) { + uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK; + uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS); + icache_block_t *blk = &vm->icache.i_block[idx]; uint32_t vpn = addr >> RV_PAGE_SHIFT; - if (unlikely(vpn != vm->cache_fetch.n_pages)) { + uint32_t index = __builtin_parity(vpn) & 0x1; + +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].total_fetch++; +#endif + + /* icache lookup */ + if (likely(blk->valid && blk->tag == tag)) { +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].icache_hits++; +#endif + uint32_t ofs = addr & ICACHE_BLOCK_MASK; + *value = *(const uint32_t *) (blk->base + ofs); + return; + } + + /* icache miss, try victim cache */ +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].icache_misses++; +#endif + + uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS; + for (int i = 0; i < VCACHE_BLOCKS; i++) { + victim_cache_block_t *vblk = &vm->icache.v_block[i]; + + if (vblk->valid && vblk->tag == vcache_key) { + /* victim cache hit, swap blocks */ #ifdef MMU_CACHE_STATS - vm->cache_fetch.misses++; + vm->cache_fetch[index].vcache_hits++; +#endif + icache_block_t tmp = *blk; + *blk = *vblk; + *vblk = tmp; + blk->tag = tag; + vblk->tag = (tmp.tag << ICACHE_INDEX_BITS) | idx; + + uint32_t ofs = addr & ICACHE_BLOCK_MASK; + *value = 
*(const uint32_t *) (blk->base + ofs); + return; + } + } + +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].vcache_misses++; +#endif + + /* TLB lookup */ + if (unlikely(vpn != vm->cache_fetch[index].n_pages)) { + /*TLB miss: need to translate VA to PA*/ +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].tlb_misses++; #endif mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT, RV_EXC_FETCH_PFAULT); @@ -374,15 +435,33 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value) vm->mem_fetch(vm, addr >> RV_PAGE_SHIFT, &page_addr); if (vm->error) return; - vm->cache_fetch.n_pages = vpn; - vm->cache_fetch.page_addr = page_addr; + vm->cache_fetch[index].n_pages = vpn; + vm->cache_fetch[index].page_addr = page_addr; } -#ifdef MMU_CACHE_STATS + /*TLB hit*/ else { - vm->cache_fetch.hits++; - } +#ifdef MMU_CACHE_STATS + vm->cache_fetch[index].tlb_hits++; #endif - *value = vm->cache_fetch.page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)]; + } + + *value = + vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)]; + + /* Move the current icache block into the victim cache before replacement */ + if (blk->valid) { + victim_cache_block_t *vblk = &vm->icache.v_block[vm->icache.v_next]; + *vblk = *blk; + vblk->tag = (blk->tag << ICACHE_INDEX_BITS) | idx; + vblk->valid = true; + vm->icache.v_next = (vm->icache.v_next + 1) % VCACHE_BLOCKS; + } + + /* fill into the icache */ + uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK; + blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off; + blk->tag = tag; + blk->valid = true; } static void mmu_load(hart_t *vm, diff --git a/riscv.h b/riscv.h index 86619a1..6b39905 100644 --- a/riscv.h +++ b/riscv.h @@ -36,8 +36,13 @@ typedef struct { uint32_t n_pages; uint32_t *page_addr; #ifdef MMU_CACHE_STATS - uint64_t hits; - uint64_t misses; + uint64_t total_fetch; + uint64_t tlb_hits; + uint64_t tlb_misses; + uint64_t icache_hits; + uint64_t icache_misses; + uint64_t 
vcache_hits; + uint64_t vcache_misses; #endif } mmu_fetch_cache_t; @@ -75,7 +80,58 @@ typedef struct { typedef struct __hart_internal hart_t; typedef struct __vm_internel vm_t; +/* ICACHE_BLOCKS_SIZE: Size of one instruction-cache block (line). + * ICACHE_BLOCKS: Number of blocks (lines) in the instruction cache. + * + * The cache address is decomposed into [ tag | index | offset ] fields: + * - block-offset bits = log2(ICACHE_BLOCKS_SIZE) + * - index bits = log2(ICACHE_BLOCKS) + */ +#define ICACHE_BLOCKS_SIZE 256 +#define ICACHE_BLOCKS 256 +#define ICACHE_OFFSET_BITS 8 +#define ICACHE_INDEX_BITS 8 + +/* VCACHE_BLOCKS_SIZE: Size of one victim-cache block (line). + * VCACHE_BLOCKS: Number of blocks (lines) in the victim cache. + * + * The victim cache is implemented as a small, fully associative cache. + * It is designed to serve as a temporary buffer for instruction cache blocks + * that were recently evicted from the instruction cache. + * + * Upon an instruction cache miss, the system first checks the victim cache + * for the corresponding data. If the data is found (a victim cache hit), + * the instruction cache block and the victim cache block are swapped. + * Conversely, when the instruction cache is being filled with new data, + * the evicted old data from the instruction cache block is simultaneously + * placed into the victim cache. + */ +#define VCACHE_BLOCK_SIZE ICACHE_BLOCKS_SIZE +#define VCACHE_BLOCKS 16 + +/* For power-of-two sizes, (size - 1) sets all low bits to 1, + * allowing fast extraction of an address. 
+ */ +#define ICACHE_INDEX_MASK (ICACHE_BLOCKS - 1) +#define ICACHE_BLOCK_MASK (ICACHE_BLOCKS_SIZE - 1) +#define RV_PAGE_MASK (RV_PAGE_SIZE - 1) + +typedef struct { + uint32_t tag; + const uint8_t *base; + bool valid; +} icache_block_t; + +typedef icache_block_t victim_cache_block_t; + +typedef struct { + icache_block_t i_block[ICACHE_BLOCKS]; + victim_cache_block_t v_block[VCACHE_BLOCKS]; + uint32_t v_next; +} icache_t; + struct __hart_internal { + icache_t icache; uint32_t x_regs[32]; /* LR reservation virtual address. last bit is 1 if valid */ @@ -106,7 +162,8 @@ struct __hart_internal { */ uint32_t exc_cause, exc_val; - mmu_fetch_cache_t cache_fetch; + /* 2-entry direct-mapped with hash-based indexing */ + mmu_fetch_cache_t cache_fetch[2]; /* 8-set × 2-way set-associative cache with 3-bit parity hash indexing */ mmu_cache_set_t cache_load[8]; /* 8-set × 2-way set-associative cache for store operations */