Skip to content

Commit 5e9504b

Browse files
committed
Adopt 2-entry direct-mapped page cache
Replace the previous 1-entry direct-mapped design with a 2-entry direct-mapped cache that is indexed by a hash of the virtual page number (the same parity hash used by cache_load). This allows two hot virtual pages to coexist without thrashing. Measurements show that the number of virtual-to-physical translations during instruction fetch (mmu_translate() calls) decreases by ~10%.
1 parent 3fb5efe commit 5e9504b

File tree

3 files changed

+74
-22
lines changed

3 files changed

+74
-22
lines changed

main.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,8 +1024,12 @@ static void print_mmu_cache_stats(vm_t *vm)
10241024
fprintf(stderr, "\n=== MMU Cache Statistics ===\n");
10251025
for (uint32_t i = 0; i < vm->n_hart; i++) {
10261026
hart_t *hart = vm->hart[i];
1027-
uint64_t fetch_total =
1028-
hart->cache_fetch.hits + hart->cache_fetch.misses;
1027+
uint64_t fetch_hits = 0, fetch_misses = 0;
1028+
for (int i = 0; i < 2; i++) {
1029+
fetch_hits += hart->cache_fetch[i].hits;
1030+
fetch_misses += hart->cache_fetch[i].misses;
1031+
}
1032+
uint64_t fetch_total = fetch_hits + fetch_misses;
10291033

10301034
/* Combine 8-set × 2-way load cache statistics */
10311035
uint64_t load_hits = 0, load_misses = 0;
@@ -1048,11 +1052,11 @@ static void print_mmu_cache_stats(vm_t *vm)
10481052
uint64_t store_total = store_hits + store_misses;
10491053

10501054
fprintf(stderr, "\nHart %u:\n", i);
1051-
fprintf(stderr, " Fetch: %12llu hits, %12llu misses",
1052-
hart->cache_fetch.hits, hart->cache_fetch.misses);
1055+
fprintf(stderr, " Fetch: %12llu hits, %12llu misses", fetch_hits,
1056+
fetch_misses);
10531057
if (fetch_total > 0)
10541058
fprintf(stderr, " (%.2f%% hit rate)",
1055-
100.0 * hart->cache_fetch.hits / fetch_total);
1059+
100.0 * fetch_hits / fetch_total);
10561060
fprintf(stderr, "\n");
10571061

10581062
fprintf(stderr, " Load: %12llu hits, %12llu misses (8x2)", load_hits,

riscv.c

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,8 @@ static inline void icache_invalidate_all(hart_t *vm)
190190

191191
void mmu_invalidate(hart_t *vm)
192192
{
193-
vm->cache_fetch.n_pages = 0xFFFFFFFF;
193+
vm->cache_fetch[0].n_pages = 0xFFFFFFFF;
194+
vm->cache_fetch[1].n_pages = 0xFFFFFFFF;
194195
/* Invalidate all 8 sets × 2 ways for load cache */
195196
for (int set = 0; set < 8; set++) {
196197
for (int way = 0; way < 2; way++)
@@ -234,9 +235,11 @@ void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size)
234235
uint32_t end_vpn = (uint32_t) end_addr >> RV_PAGE_SHIFT;
235236

236237
/* Cache invalidation for fetch cache */
237-
if (vm->cache_fetch.n_pages >= start_vpn &&
238-
vm->cache_fetch.n_pages <= end_vpn)
239-
vm->cache_fetch.n_pages = 0xFFFFFFFF;
238+
for (int i = 0; i < 2; i++) {
239+
if (vm->cache_fetch[i].n_pages >= start_vpn &&
240+
vm->cache_fetch[i].n_pages <= end_vpn)
241+
vm->cache_fetch[i].n_pages = 0xFFFFFFFF;
242+
}
240243

241244
/* Invalidate load cache: 8 sets × 2 ways */
242245
for (int set = 0; set < 8; set++) {
@@ -371,24 +374,44 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
371374
/* cache hit */
372375
uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK;
373376
uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS);
374-
icache_block_t *blk = &vm->icache.block[idx];
377+
icache_block_t *blk = &vm->icache.i_block[idx];
378+
uint32_t vpn = addr >> RV_PAGE_SHIFT;
379+
uint32_t index = __builtin_parity(vpn) & 0x1;
375380

376381
if (likely(blk->valid && blk->tag == tag)) {
377382
#ifdef MMU_CACHE_STATS
378-
vm->cache_fetch.hits++;
383+
vm->cache_fetch[index].hits++;
379384
#endif
380385
uint32_t ofs = addr & ICACHE_BLOCK_MASK;
381386
*value = *(const uint32_t *) (blk->base + ofs);
382387
return;
383388
}
384389

390+
/* search the victim cache */
391+
uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS;
392+
for (int i = 0; i < VCACHE_BLOCKS; i++) {
393+
victim_cache_block_t *vblk = &vm->icache.v_block[i];
394+
395+
/* victim cache hit, swap blocks */
396+
if (vblk->valid && vblk->tag == vcache_key) {
397+
icache_block_t tmp = *blk;
398+
*blk = *vblk;
399+
*vblk = tmp;
400+
blk->tag = tag;
401+
vblk->tag = (tmp.tag << ICACHE_INDEX_BITS) | idx;
402+
403+
uint32_t ofs = addr & ICACHE_BLOCK_MASK;
404+
*value = *(const uint32_t *) (blk->base + ofs);
405+
return;
406+
}
407+
}
408+
385409
#ifdef MMU_CACHE_STATS
386-
vm->cache_fetch.misses++;
410+
vm->cache_fetch[index].misses++;
387411
#endif
388412

389-
/* cache miss, Continue using the original va->pa*/
390-
uint32_t vpn = addr >> RV_PAGE_SHIFT;
391-
if (unlikely(vpn != vm->cache_fetch.n_pages)) {
413+
/* cache miss, continue using the original va->pa*/
414+
if (unlikely(vpn != vm->cache_fetch[index].n_pages)) {
392415
mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT,
393416
RV_EXC_FETCH_PFAULT);
394417
if (vm->error)
@@ -397,15 +420,25 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
397420
vm->mem_fetch(vm, addr >> RV_PAGE_SHIFT, &page_addr);
398421
if (vm->error)
399422
return;
400-
vm->cache_fetch.n_pages = vpn;
401-
vm->cache_fetch.page_addr = page_addr;
423+
vm->cache_fetch[index].n_pages = vpn;
424+
vm->cache_fetch[index].page_addr = page_addr;
402425
}
403426

404-
*value = vm->cache_fetch.page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
427+
*value =
428+
vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
429+
430+
/* Move the current icache block into the victim cache before replacement */
431+
if (blk->valid) {
432+
victim_cache_block_t *vblk = &vm->icache.v_block[vm->icache.v_next];
433+
*vblk = *blk;
434+
vblk->tag = (blk->tag << ICACHE_INDEX_BITS) | idx;
435+
vblk->valid = true;
436+
vm->icache.v_next = (vm->icache.v_next + 1) % VCACHE_BLOCKS;
437+
}
405438

406-
/* fill into the cache */
439+
/* fill into the icache */
407440
uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK;
408-
blk->base = (const uint8_t *) vm->cache_fetch.page_addr + block_off;
441+
blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off;
409442
blk->tag = tag;
410443
blk->valid = true;
411444
}

riscv.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,16 @@ typedef struct __vm_internel vm_t;
8787
#define ICACHE_OFFSET_BITS 8
8888
#define ICACHE_INDEX_BITS 8
8989

90+
/* Define the victim cache.
91+
*
92+
* The block size of the victim cache is identical to that of the primary
93+
* instruction cache (IC), ensuring full block compatibility.
94+
* However, the number of blocks is smaller, allowing the VC to store
95+
* a few recently evicted cache lines to reduce conflict misses.
96+
*/
97+
#define VCACHE_BLOCK_SIZE ICACHE_BLOCKS_SIZE
98+
#define VCACHE_BLOCKS 16
99+
90100
/* For power-of-two sizes, (size - 1) sets all low bits to 1,
91101
* allowing fast extraction of an address.
92102
*/
@@ -100,8 +110,12 @@ typedef struct {
100110
bool valid;
101111
} icache_block_t;
102112

113+
typedef icache_block_t victim_cache_block_t;
114+
103115
typedef struct {
104-
icache_block_t block[ICACHE_BLOCKS];
116+
icache_block_t i_block[ICACHE_BLOCKS];
117+
victim_cache_block_t v_block[VCACHE_BLOCKS];
118+
uint32_t v_next;
105119
} icache_t;
106120

107121
struct __hart_internal {
@@ -136,7 +150,8 @@ struct __hart_internal {
136150
*/
137151
uint32_t exc_cause, exc_val;
138152

139-
mmu_fetch_cache_t cache_fetch;
153+
/* 2-entry direct-mapped with hash-based indexing */
154+
mmu_fetch_cache_t cache_fetch[2];
140155
/* 8-set × 2-way set-associative cache with 3-bit parity hash indexing */
141156
mmu_cache_set_t cache_load[8];
142157
/* 8-set × 2-way set-associative cache for store operations */

0 commit comments

Comments
 (0)