Skip to content

Commit f270dca

Browse files
committed
Add victim cache for I-cache
Introduce a small victim cache to reduce conflict misses in the direct-mapped instruction cache. On an I-cache miss, probe the victim cache; on a victim-cache hit, swap the victim block with the current I-cache block and return the data. On a miss in both caches, the valid I-cache block being replaced is first moved into the victim cache, whose slots are reused in round-robin order. Measurements show that the number of virtual-to-physical translations during instruction fetch (mmu_translate() calls) decreased by ~8%.
1 parent 4404e48 commit f270dca

File tree

2 files changed

+46
-4
lines changed

2 files changed

+46
-4
lines changed

riscv.c

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
374374
/* cache hit */
375375
uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK;
376376
uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS);
377-
icache_block_t *blk = &vm->icache.block[idx];
377+
icache_block_t *blk = &vm->icache.i_block[idx];
378378

379379
if (likely(blk->valid && blk->tag == tag)) {
380380
#ifdef MMU_CACHE_STATS
@@ -385,11 +385,30 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
385385
return;
386386
}
387387

388+
/* search the victim cache */
389+
uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS;
390+
for (int i = 0; i < VCACHE_BLOCKS; i++) {
391+
victim_cache_block_t *vblk = &vm->icache.v_block[i];
392+
393+
/* victim cache hit, swap blocks */
394+
if (vblk->valid && vblk->tag == vcache_key) {
395+
icache_block_t tmp = *blk;
396+
*blk = *vblk;
397+
*vblk = tmp;
398+
blk->tag = tag;
399+
vblk->tag = (tmp.tag << ICACHE_INDEX_BITS) | idx;
400+
401+
uint32_t ofs = addr & ICACHE_BLOCK_MASK;
402+
*value = *(const uint32_t *) (blk->base + ofs);
403+
return;
404+
}
405+
}
406+
388407
#ifdef MMU_CACHE_STATS
389408
vm->cache_fetch.misses++;
390409
#endif
391410

392-
/* cache miss, Continue using the original va->pa*/
411+
/* icache miss: continue with the original va->pa translation */
393412
uint32_t vpn = addr >> RV_PAGE_SHIFT;
394413
uint32_t index = __builtin_parity(vpn) & 0x1;
395414
if (unlikely(vpn != vm->cache_fetch[index].n_pages)) {
@@ -408,7 +427,16 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
408427
*value =
409428
vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
410429

411-
/* fill into the cache */
430+
/* Move the current icache block into the victim cache before replacement */
431+
if (blk->valid) {
432+
victim_cache_block_t *vblk = &vm->icache.v_block[vm->icache.v_next];
433+
*vblk = *blk;
434+
vblk->tag = (blk->tag << ICACHE_INDEX_BITS) | idx;
435+
vblk->valid = true;
436+
vm->icache.v_next = (vm->icache.v_next + 1) % VCACHE_BLOCKS;
437+
}
438+
439+
/* fill into the icache */
412440
uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK;
413441
blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off;
414442
blk->tag = tag;

riscv.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,16 @@ typedef struct __vm_internel vm_t;
9191
#define ICACHE_OFFSET_BITS (__builtin_ctz((ICACHE_BLOCKS_SIZE)))
9292
#define ICACHE_INDEX_BITS (__builtin_ctz((ICACHE_BLOCKS)))
9393

94+
/* Define the victim cache.
95+
*
96+
* The block size of the victim cache is identical to that of the primary
97+
* instruction cache (IC), ensuring full block compatibility.
98+
* However, the number of blocks is smaller, allowing the VC to store
99+
* a few recently evicted cache lines to reduce conflict misses.
100+
*/
101+
#define VCACHE_BLOCK_SIZE ICACHE_BLOCKS_SIZE
102+
#define VCACHE_BLOCKS 16
103+
94104
/* For power-of-two sizes, (size - 1) sets all low bits to 1,
95105
* allowing fast extraction of an address.
96106
*/
@@ -104,8 +114,12 @@ typedef struct {
104114
bool valid;
105115
} icache_block_t;
106116

117+
typedef icache_block_t victim_cache_block_t;
118+
107119
typedef struct {
108-
icache_block_t block[ICACHE_BLOCKS];
120+
icache_block_t i_block[ICACHE_BLOCKS];
121+
victim_cache_block_t v_block[VCACHE_BLOCKS];
122+
uint32_t v_next;
109123
} icache_t;
110124

111125
struct __hart_internal {

0 commit comments

Comments
 (0)