Skip to content

Commit 1ff3c37

Browse files
committed
Add victim cache for I-cache
Introduce a small victim cache to reduce conflict misses in the direct-mapped instruction cache. On an I-cache miss, probe the victim cache; on a hit, swap the victim block with the current I-cache block and return the data. Measurements show that the number of virtual-to-physical translations performed during instruction fetch (i.e., mmu_translate() calls) decreased by ~8%.
1 parent 4cb3c3c commit 1ff3c37

File tree

2 files changed

+45
-3
lines changed

2 files changed

+45
-3
lines changed

riscv.c

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
374374
/* cache hit */
375375
uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK;
376376
uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS);
377-
icache_block_t *blk = &vm->icache.block[idx];
377+
icache_block_t *blk = &vm->icache.i_block[idx];
378378
uint32_t vpn = addr >> RV_PAGE_SHIFT;
379379
uint32_t index = __builtin_parity(vpn) & 0x1;
380380

@@ -387,6 +387,25 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
387387
return;
388388
}
389389

390+
/* search the victim cache */
391+
uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS;
392+
for (int i = 0; i < VCACHE_BLOCKS; i++) {
393+
victim_cache_block_t *vblk = &vm->icache.v_block[i];
394+
395+
/* victim cache hit, swap blocks */
396+
if (vblk->valid && vblk->tag == vcache_key) {
397+
icache_block_t tmp = *blk;
398+
*blk = *vblk;
399+
*vblk = tmp;
400+
blk->tag = tag;
401+
vblk->tag = (tmp.tag << ICACHE_INDEX_BITS) | idx;
402+
403+
uint32_t ofs = addr & ICACHE_BLOCK_MASK;
404+
*value = *(const uint32_t *) (blk->base + ofs);
405+
return;
406+
}
407+
}
408+
390409
#ifdef MMU_CACHE_STATS
391410
vm->cache_fetch[index].misses++;
392411
#endif
@@ -408,7 +427,16 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
408427
*value =
409428
vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
410429

411-
/* fill into the cache */
430+
/* Move the current icache block into the victim cache before replacement */
431+
if (blk->valid) {
432+
victim_cache_block_t *vblk = &vm->icache.v_block[vm->icache.v_next];
433+
*vblk = *blk;
434+
vblk->tag = (blk->tag << ICACHE_INDEX_BITS) | idx;
435+
vblk->valid = true;
436+
vm->icache.v_next = (vm->icache.v_next + 1) % VCACHE_BLOCKS;
437+
}
438+
439+
/* fill into the icache */
412440
uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK;
413441
blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off;
414442
blk->tag = tag;

riscv.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,16 @@ typedef struct __vm_internel vm_t;
8787
#define ICACHE_OFFSET_BITS 8
8888
#define ICACHE_INDEX_BITS 8
8989

90+
/* Define the victim cache.
91+
*
92+
* The block size of the victim cache is identical to that of the primary
93+
* instruction cache (IC), ensuring full block compatibility.
94+
* However, the number of blocks is smaller, allowing the VC to store
95+
* a few recently evicted cache lines to reduce conflict misses.
96+
*/
97+
#define VCACHE_BLOCK_SIZE ICACHE_BLOCKS_SIZE
98+
#define VCACHE_BLOCKS 16
99+
90100
/* For power-of-two sizes, (size - 1) sets all low bits to 1,
91101
* allowing fast extraction of an address.
92102
*/
@@ -100,8 +110,12 @@ typedef struct {
100110
bool valid;
101111
} icache_block_t;
102112

113+
typedef icache_block_t victim_cache_block_t;
114+
103115
typedef struct {
104-
icache_block_t block[ICACHE_BLOCKS];
116+
icache_block_t i_block[ICACHE_BLOCKS];
117+
victim_cache_block_t v_block[VCACHE_BLOCKS];
118+
uint32_t v_next;
105119
} icache_t;
106120

107121
struct __hart_internal {

0 commit comments

Comments
 (0)