specialized shared memory module

This commit is contained in:
Blaise Tine 2021-01-16 04:41:58 -08:00
parent ad6e0b4e77
commit fcbf57b66a
12 changed files with 395 additions and 295 deletions

View file

@ -4,7 +4,7 @@ SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf
POCL_CC_PATH ?= /opt/pocl/compiler
POCL_RT_PATH ?= /opt/pocl/runtime
OPTS ?= -n64
OPTS ?= -n32
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)

View file

@ -101,7 +101,7 @@ static void cleanup() {
if (h_c) free(h_c);
}
int size = 64;
int size = 32;
static void show_usage() {
printf("Usage: [-n size] [-h: help]\n");

View file

@ -172,7 +172,6 @@ module VX_cluster #(
.DRSQ_SIZE (`L2DRSQ_SIZE),
.CRSQ_SIZE (`L2CRSQ_SIZE),
.DREQ_SIZE (`L2DREQ_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`XDRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),

View file

@ -299,14 +299,11 @@
// Cache ID
`define SCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2)
// Block size in bytes
`define SCACHE_LINE_SIZE 4
// Word size in bytes
`define SWORD_SIZE 4
// bank address offset
`define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SCACHE_LINE_SIZE)
`define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SWORD_SIZE)
// Core request size
`define SNUM_REQUESTS `NUM_THREADS

View file

@ -103,7 +103,6 @@ module VX_mem_unit # (
.DRSQ_SIZE (`IDRSQ_SIZE),
.CRSQ_SIZE (`ICRSQ_SIZE),
.DREQ_SIZE (`IDREQ_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (0),
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
@ -160,8 +159,7 @@ module VX_mem_unit # (
.MSHR_SIZE (`DMSHR_SIZE),
.DRSQ_SIZE (`DDRSQ_SIZE),
.CRSQ_SIZE (`DCRSQ_SIZE),
.DREQ_SIZE (`DDREQ_SIZE),
.DRAM_ENABLE (1),
.DREQ_SIZE (`DDREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
@ -215,22 +213,16 @@ module VX_mem_unit # (
.clk (clk),
.reset (reset),
.reset_out (scache_reset)
);
);
VX_cache #(
VX_shared_mem #(
.CACHE_ID (`SCACHE_ID),
.CACHE_SIZE (`SMEM_SIZE),
.CACHE_LINE_SIZE (`SCACHE_LINE_SIZE),
.NUM_BANKS (`SNUM_BANKS),
.WORD_SIZE (`SWORD_SIZE),
.NUM_REQS (`SNUM_REQUESTS),
.CREQ_SIZE (`SCREQ_SIZE),
.MSHR_SIZE (8),
.DRSQ_SIZE (1),
.CRSQ_SIZE (`SCRSQ_SIZE),
.DREQ_SIZE (1),
.DRAM_ENABLE (0),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET)
@ -240,6 +232,10 @@ module VX_mem_unit # (
.clk (clk),
.reset (scache_reset),
`ifdef PERF_ENABLE
.perf_cache_if (perf_smem_if),
`endif
// Core request
.core_req_valid (smem_req_if.valid),
.core_req_rw (smem_req_if.rw),
@ -253,26 +249,7 @@ module VX_mem_unit # (
.core_rsp_valid (smem_rsp_if.valid),
.core_rsp_data (smem_rsp_if.data),
.core_rsp_tag (smem_rsp_if.tag),
.core_rsp_ready (smem_rsp_if.ready),
`ifdef PERF_ENABLE
.perf_cache_if (perf_smem_if),
`endif
// DRAM request
`UNUSED_PIN (dram_req_valid),
`UNUSED_PIN (dram_req_rw),
`UNUSED_PIN (dram_req_byteen),
`UNUSED_PIN (dram_req_addr),
`UNUSED_PIN (dram_req_data),
`UNUSED_PIN (dram_req_tag),
.dram_req_ready (1'b0),
// DRAM response
.dram_rsp_valid (0),
.dram_rsp_data (0),
.dram_rsp_tag (0),
`UNUSED_PIN (dram_rsp_ready)
.core_rsp_ready (smem_rsp_if.ready)
);
end

View file

@ -174,7 +174,6 @@ module Vortex (
.DRSQ_SIZE (`L3DRSQ_SIZE),
.CRSQ_SIZE (`L3CRSQ_SIZE),
.DREQ_SIZE (`L3DREQ_SIZE),
.DRAM_ENABLE (1),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),

302
hw/rtl/cache/VX_bank.v vendored
View file

@ -27,9 +27,6 @@ module VX_bank #(
// DRAM Request Queue Size
parameter DREQ_SIZE = 1,
// Enable dram update
parameter DRAM_ENABLE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
@ -103,38 +100,27 @@ module VX_bank #(
wire drsq_push = dram_rsp_valid && dram_rsp_ready;
if (DRAM_ENABLE) begin
wire drsq_full;
assign dram_rsp_ready = !drsq_full;
wire drsq_full;
assign dram_rsp_ready = !drsq_full;
VX_fifo_queue_xt #(
.DATAW (`LINE_ADDR_WIDTH + $bits(dram_rsp_data)),
.SIZE (DRSQ_SIZE),
.FASTRAM (1)
) dram_rsp_queue (
.clk (clk),
.reset (reset),
.push (drsq_push),
.pop (drsq_pop),
.data_in ({dram_rsp_addr, dram_rsp_data}),
`UNUSED_PIN (data_out),
.empty (drsq_empty),
.data_out_next ({drsq_addr_next, drsq_filldata_next}),
.empty_next (drsq_empty_next),
.full (drsq_full),
`UNUSED_PIN (almost_full),
`UNUSED_PIN (size)
);
end else begin
`UNUSED_VAR (dram_rsp_valid)
`UNUSED_VAR (dram_rsp_addr)
`UNUSED_VAR (dram_rsp_data)
assign drsq_empty = 1;
assign drsq_empty_next = 1;
assign drsq_addr_next = 0;
assign drsq_filldata_next = 0;
assign dram_rsp_ready = 0;
end
VX_fifo_queue_xt #(
.DATAW (`LINE_ADDR_WIDTH + $bits(dram_rsp_data)),
.SIZE (DRSQ_SIZE),
.FASTRAM (1)
) dram_rsp_queue (
.clk (clk),
.reset (reset),
.push (drsq_push),
.pop (drsq_pop),
.data_in ({dram_rsp_addr, dram_rsp_data}),
`UNUSED_PIN (data_out),
.empty (drsq_empty),
.data_out_next ({drsq_addr_next, drsq_filldata_next}),
.empty_next (drsq_empty_next),
.full (drsq_full),
`UNUSED_PIN (almost_full),
`UNUSED_PIN (size)
);
wire creq_pop;
wire creq_full, creq_empty;
@ -221,14 +207,6 @@ module VX_bank #(
wire dreq_push_unqual_st0, dreq_push_unqual_st1;
wire writeen_st1;
wire core_req_hit_st1;
wire valid_st01;
wire writeen_st01;
wire [`LINE_ADDR_WIDTH-1:0] addr_st01;
wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st01;
wire [WORD_SIZE-1:0] byteen_st01;
wire [`WORD_WIDTH-1:0] writeword_st01;
wire [`REQ_TAG_WIDTH-1:0] tag_st01;
wire mshr_push_stall;
wire crsq_push_stall;
@ -278,8 +256,7 @@ module VX_bank #(
assign {debug_pc_st0, debug_wid_st0} = 0;
end
`endif
if (DRAM_ENABLE) begin
VX_tag_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
@ -290,7 +267,7 @@ if (DRAM_ENABLE) begin
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
) tag_access (
) tag_access (
.clk (clk),
.reset (reset),
@ -314,66 +291,28 @@ if (DRAM_ENABLE) begin
.writeen_in (valid_st1 && writeen_st1)
);
assign valid_st01 = valid_st1;
assign writeen_st01 = writeen_st1;
assign addr_st01 = addr_st1;
assign wsel_st01 = wsel_st1;
assign byteen_st01 = byteen_st1;
assign writeword_st01 = writeword_st1;
assign tag_st01 = tag_st1;
// redundant fills
wire is_redundant_fill = is_fill_st0 && !miss_st0;
// we have a miss in mshr or going to it for the current address
wire mshr_pending_st0 = mshr_pending_unqual_st0
|| (valid_st1 && (miss_st1 || force_miss_st1) && (addr_st0 == addr_st1));
|| (valid_st1 && (miss_st1 || force_miss_st1) && (addr_st0 == addr_st1));
// force miss to ensure commit order when a new request has pending previous requests to same block
assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_st0;
assign writeen_unqual_st0 = (!is_fill_st0 && !miss_st0 && mem_rw_st0)
|| (is_fill_st0 && !is_redundant_fill);
|| (is_fill_st0 && !is_redundant_fill);
wire send_fill_req_st0 = !is_fill_st0 && miss_st0
&& !(WRITE_THROUGH && mem_rw_st0);
&& !(WRITE_THROUGH && mem_rw_st0);
assign do_writeback_st0 = (WRITE_THROUGH && !is_fill_st0 && mem_rw_st0)
|| (!WRITE_THROUGH && is_fill_st0 && dirty_st0 && !is_redundant_fill);
|| (!WRITE_THROUGH && is_fill_st0 && dirty_st0 && !is_redundant_fill);
assign dreq_push_unqual_st0 = send_fill_req_st0 || do_writeback_st0;
assign mshr_push_unqual_st0 = !is_fill_st0 && !(WRITE_THROUGH && mem_rw_st0);
end else begin
`UNUSED_VAR (mshr_pending_unqual_st0)
`UNUSED_VAR (drsq_push)
`UNUSED_VAR (dirty_st0)
`UNUSED_VAR (writeen_st1)
`ifdef DBG_CACHE_REQ_INFO
assign debug_pc_st1 = debug_pc_st0;
assign debug_wid_st1 = debug_wid_st0;
`endif
assign valid_st01 = valid_st0;
assign writeen_st01 = mem_rw_st0;
assign addr_st01 = addr_st0;
assign wsel_st01 = wsel_st0;
assign byteen_st01 = byteen_st0;
assign writeword_st01 = writeword_st0;
assign tag_st01 = tag_st0;
assign miss_st0 = 0;
assign dirty_st0 = 0;
assign force_miss_st0 = 0;
assign readtag_st0 = 0;
assign do_writeback_st0 = 0;
assign writeen_unqual_st0 = mem_rw_st0;
assign dreq_push_unqual_st0 = 0;
assign mshr_push_unqual_st0 = 0;
end
assign mshr_push_unqual_st0 = !is_fill_st0 && !(WRITE_THROUGH && mem_rw_st0);
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH),
@ -403,7 +342,6 @@ end
assign {debug_pc_st01, debug_wid_st01} = 0;
end
`endif
`UNUSED_VAR (tag_st01)
VX_data_access #(
.BANK_ID (BANK_ID),
@ -412,7 +350,6 @@ end
.CACHE_SIZE (CACHE_SIZE),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.DRAM_ENABLE (DRAM_ENABLE),
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITE_THROUGH (WRITE_THROUGH)
@ -435,12 +372,12 @@ end
.dirtyb_out (dirtyb_st0),
// writing
.writeen_in (valid_st01 && writeen_st01),
.waddr_in (addr_st01),
.writeen_in (valid_st1 && writeen_st1),
.waddr_in (addr_st1),
.wfill_in (is_fill_st1),
.wwsel_in (wsel_st01),
.wbyteen_in (byteen_st01),
.writeword_in (writeword_st01),
.wwsel_in (wsel_st1),
.wbyteen_in (byteen_st1),
.writeword_in (writeword_st1),
.filldata_in (filldata_st1)
);
@ -461,81 +398,59 @@ end
wire incoming_fill_st1 = valid_st0 && is_fill_st0 && (addr_st1 == addr_st0);
if (DRAM_ENABLE) begin
wire mshr_dequeue_st1 = valid_st1 && is_mshr_st1 && !mshr_push_unqual && !pipeline_stall;
wire mshr_dequeue_st1 = valid_st1 && is_mshr_st1 && !mshr_push_unqual && !pipeline_stall;
// push a missed request as 'ready' if it was a forced miss that actually had a hit
// or the fill request for this block is coming
wire mshr_init_ready_state_st1 = !miss_st1 || incoming_fill_st1;
// push a missed request as 'ready' if it was a forced miss that actually had a hit
// or the fill request for this block is coming
wire mshr_init_ready_state_st1 = !miss_st1 || incoming_fill_st1;
VX_miss_resrv #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.MSHR_SIZE (MSHR_SIZE),
.ALM_FULL (MSHR_SIZE-1),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
) miss_resrv (
.clk (clk),
.reset (reset),
VX_miss_resrv #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.MSHR_SIZE (MSHR_SIZE),
.ALM_FULL (MSHR_SIZE-1),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
) miss_resrv (
.clk (clk),
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.deq_debug_pc (debug_pc_st0),
.deq_debug_wid (debug_wid_st0),
.enq_debug_pc (debug_pc_st1),
.enq_debug_wid (debug_wid_st1),
`endif
`ifdef DBG_CACHE_REQ_INFO
.deq_debug_pc (debug_pc_st0),
.deq_debug_wid (debug_wid_st0),
.enq_debug_pc (debug_pc_st1),
.enq_debug_wid (debug_wid_st1),
`endif
// enqueue
.enqueue (mshr_push),
.enqueue_addr (addr_st1),
.enqueue_data ({writeword_st1, req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}),
.enqueue_is_mshr (is_mshr_st1),
.enqueue_as_ready (mshr_init_ready_state_st1),
.enqueue_almfull (mshr_almost_full),
// enqueue
.enqueue (mshr_push),
.enqueue_addr (addr_st1),
.enqueue_data ({writeword_st1, req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}),
.enqueue_is_mshr (is_mshr_st1),
.enqueue_as_ready (mshr_init_ready_state_st1),
.enqueue_almfull (mshr_almost_full),
// lookup
.lookup_ready (drsq_pop),
.lookup_addr (addr_st0),
.lookup_match (mshr_pending_unqual_st0),
// schedule
.schedule (mshr_pop),
.schedule_valid (mshr_valid),
`UNUSED_PIN (schedule_addr),
`UNUSED_PIN (schedule_data),
.schedule_valid_next(mshr_valid_next),
.schedule_addr_next (mshr_addr_next),
.schedule_data_next ({mshr_writeword_next, mshr_tid_next, mshr_tag_next, mshr_rw_next, mshr_byteen_next, mshr_wsel_next}),
// lookup
.lookup_ready (drsq_pop),
.lookup_addr (addr_st0),
.lookup_match (mshr_pending_unqual_st0),
// schedule
.schedule (mshr_pop),
.schedule_valid (mshr_valid),
`UNUSED_PIN (schedule_addr),
`UNUSED_PIN (schedule_data),
.schedule_valid_next(mshr_valid_next),
.schedule_addr_next (mshr_addr_next),
.schedule_data_next ({mshr_writeword_next, mshr_tid_next, mshr_tag_next, mshr_rw_next, mshr_byteen_next, mshr_wsel_next}),
// dequeue
.dequeue (mshr_dequeue_st1)
);
end else begin
`UNUSED_VAR (valid_st1)
`UNUSED_VAR (mshr_push)
`UNUSED_VAR (wsel_st1)
`UNUSED_VAR (writeword_st1)
`UNUSED_VAR (mem_rw_st1)
`UNUSED_VAR (byteen_st1)
`UNUSED_VAR (incoming_fill_st1)
assign mshr_almost_full = 0;
assign mshr_pending_unqual_st0 = 0;
assign mshr_valid = 0;
assign mshr_valid_next = 0;
assign mshr_addr_next = 0;
assign mshr_wsel_next = 0;
assign mshr_writeword_next = 0;
assign mshr_tid_next = 0;
assign mshr_tag_next = 0;
assign mshr_rw_next = 0;
assign mshr_byteen_next = 0;
end
// dequeue
.dequeue (mshr_dequeue_st1)
);
// Enqueue core response
@ -625,44 +540,25 @@ end
assign dreq_byteen = writeback ? dreq_byteen_unqual : {CACHE_LINE_SIZE{1'b1}};
if (DRAM_ENABLE) begin
VX_fifo_queue_xt #(
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (DREQ_SIZE),
.ALM_FULL (DREQ_SIZE-1),
.FASTRAM (1)
) dram_req_queue (
.clk (clk),
.reset (reset),
.push (dreq_push),
.pop (dreq_pop),
.data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}),
.data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
.empty (dreq_empty),
.almost_full (dreq_almost_full),
`UNUSED_PIN (full),
`UNUSED_PIN (data_out_next),
`UNUSED_PIN (empty_next),
`UNUSED_PIN (size)
);
end else begin
`UNUSED_VAR (dreq_push)
`UNUSED_VAR (dreq_pop)
`UNUSED_VAR (dreq_addr)
`UNUSED_VAR (dreq_data)
`UNUSED_VAR (dreq_byteen)
`UNUSED_VAR (readtag_st1)
`UNUSED_VAR (dirtyb_st1)
`UNUSED_VAR (readdata_st1)
`UNUSED_VAR (writeback)
`UNUSED_VAR (dram_req_ready)
assign dreq_empty = 1;
assign dreq_almost_full = 0;
assign dram_req_rw = 0;
assign dram_req_byteen = 0;
assign dram_req_addr = 0;
assign dram_req_data = 0;
end
VX_fifo_queue_xt #(
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (DREQ_SIZE),
.ALM_FULL (DREQ_SIZE-1),
.FASTRAM (1)
) dram_req_queue (
.clk (clk),
.reset (reset),
.push (dreq_push),
.pop (dreq_pop),
.data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}),
.data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
.empty (dreq_empty),
.almost_full (dreq_almost_full),
`UNUSED_PIN (full),
`UNUSED_PIN (data_out_next),
`UNUSED_PIN (empty_next),
`UNUSED_PIN (size)
);
assign dram_req_valid = !dreq_empty;

View file

@ -24,10 +24,7 @@ module VX_cache #(
// Core Response Queue Size
parameter CRSQ_SIZE = 4,
// DRAM Request Queue Size
parameter DREQ_SIZE = 4,
// Enable dram update
parameter DRAM_ENABLE = 1,
parameter DREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
@ -129,8 +126,8 @@ module VX_cache #(
.NUM_REQS (NUM_REQS),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.BANK_ADDR_OFFSET(BANK_ADDR_OFFSET),
.BUFFERED ((NUM_BANKS > 1) && DRAM_ENABLE)
) cache_core_req_bank_sel (
.BUFFERED (NUM_BANKS > 1)
) core_req_bank_sel (
.clk (clk),
.reset (reset),
`ifdef PERF_ENABLE
@ -244,7 +241,6 @@ module VX_cache #(
.DRSQ_SIZE (DRSQ_SIZE),
.CRSQ_SIZE (CRSQ_SIZE),
.DREQ_SIZE (DREQ_SIZE),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITE_THROUGH (WRITE_THROUGH),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
@ -302,7 +298,7 @@ module VX_cache #(
.NUM_REQS (NUM_REQS),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) cache_core_rsp_merge (
) core_rsp_merge (
.clk (clk),
.reset (reset),
.per_bank_core_rsp_valid (per_bank_core_rsp_valid),
@ -316,41 +312,26 @@ module VX_cache #(
.core_rsp_ready (core_rsp_ready)
);
if (DRAM_ENABLE) begin
wire [NUM_BANKS-1:0][(`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign data_in[i] = {per_bank_dram_req_addr[i], per_bank_dram_req_rw[i], per_bank_dram_req_byteen[i], per_bank_dram_req_data[i]};
end
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
.BUFFERED (1)
) dram_req_arb (
.clk (clk),
.reset (reset),
.valid_in (per_bank_dram_req_valid),
.data_in (data_in),
.ready_in (per_bank_dram_req_ready),
.valid_out (dram_req_valid),
.data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}),
.ready_out (dram_req_ready)
);
end else begin
`UNUSED_VAR (per_bank_dram_req_valid)
`UNUSED_VAR (per_bank_dram_req_rw)
`UNUSED_VAR (per_bank_dram_req_byteen)
`UNUSED_VAR (per_bank_dram_req_addr)
`UNUSED_VAR (per_bank_dram_req_data)
assign per_bank_dram_req_ready = 0;
assign dram_req_valid = 0;
assign dram_req_rw = 0;
assign dram_req_byteen = 0;
assign dram_req_addr = 0;
assign dram_req_data = 0;
`UNUSED_VAR (dram_req_ready)
wire [NUM_BANKS-1:0][(`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign data_in[i] = {per_bank_dram_req_addr[i], per_bank_dram_req_rw[i], per_bank_dram_req_byteen[i], per_bank_dram_req_data[i]};
end
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
.BUFFERED (1)
) dram_req_arb (
.clk (clk),
.reset (reset),
.valid_in (per_bank_dram_req_valid),
.data_in (data_in),
.ready_in (per_bank_dram_req_ready),
.valid_out (dram_req_valid),
.data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}),
.ready_out (dram_req_ready)
);
`ifdef PERF_ENABLE
// per cycle: core_reads, core_writes
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;

View file

@ -112,7 +112,7 @@ module VX_cache_core_req_bank_sel #(
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign per_bank_core_req_stall[i] = ~per_bank_core_req_ready[i] & per_bank_core_req_valid[i];
assign per_bank_core_req_stall[i] = ~per_bank_core_req_ready[i] && (!BUFFERED || per_bank_core_req_valid[i]);
VX_pipe_register #(
.DATAW (1 + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + CORE_TAG_WIDTH + `WORD_WIDTH),
.RESETW (1),

View file

@ -50,7 +50,6 @@ module VX_cache_core_rsp_merge #(
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]) begin
core_rsp_tag_unqual = per_bank_core_rsp_tag[i];
break;
end
end

View file

@ -2,8 +2,7 @@
module VX_data_access #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1,
// Size of line inside a bank in bytes
@ -11,17 +10,11 @@ module VX_data_access #(
// Number of banks
parameter NUM_BANKS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Enable dram update
parameter DRAM_ENABLE = 1,
parameter WORD_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
// Enable write-through
parameter WRITE_THROUGH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) (
@ -111,7 +104,7 @@ module VX_data_access #(
assign byte_enable = wfill_in ? {CACHE_LINE_SIZE{1'b1}} : wbyteen_qual;
assign write_data = wfill_in ? filldata_in : writedata_qual;
wire rw_hazard = DRAM_ENABLE && (raddr == waddr) && writeen_in;
wire rw_hazard = (raddr == waddr) && writeen_in;
if (`WORD_SELECT_BITS != 0) begin
for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin

259
hw/rtl/cache/VX_shared_mem.v vendored Normal file
View file

@ -0,0 +1,259 @@
`include "VX_cache_config.vh"
// VX_shared_mem: banked shared-memory block with a cache-like core interface.
//
// Data path, as implemented below:
//   1. VX_cache_core_req_bank_sel (unbuffered) maps the NUM_REQS incoming
//      requests onto NUM_BANKS per-bank request slots.
//   2. All per-bank slots of one cycle are stored as a single entry in
//      core_req_queue.
//   3. The entry at the head of core_req_queue drives one VX_sp_ram per bank.
//      An entry in which any valid slot is a write is handled as a write
//      (no response is generated); otherwise the per-bank read data is
//      scattered back to the requesting lanes (by tid) and pushed into
//      core_rsp_queue together with the request tag.
//   4. The head entry is consumed whenever core_rsp_queue is not full.
//
// The line size is fixed to one word (CACHE_LINE_SIZE == WORD_SIZE); there is
// no tag store, MSHR, or DRAM interface.
//
// All per-bank signals are NUM_BANKS wide and all per-lane response signals
// are NUM_REQS wide, so the queue widths stay consistent when
// NUM_REQS > NUM_BANKS (the only relation enforced is NUM_BANKS <= NUM_REQS).
module VX_shared_mem #(
    parameter CACHE_ID          = 0,

    // Size of cache in bytes
    parameter CACHE_SIZE        = 16384,
    // Number of banks
    parameter NUM_BANKS         = 4,
    // Size of a word in bytes
    parameter WORD_SIZE         = 4,
    // Number of Word requests per cycle
    parameter NUM_REQS          = NUM_BANKS,

    // Core Request Queue Size
    parameter CREQ_SIZE         = 4,
    // Core Response Queue Size
    parameter CRSQ_SIZE         = 4,

    // core request tag size
    parameter CORE_TAG_WIDTH    = 1,
    // size of tag id in core request tag
    parameter CORE_TAG_ID_BITS  = 0,

    // bank offset from beginning of index range
    parameter BANK_ADDR_OFFSET  = 0
) (
    `SCOPE_IO_VX_cache

    input wire clk,
    input wire reset,

    // PERF
`ifdef PERF_ENABLE
    VX_perf_cache_if perf_cache_if,
`endif

    // Core request
    input wire [NUM_REQS-1:0]                       core_req_valid,
    input wire [NUM_REQS-1:0]                       core_req_rw,
    input wire [NUM_REQS-1:0][WORD_SIZE-1:0]        core_req_byteen,
    input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
    input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]      core_req_data,
    input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0]   core_req_tag,
    output wire [NUM_REQS-1:0]                      core_req_ready,

    // Core response
    output wire [NUM_REQS-1:0]                      core_rsp_valid,
    output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]     core_rsp_data,
    output wire [CORE_TAG_WIDTH-1:0]                core_rsp_tag,
    input  wire                                     core_rsp_ready
);

    `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))

    // One word per line.
    // NOTE(review): presumably consumed by the address macros from
    // VX_cache_config.vh (LINE_SELECT_BITS, LINES_PER_BANK, ...) - confirm.
    localparam CACHE_LINE_SIZE = WORD_SIZE;

`ifdef DBG_CACHE_REQ_INFO
    /* verilator lint_off UNUSED */
    wire [31:0]         debug_pc_st0;
    wire [`NW_BITS-1:0] debug_wid_st0;
    /* verilator lint_on UNUSED */
`endif

    // ------------------------------------------------------------------
    // Bank selection: NUM_REQS lane requests -> NUM_BANKS bank requests
    // ------------------------------------------------------------------

    wire [NUM_BANKS-1:0]                        per_bank_core_req_valid_unqual;
    wire [NUM_BANKS-1:0][`REQS_BITS-1:0]        per_bank_core_req_tid_unqual;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_rw_unqual;
    wire [NUM_BANKS-1:0][WORD_SIZE-1:0]         per_bank_core_req_byteen_unqual;
    wire [NUM_BANKS-1:0][`WORD_ADDR_WIDTH-1:0]  per_bank_core_req_addr_unqual;
    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0]       per_bank_core_req_data_unqual;
    wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0]    per_bank_core_req_tag_unqual;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_ready_unqual;

    VX_cache_core_req_bank_sel #(
        .CACHE_LINE_SIZE (WORD_SIZE),
        .NUM_BANKS       (NUM_BANKS),
        .WORD_SIZE       (WORD_SIZE),
        .NUM_REQS        (NUM_REQS),
        .CORE_TAG_WIDTH  (CORE_TAG_WIDTH),
        .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET),
        .BUFFERED        (0)
    ) core_req_bank_sel (
        .clk        (clk),
        .reset      (reset),
    `ifdef PERF_ENABLE
        .bank_stalls(perf_cache_if.bank_stalls),
    `else
        `UNUSED_PIN (bank_stalls),
    `endif
        .core_req_valid (core_req_valid),
        .core_req_rw    (core_req_rw),
        .core_req_byteen(core_req_byteen),
        .core_req_addr  (core_req_addr),
        .core_req_data  (core_req_data),
        .core_req_tag   (core_req_tag),
        .core_req_ready (core_req_ready),
        .per_bank_core_req_valid (per_bank_core_req_valid_unqual),
        .per_bank_core_req_tid   (per_bank_core_req_tid_unqual),
        .per_bank_core_req_rw    (per_bank_core_req_rw_unqual),
        .per_bank_core_req_byteen(per_bank_core_req_byteen_unqual),
        .per_bank_core_req_addr  (per_bank_core_req_addr_unqual),
        .per_bank_core_req_tag   (per_bank_core_req_tag_unqual),
        .per_bank_core_req_data  (per_bank_core_req_data_unqual),
        .per_bank_core_req_ready (per_bank_core_req_ready_unqual)
    );

    `UNUSED_VAR (per_bank_core_req_tag_unqual)
    `UNUSED_VAR (per_bank_core_req_rw_unqual)

    // ------------------------------------------------------------------
    // Core request queue (one entry holds every bank slot of a cycle)
    // ------------------------------------------------------------------

    // Queue outputs. Every field is per-bank, hence NUM_BANKS wide, which is
    // what the DATAW expression of core_req_queue assumes.
    wire [NUM_BANKS-1:0]                        per_bank_core_req_valid;
    wire [NUM_BANKS-1:0][`REQS_BITS-1:0]        per_bank_core_req_tid;
    wire [NUM_BANKS-1:0]                        per_bank_core_req_rw;
    wire [NUM_BANKS-1:0][WORD_SIZE-1:0]         per_bank_core_req_byteen;
    wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr;
    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0]       per_bank_core_req_data;
    wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0]    per_bank_core_req_tag;

    wire creq_push, creq_pop, creq_empty, creq_full;
    wire crsq_full;

    // Accept a new entry when any lane has a request and there is room.
    assign creq_push = (| core_req_valid) && !creq_full;
    // Consume the head entry only when a response could be stored.
    assign creq_pop  = ~creq_empty && ~crsq_full;

    // All banks share the same back-pressure: the request queue being full.
    assign per_bank_core_req_ready_unqual = {NUM_BANKS{~creq_full}};

    // Reduce each bank's word address to the line index inside the bank.
    wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual;
    `UNUSED_VAR (per_bank_core_req_addr_unqual)
    for (genvar i = 0; i < NUM_BANKS; i++) begin
        wire [`LINE_ADDR_WIDTH-1:0] tmp = `LINE_SELECT_ADDRX(per_bank_core_req_addr_unqual[i]);
        assign per_bank_core_req_addr_qual[i] = tmp[`LINE_SELECT_BITS-1:0];
        `UNUSED_VAR (tmp)
    end

    VX_fifo_queue #(
        .DATAW   (NUM_BANKS * (1 + `REQS_BITS + 1 + WORD_SIZE + `LINE_SELECT_BITS + `WORD_WIDTH + CORE_TAG_WIDTH)),
        .SIZE    (CREQ_SIZE),
        .FASTRAM (1)
    ) core_req_queue (
        .clk     (clk),
        .reset   (reset),
        .push    (creq_push),
        .pop     (creq_pop),
        .data_in ({per_bank_core_req_valid_unqual,
                   per_bank_core_req_tid_unqual,
                   per_bank_core_req_rw_unqual,
                   per_bank_core_req_byteen_unqual,
                   per_bank_core_req_addr_qual,
                   per_bank_core_req_data_unqual,
                   per_bank_core_req_tag_unqual}),
        .data_out({per_bank_core_req_valid,
                   per_bank_core_req_tid,
                   per_bank_core_req_rw,
                   per_bank_core_req_byteen,
                   per_bank_core_req_addr,
                   per_bank_core_req_data,
                   per_bank_core_req_tag}),
        .empty   (creq_empty),
        .full    (creq_full),
        `UNUSED_PIN (size)
    );

    // ------------------------------------------------------------------
    // Per-bank data storage
    // ------------------------------------------------------------------

    wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;

    for (genvar i = 0; i < NUM_BANKS; i++) begin
        VX_sp_ram #(
            .DATAW  (`WORD_WIDTH),
            .SIZE   (`LINES_PER_BANK),
            .BYTEENW(WORD_SIZE),
            .RWCHECK(1)
        ) data (
            .clk   (clk),
            .addr  (per_bank_core_req_addr[i]),
            // Write only while the head entry is actually being consumed
            // (queue not empty and response queue not full); this keeps a
            // stale queue output from re-triggering a write when the request
            // queue is empty.
            .wren  (creq_pop && per_bank_core_req_valid[i] && per_bank_core_req_rw[i]),
            .byteen(per_bank_core_req_byteen[i]),
            .rden  (1'b1),
            .din   (per_bank_core_req_data[i]),
            .dout  (per_bank_core_rsp_data[i])
        );
    end

    // ------------------------------------------------------------------
    // Response assembly: scatter bank read data back to requesting lanes
    // ------------------------------------------------------------------

    reg [NUM_REQS-1:0]                  core_rsp_valid_unqual;
    reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
    reg [CORE_TAG_WIDTH-1:0]            core_rsp_tag_unqual;

    always @(*) begin
        core_rsp_valid_unqual = 0;
        core_rsp_data_unqual  = 'x;
        core_rsp_tag_unqual   = 'x;
        for (integer i = 0; i < NUM_BANKS; i++) begin
            if (per_bank_core_req_valid[i]) begin
                core_rsp_valid_unqual[per_bank_core_req_tid[i]] = 1;
                core_rsp_data_unqual[per_bank_core_req_tid[i]]  = per_bank_core_rsp_data[i];
                // Without a break the tag of the highest valid bank wins.
                core_rsp_tag_unqual = per_bank_core_req_tag[i];
            end
        end
    end

`ifdef DBG_CACHE_REQ_INFO
    if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
        assign {debug_pc_st0, debug_wid_st0} = core_rsp_tag_unqual[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
    end else begin
        assign {debug_pc_st0, debug_wid_st0} = 0;
    end
`endif

    // ------------------------------------------------------------------
    // Core response queue
    // ------------------------------------------------------------------

    wire [NUM_REQS-1:0] core_rsp_valid_tmask;
    wire crsq_push, crsq_pop, crsq_empty;

    // An entry is treated as a write if any of its valid slots is a write.
    wire core_rsp_rw = | (per_bank_core_req_valid & per_bank_core_req_rw);

    // Only reads produce a response.
    assign crsq_push = ~creq_empty && ~core_rsp_rw && ~crsq_full;
    assign crsq_pop  = ~crsq_empty && core_rsp_ready;

    VX_fifo_queue #(
        // Valid mask and data are per lane (NUM_REQS), plus one shared tag.
        .DATAW    (NUM_REQS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
        .SIZE     (CRSQ_SIZE),
        .BUFFERED (1),
        .FASTRAM  (1)
    ) core_rsp_queue (
        .clk     (clk),
        .reset   (reset),
        .push    (crsq_push),
        .pop     (crsq_pop),
        .data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
        .data_out({core_rsp_valid_tmask,  core_rsp_data,        core_rsp_tag}),
        .empty   (crsq_empty),
        .full    (crsq_full),
        `UNUSED_PIN (size)
    );

    // Mask the lane-valid bits while the response queue holds nothing.
    assign core_rsp_valid = core_rsp_valid_tmask & {NUM_REQS{~crsq_empty}};

`ifdef DBG_PRINT_CACHE_BANK
    always @(posedge clk) begin
        if (crsq_full) begin
            $display("%t: cache%0d pipeline-stall", $time, CACHE_ID);
        end
        if (creq_pop) begin
            if (core_rsp_rw)
                $display("%t: cache%0d core-wr-req: tmask=%0b, addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, per_bank_core_req_valid, per_bank_core_req_addr, per_bank_core_req_tag, per_bank_core_req_byteen, per_bank_core_req_data, debug_wid_st0, debug_pc_st0);
            else
                $display("%t: cache%0d core-rd-req: tmask=%0b, addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, per_bank_core_req_valid, per_bank_core_req_addr, per_bank_core_req_tag, per_bank_core_req_byteen, per_bank_core_rsp_data, debug_wid_st0, debug_pc_st0);
        end
    end
`endif

`ifdef PERF_ENABLE
    // Only bank_stalls is measured (by core_req_bank_sel); the remaining
    // counters of the perf interface are tied to zero here.
    assign perf_cache_if.reads        = '0;
    assign perf_cache_if.writes       = '0;
    assign perf_cache_if.read_misses  = '0;
    assign perf_cache_if.write_misses = '0;
    assign perf_cache_if.mshr_stalls  = '0;
    assign perf_cache_if.pipe_stalls  = '0;
    assign perf_cache_if.crsp_stalls  = '0;
`endif

endmodule