cache multi-porting fixes + optimization

This commit is contained in:
Blaise Tine 2021-08-29 18:33:49 -07:00
parent e26cfab04d
commit 90b50277d0
8 changed files with 100 additions and 69 deletions

View file

@ -46,8 +46,8 @@ debug()
{
echo "begin debugging tests..."
./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --perf --app=demo --args="-n1"
./ci/blackbox.sh --driver=vlsim --cores=2 --clusters=2 --l2cache --debug --app=demo --args="-n1"
./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=basic --args="-t0 -n1"
echo "debugging tests done!"
@ -72,14 +72,18 @@ FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using FPNEW FPU core
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# adjust l1 block size to match l2
CONFIGS="-DMEM_BLOCK_SIZE=16 -DL1_BLOCK_SIZE=16" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1"
# test cache banking
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
CONFIGS="-DDNUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
CONFIGS="-DDNUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
# test cache multi-porting
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
CONFIGS="-DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=io_addr
CONFIGS="-DL2NUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
CONFIGS="-DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo --debug --args="-n1"
CONFIGS="-DL2NUM_PORTS=2 -DDNUM_PORTS=2" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr
CONFIGS="-DL2NUM_PORTS=4 -DDNUM_PORTS=4" ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache --app=io_addr
# test 128-bit MEM block
CONFIGS=-DMEM_BLOCK_SIZE=16 ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo

View file

@ -38,7 +38,7 @@
`endif
`ifndef L1_BLOCK_SIZE
`define L1_BLOCK_SIZE (`NUM_THREADS * 4)
`define L1_BLOCK_SIZE ((`L2_ENABLE || `L3_ENABLE) ? (`NUM_THREADS * 4) : `MEM_BLOCK_SIZE)
`endif
`ifndef STARTUP_ADDR

View file

@ -251,7 +251,7 @@
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
// Block size in bytes
`define ICACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
`define ICACHE_LINE_SIZE `L1_BLOCK_SIZE
// Word size in bytes
`define IWORD_SIZE 4
@ -289,7 +289,7 @@
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
// Block size in bytes
`define DCACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
`define DCACHE_LINE_SIZE `L1_BLOCK_SIZE
// Word size in bytes
`define DWORD_SIZE 4

View file

@ -77,6 +77,7 @@ module VX_bank #(
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [NUM_PORTS-1:0] mem_req_pmask,
output wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen,
output wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel,
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
@ -161,6 +162,7 @@ module VX_bank #(
wire crsq_valid, crsq_ready, crsq_stall;
wire mreq_alm_full;
// prevent read-during-write hazard when accessing tags/data block RAMs
wire rdw_fill_hazard = valid_st0 && is_fill_st0;
wire rdw_write_hazard = valid_st0 && write_st0 && ~creq_rw;
@ -174,14 +176,14 @@ module VX_bank #(
wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable;
wire mshr_ready = mshr_grant
&& !rdw_fill_hazard // prevent read-during-write
&& !rdw_fill_hazard // prevent read-during-write hazard
&& !crsq_stall; // ensure core response ready
assign mem_rsp_ready = mrsq_grant
&& !crsq_stall; // ensure core response ready
assign creq_ready = creq_grant
&& !rdw_write_hazard // prevent read-during-write
&& !rdw_write_hazard // prevent read-during-write hazard
&& !mreq_alm_full // ensure memory request ready
&& !mshr_alm_full // ensure mshr enqueue ready
&& !crsq_stall; // ensure core response ready
@ -198,6 +200,12 @@ module VX_bank #(
end
`endif
// Line-wide write-data selection.
// wdata_sel is a full cache line wide (`CACHE_LINE_WIDTH bits) and is built in two parts:
//  - the low NUM_PORTS * `WORD_WIDTH bits carry the matching slice of mem_rsp_data
//    when a memory response is valid or when WRITE_ENABLE is 0; otherwise they
//    carry creq_data,
//  - every remaining upper bit is always driven from mem_rsp_data.
wire [`CACHE_LINE_WIDTH-1:0] wdata_sel;
// low slice: memory response data takes priority over core request data
assign wdata_sel[(NUM_PORTS * `WORD_WIDTH)-1:0] = (mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data[(NUM_PORTS * `WORD_WIDTH)-1:0] : creq_data;
// upper bits (beyond the per-port words): pass mem_rsp_data through bit by bit;
// the loop generates nothing when NUM_PORTS * `WORD_WIDTH equals `CACHE_LINE_WIDTH
for (genvar i = NUM_PORTS * `WORD_WIDTH; i < `CACHE_LINE_WIDTH; ++i) begin
assign wdata_sel[i] = mem_rsp_data[i];
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH),
.RESETW (1)
@ -212,7 +220,7 @@ module VX_bank #(
mshr_enable,
creq_fire && creq_rw,
mshr_enable ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)),
(mem_rsp_valid || !WRITE_ENABLE) ? mem_rsp_data : `CACHE_LINE_WIDTH'(creq_data),
wdata_sel,
mshr_enable ? mshr_wsel : creq_wsel,
creq_byteen,
mshr_enable ? mshr_tid : creq_tid,
@ -265,6 +273,8 @@ module VX_bank #(
// we have a core request hit
assign miss_st0 = !is_fill_st0 && !tag_match_st0;
wire read_st0 = !is_fill_st0 && !write_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + NUM_PORTS * (WORD_SELECT_BITS + WORD_SIZE + `REQS_BITS + 1 + CORE_TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
.RESETW (1)
@ -302,19 +312,22 @@ module VX_bank #(
if (`WORDS_PER_LINE > 1) begin
reg [`CACHE_LINE_WIDTH-1:0] line_wdata_r;
reg [CACHE_LINE_SIZE-1:0] line_byteen_r;
always @(*) begin
line_wdata_r = 'x;
line_byteen_r = 0;
if (NUM_PORTS > 1) begin
for (integer p = 0; p < NUM_PORTS; p++) begin
if (creq_pmask[p]) begin
line_wdata_r[creq_wsel[p] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data_st1[p];
line_byteen_r[wsel_st1[p] * WORD_SIZE +: WORD_SIZE] = byteen_st1[p];
if (NUM_PORTS > 1) begin
always @(*) begin
line_wdata_r = 'x;
line_byteen_r = 0;
for (integer i = 0; i < NUM_PORTS; ++i) begin
if (pmask_st1[i]) begin
line_wdata_r[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH] = creq_data_st1[i];
line_byteen_r[wsel_st1[i] * WORD_SIZE +: WORD_SIZE] = byteen_st1[i];
end
end
end else begin
end
end else begin
always @(*) begin
line_wdata_r = {`WORDS_PER_LINE{creq_data_st1}};
line_byteen_r[wsel_st1[0] * WORD_SIZE +: WORD_SIZE] = byteen_st1[0];
line_byteen_r = 0;
line_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1;
end
end
assign line_wdata_st1 = line_wdata_r;
@ -360,8 +373,8 @@ module VX_bank #(
wire mshr_allocate = creq_fire && ~creq_rw;
wire mshr_replay = do_fill_st0 && ~crsq_stall;
wire mshr_lookup = valid_st0 && ~write_st0 && ~is_mshr_st0 && ~crsq_stall;
wire mshr_release = valid_st1 && read_st1 && ~is_mshr_st1 && ~miss_st1 && ~crsq_stall;
wire mshr_lookup = valid_st0 && read_st0 && !is_mshr_st0 && !crsq_stall;
wire mshr_release = valid_st1 && read_st1 && !is_mshr_st1 && !miss_st1 && !crsq_stall;
wire mshr_not_full;
@ -435,15 +448,15 @@ module VX_bank #(
assign crsq_tag = tag_st1;
if (`WORDS_PER_LINE > 1) begin
for (genvar p = 0; p < NUM_PORTS; ++p) begin
assign crsq_data[p] = rdata_st1[wsel_st1[p] * `WORD_WIDTH +: `WORD_WIDTH];
for (genvar i = 0; i < NUM_PORTS; ++i) begin
assign crsq_data[i] = rdata_st1[wsel_st1[i] * `WORD_WIDTH +: `WORD_WIDTH];
end
end else begin
assign crsq_data = rdata_st1;
end
VX_elastic_buffer #(
.DATAW ((CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.DATAW (NUM_PORTS * (CORE_TAG_WIDTH + 1 + `WORD_WIDTH + `REQS_BITS)),
.SIZE (CRSQ_SIZE),
.OUTPUT_REG (1 == NUM_BANKS)
) core_rsp_req (
@ -462,6 +475,7 @@ module VX_bank #(
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mreq_data;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mreq_byteen;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mreq_wsel;
wire [NUM_PORTS-1:0] mreq_pmask;
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
@ -474,19 +488,13 @@ module VX_bank #(
assign mreq_rw = WRITE_ENABLE && write_st1;
assign mreq_addr = addr_st1;
assign mreq_id = mshr_id_st1;
assign mreq_pmask= pmask_st1;
assign mreq_wsel = wsel_st1;
assign mreq_byteen = byteen_st1;
assign mreq_data = creq_data_st1;
if (NUM_PORTS > 1) begin
for (genvar p = 0; p < NUM_PORTS; ++p) begin
assign mreq_byteen[p] = pmask_st1[p] ? byteen_st1[p] : WORD_SIZE'(0);
end
end else begin
assign mreq_byteen[0] = byteen_st1[0];
end
VX_fifo_queue #(
.DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
.DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
.SIZE (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-2)
) mem_req_queue (
@ -494,8 +502,8 @@ module VX_bank #(
.reset (reset),
.push (mreq_push),
.pop (mreq_pop),
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_byteen, mreq_wsel, mreq_data}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}),
.data_in ({mreq_rw, mreq_addr, mreq_id, mreq_pmask, mreq_byteen, mreq_wsel, mreq_data}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_pmask, mem_req_byteen, mem_req_wsel, mem_req_data}),
.empty (mreq_empty),
.alm_full (mreq_alm_full),
`UNUSED_PIN (full),

View file

@ -107,34 +107,41 @@ module VX_cache #(
///////////////////////////////////////////////////////////////////////////
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_p;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_p;
wire [NUM_PORTS-1:0] mem_req_pmask_p;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_p;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_p;
wire mem_req_rw_p;
wire mem_req_rw_p;
if (WRITE_ENABLE) begin
if (`WORDS_PER_LINE > 1) begin
reg [CACHE_LINE_SIZE-1:0] mem_req_byteen_r;
reg [`CACHE_LINE_WIDTH-1:0] mem_req_data_r;
reg [CACHE_LINE_SIZE-1:0] mem_req_byteen_r;
reg [`CACHE_LINE_WIDTH-1:0] mem_req_data_r;
always @(*) begin
mem_req_byteen_r = 0;
mem_req_data_r = 'x;
for (integer p = 0; p < NUM_PORTS; ++p) begin
if (mem_req_byteen_p[p] != 0) begin
mem_req_byteen_r[mem_req_wsel_p[p] * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p[p];
mem_req_data_r[mem_req_wsel_p[p] * `WORD_WIDTH +: `WORD_WIDTH] = mem_req_data_p[p];
always @(*) begin
mem_req_byteen_r = 0;
mem_req_data_r = 'x;
for (integer i = 0; i < NUM_PORTS; ++i) begin
if ((1 == NUM_PORTS) || mem_req_pmask_p[i]) begin
mem_req_byteen_r[mem_req_wsel_p[i] * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p[i];
mem_req_data_r[mem_req_wsel_p[i] * `WORD_WIDTH +: `WORD_WIDTH] = mem_req_data_p[i];
end
end
end
assign mem_req_rw = mem_req_rw_p;
assign mem_req_byteen = mem_req_byteen_r;
assign mem_req_data = mem_req_data_r;
end else begin
`UNUSED_VAR (mem_req_pmask_p)
`UNUSED_VAR (mem_req_wsel_p)
assign mem_req_rw = mem_req_rw_p;
assign mem_req_byteen = mem_req_byteen_p;
assign mem_req_data = mem_req_data_p;
end
assign mem_req_rw = mem_req_rw_p;
assign mem_req_byteen = mem_req_byteen_r;
assign mem_req_data = mem_req_data_r;
end else begin
`UNUSED_VAR (mem_req_byteen_p)
`UNUSED_VAR (mem_req_pmask_p)
`UNUSED_VAR (mem_req_wsel_p)
`UNUSED_VAR (mem_req_data_p)
`UNUSED_VAR (mem_req_rw_p)
@ -142,7 +149,6 @@ module VX_cache #(
assign mem_req_rw = 0;
assign mem_req_byteen = 'x;
assign mem_req_data = 'x;
end
@ -169,7 +175,8 @@ module VX_cache #(
wire mem_req_valid_nc;
wire mem_req_rw_nc;
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_nc;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_nc;
wire [NUM_PORTS-1:0] mem_req_pmask_nc;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen_nc;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_nc;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data_nc;
wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_nc;
@ -236,6 +243,7 @@ module VX_cache #(
.mem_req_valid_in (mem_req_valid_nc),
.mem_req_rw_in (mem_req_rw_nc),
.mem_req_addr_in (mem_req_addr_nc),
.mem_req_pmask_in (mem_req_pmask_nc),
.mem_req_byteen_in (mem_req_byteen_nc),
.mem_req_wsel_in (mem_req_wsel_nc),
.mem_req_data_in (mem_req_data_nc),
@ -246,6 +254,7 @@ module VX_cache #(
.mem_req_valid_out (mem_req_valid),
.mem_req_addr_out (mem_req_addr),
.mem_req_rw_out (mem_req_rw_p),
.mem_req_pmask_out (mem_req_pmask_p),
.mem_req_byteen_out (mem_req_byteen_p),
.mem_req_wsel_out (mem_req_wsel_p),
.mem_req_data_out (mem_req_data_p),
@ -282,6 +291,7 @@ module VX_cache #(
assign mem_req_valid = mem_req_valid_nc;
assign mem_req_addr = mem_req_addr_nc;
assign mem_req_rw_p = mem_req_rw_nc;
assign mem_req_pmask_p = mem_req_pmask_nc;
assign mem_req_byteen_p = mem_req_byteen_nc;
assign mem_req_wsel_p = mem_req_wsel_nc;
assign mem_req_data_p = mem_req_data_nc;
@ -360,7 +370,8 @@ module VX_cache #(
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_mem_req_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] per_bank_mem_req_wsel;
wire [NUM_BANKS-1:0][`MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
@ -433,6 +444,7 @@ module VX_cache #(
wire curr_bank_mem_req_valid;
wire curr_bank_mem_req_rw;
wire [NUM_PORTS-1:0] curr_bank_mem_req_pmask;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_mem_req_byteen;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] curr_bank_mem_req_wsel;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
@ -469,6 +481,7 @@ module VX_cache #(
// Memory request
assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid;
assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw;
assign per_bank_mem_req_pmask[i] = curr_bank_mem_req_pmask;
assign per_bank_mem_req_byteen[i] = curr_bank_mem_req_byteen;
assign per_bank_mem_req_wsel[i] = curr_bank_mem_req_wsel;
if (NUM_BANKS == 1) begin
@ -547,6 +560,7 @@ module VX_cache #(
// Memory request
.mem_req_valid (curr_bank_mem_req_valid),
.mem_req_rw (curr_bank_mem_req_rw),
.mem_req_pmask (curr_bank_mem_req_pmask),
.mem_req_byteen (curr_bank_mem_req_byteen),
.mem_req_wsel (curr_bank_mem_req_wsel),
.mem_req_addr (curr_bank_mem_req_addr),
@ -591,9 +605,9 @@ module VX_cache #(
.core_rsp_ready (core_rsp_ready_nc)
);
wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + NUM_PORTS * (WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]};
wire [NUM_BANKS-1:0][(MEM_TAG_IN_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH))-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_id[i], per_bank_mem_req_rw[i], per_bank_mem_req_pmask[i], per_bank_mem_req_byteen[i], per_bank_mem_req_wsel[i], per_bank_mem_req_data[i]};
end
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id;
@ -602,7 +616,7 @@ module VX_cache #(
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
.DATAW (`MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH)),
.BUFFERED (1)
) mem_req_arb (
.clk (clk),
@ -611,7 +625,7 @@ module VX_cache #(
.data_in (data_in),
.ready_in (per_bank_mem_req_ready),
.valid_out (mem_req_valid_nc),
.data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_byteen_nc, mem_req_wsel_nc, mem_req_data_nc}),
.data_out ({mem_req_addr_nc, mem_req_id, mem_req_rw_nc, mem_req_pmask_nc, mem_req_byteen_nc, mem_req_wsel_nc, mem_req_data_nc}),
.ready_out (mem_req_ready_nc)
);

View file

@ -237,7 +237,7 @@ module VX_core_rsp_merge #(
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
bank_select_table[per_bank_core_rsp_tid[i][i]] = (1 << i);
bank_select_table[per_bank_core_rsp_tid[i]] = (1 << i);
end
end
end

View file

@ -73,7 +73,7 @@ module VX_data_access #(
.BYTEENW (BYTEENW),
.NO_RWCHECK (1)
) data_store (
.clk (clk),
.clk (clk),
.addr (line_addr),
.wren (wren),
.wdata (wdata),
@ -89,7 +89,7 @@ module VX_data_access #(
if (is_fill) begin
dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data);
end else begin
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, write_data);
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, wren, line_addr, write_data);
end
end
if (readen && ~stall) begin

View file

@ -59,7 +59,8 @@ module VX_nc_bypass #(
input wire mem_req_valid_in,
input wire mem_req_rw_in,
input wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_in,
input wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in,
input wire [NUM_PORTS-1:0] mem_req_pmask_in,
input wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in,
input wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in,
input wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in,
input wire [MEM_TAG_IN_WIDTH-1:0] mem_req_tag_in,
@ -69,6 +70,7 @@ module VX_nc_bypass #(
output wire mem_req_valid_out,
output wire mem_req_rw_out,
output wire [MEM_ADDR_WIDTH-1:0] mem_req_addr_out,
output wire [NUM_PORTS-1:0] mem_req_pmask_out,
output wire [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_out,
output wire [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_out,
output wire [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_out,
@ -188,7 +190,7 @@ module VX_nc_bypass #(
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
if (D != 0) begin
if (D != 0) begin
reg [NUM_PORTS-1:0][CORE_DATA_SIZE-1:0] mem_req_byteen_in_r;
reg [NUM_PORTS-1:0][MEM_SELECT_BITS-1:0] mem_req_wsel_in_r;
reg [NUM_PORTS-1:0][CORE_DATA_WIDTH-1:0] mem_req_data_in_r;
@ -206,12 +208,15 @@ module VX_nc_bypass #(
mem_req_data_in_r[0] = core_req_data_in_sel;
end
assign mem_req_pmask_out = mem_req_valid_in ? mem_req_pmask_in : NUM_PORTS'(1'b1);
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_wsel_out = mem_req_valid_in ? mem_req_wsel_in : mem_req_wsel_in_r;
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : mem_req_data_in_r;
assign mem_req_tag_out = mem_req_valid_in ? MEM_TAG_OUT_WIDTH'(mem_req_tag_in_c) : MEM_TAG_OUT_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
end else begin
`UNUSED_VAR (mem_req_wsel_in)
`UNUSED_VAR (mem_req_pmask_in)
assign mem_req_pmask_out = 0;
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
assign mem_req_data_out = mem_req_valid_in ? mem_req_data_in : core_req_data_in_sel;
assign mem_req_wsel_out = 0;