Refactor all arbiters to add output buffering when the request count is greater than 2; optimize the cache core response module, which is on the critical path when the cache runs as L2

This commit is contained in:
Blaise Tine 2020-11-08 01:31:46 -08:00
parent b14007f930
commit 10505caae1
19 changed files with 602 additions and 534 deletions

View file

@ -19,12 +19,12 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
#DEBUG=1
DEBUG=1
#SCOPE=1
CFLAGS += -fPIC

View file

@ -19,7 +19,7 @@ ase-2c: gen_sources setup-ase-2c
ase-4c: gen_sources setup-ase-4c
make -C $(ASE_BUILD_DIR)_4c
cp ../rtl/fp_cores/altera/*.hex $(ASE_BUILD_DIR)_3c/work
cp ../rtl/fp_cores/altera/*.hex $(ASE_BUILD_DIR)_4c/work
setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile

View file

@ -1,5 +1,5 @@
+define+NUM_CORES=4
+define+L2_ENABLE=0
+define+L2_ENABLE=1
+define+SYNTHESIS
+define+QUARTUS
+define+FPU_FAST

View file

@ -8,7 +8,7 @@
`endif
`ifndef NUM_CORES
`define NUM_CORES 4
`define NUM_CORES 2
`endif
`ifndef NUM_WARPS
@ -250,11 +250,6 @@
`define IBANK_LINE_SIZE `GLOBAL_BLOCK_SIZE
`endif
// Number of banks {1, 2, 4, 8,...}
`ifndef INUM_BANKS
`define INUM_BANKS 1
`endif
// Size of a word in bytes
`ifndef IWORD_SIZE
`define IWORD_SIZE 4

View file

@ -33,52 +33,66 @@ module VX_csr_io_arb #(
output wire [31:0] csr_io_rsp_data_out,
input wire csr_io_rsp_ready_out
);
if (NUM_REQUESTS == 1) begin
if (NUM_REQUESTS > 1) begin
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign csr_io_req_valid_out[i] = csr_io_req_valid_in && (request_id == `REQS_BITS'(i));
assign csr_io_req_addr_out[i] = csr_io_req_addr_in;
assign csr_io_req_rw_out[i] = csr_io_req_rw_in;
assign csr_io_req_data_out[i] = csr_io_req_data_in;
end
assign csr_io_req_ready_in = csr_io_req_ready_out[request_id];
///////////////////////////////////////////////////////////////////////
wire [REQS_BITS-1:0] rsp_idx;
wire [NUM_REQUESTS-1:0] rsp_1hot;
VX_fixed_arbiter #(
.N(NUM_REQUESTS)
) rsp_arb (
.clk (clk),
.reset (reset),
.requests (csr_io_rsp_valid_in),
`UNUSED_PIN (grant_valid),
.grant_index (rsp_idx),
.grant_onehot (rsp_1hot)
);
wire stall = csr_io_rsp_valid_out && ~csr_io_rsp_ready_out;
VX_generic_register #(
.N(1 + 32),
.PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({csr_io_rsp_valid_in[rsp_idx], csr_io_rsp_data_in[rsp_idx]}),
.out ({csr_io_rsp_valid_out, csr_io_rsp_data_out})
);
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign csr_io_rsp_ready_in[i] = rsp_1hot[i] && ~stall;
end
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (request_id)
assign csr_io_req_valid_out = csr_io_req_valid_in;
assign csr_io_req_rw_out = csr_io_req_rw_in;
assign csr_io_req_addr_out = csr_io_req_addr_in;
assign csr_io_req_data_out = csr_io_req_data_in;
assign csr_io_req_ready_in = csr_io_req_ready_out;
assign csr_io_req_valid_out = csr_io_req_valid_in;
assign csr_io_req_addr_out = csr_io_req_addr_in;
assign csr_io_req_rw_out = csr_io_req_rw_in;
assign csr_io_req_data_out = csr_io_req_data_in;
assign csr_io_req_ready_in = csr_io_req_ready_out;
assign csr_io_rsp_valid_out = csr_io_rsp_valid_in;
assign csr_io_rsp_data_out = csr_io_rsp_data_in;
assign csr_io_rsp_ready_in = csr_io_rsp_ready_out;
end else begin
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign csr_io_req_valid_out[i] = csr_io_req_valid_in && (request_id == `REQS_BITS'(i));
assign csr_io_req_rw_out[i] = csr_io_req_rw_in;
assign csr_io_req_addr_out[i] = csr_io_req_addr_in;
assign csr_io_req_data_out[i] = csr_io_req_data_in;
end
assign csr_io_req_ready_in = csr_io_req_ready_out[request_id];
reg [REQS_BITS-1:0] bus_rsp_sel;
VX_fixed_arbiter #(
.N(NUM_REQUESTS)
) arbiter (
.clk (clk),
.reset (reset),
.requests (csr_io_rsp_valid_in),
.grant_index (bus_rsp_sel),
`UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_onehot)
);
assign csr_io_rsp_valid_out = csr_io_rsp_valid_in [bus_rsp_sel];
assign csr_io_rsp_data_out = csr_io_rsp_data_in [bus_rsp_sel];
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign csr_io_rsp_ready_in[i] = csr_io_rsp_ready_out && (bus_rsp_sel == `REQS_BITS'(i));
end
assign csr_io_rsp_valid_out = csr_io_rsp_valid_in;
assign csr_io_rsp_data_out = csr_io_rsp_data_in;
assign csr_io_rsp_ready_in = csr_io_rsp_ready_out;
end

View file

@ -274,6 +274,9 @@
// Cache ID
`define ICACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1
// Number of banks
`define INUM_BANKS 1
// Core request address bits
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))

View file

@ -8,10 +8,10 @@ module VX_gpu_unit #(
input wire clk,
input wire reset,
// Input
// Inputs
VX_gpu_req_if gpu_req_if,
// Output
// Outputs
VX_warp_ctl_if warp_ctl_if,
VX_exu_to_cmt_if gpu_commit_if
);

View file

@ -14,94 +14,103 @@ module VX_io_arb #(
input wire reset,
// input requests
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] io_req_valid_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0] io_req_valid_in,
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_req_tag_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_in,
input wire [NUM_REQUESTS-1:0] io_req_rw_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_in,
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_req_tag_in,
input wire [NUM_REQUESTS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_in,
output wire [NUM_REQUESTS-1:0] io_req_ready_in,
// input response
output wire [NUM_REQUESTS-1:0] io_rsp_valid_in,
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] io_rsp_data_in,
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] io_rsp_tag_in,
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] io_rsp_data_in,
input wire [NUM_REQUESTS-1:0] io_rsp_ready_in,
// output request
output wire [`NUM_THREADS-1:0] io_req_valid_out,
output wire [`NUM_THREADS-1:0] io_req_valid_out,
output wire [TAG_OUT_WIDTH-1:0] io_req_tag_out,
output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_out,
output wire io_req_rw_out,
output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_out,
output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] io_req_addr_out,
output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_out,
output wire [TAG_OUT_WIDTH-1:0] io_req_tag_out,
output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] io_req_byteen_out,
output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] io_req_data_out,
input wire io_req_ready_out,
// output response
input wire io_rsp_valid_out,
input wire [WORD_WIDTH-1:0] io_rsp_data_out,
input wire [TAG_OUT_WIDTH-1:0] io_rsp_tag_out,
input wire [WORD_WIDTH-1:0] io_rsp_data_out,
output wire io_rsp_ready_out
);
if (NUM_REQUESTS == 1) begin
if (NUM_REQUESTS > 1) begin
wire [NUM_REQUESTS-1:0] valids;
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign valids[i] = (| io_req_valid_in[i]);
end
wire [REQS_BITS-1:0] req_idx;
wire [NUM_REQUESTS-1:0] req_1hot;
VX_rr_arbiter #(
.N(NUM_REQUESTS)
) req_arb (
.clk (clk),
.reset (reset),
.requests (valids),
`UNUSED_PIN (grant_valid),
.grant_index (req_idx),
.grant_onehot (req_1hot)
);
wire stall = (| io_req_valid_out) && ~io_req_ready_out;
VX_generic_register #(
.N(`NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH)),
.PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({io_req_valid_in[req_idx], {io_req_tag_in[req_idx], REQS_BITS'(req_idx)}, io_req_addr_in[req_idx], io_req_rw_in[req_idx], io_req_byteen_in[req_idx], io_req_data_in[req_idx]}),
.out ({io_req_valid_out, io_req_tag_out, io_req_addr_out, io_req_rw_out, io_req_byteen_out, io_req_data_out})
);
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign io_req_ready_in[i] = req_1hot[i] && ~stall;
end
///////////////////////////////////////////////////////////////////////
wire [REQS_BITS-1:0] rsp_sel = io_rsp_tag_out[REQS_BITS-1:0];
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign io_rsp_valid_in[i] = io_rsp_valid_out && (rsp_sel == REQS_BITS'(i));
assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
assign io_rsp_data_in[i] = io_rsp_data_out;
end
assign io_rsp_ready_out = io_rsp_ready_in[rsp_sel];
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign io_req_valid_out = io_req_valid_in;
assign io_req_tag_out = io_req_tag_in;
assign io_req_addr_out = io_req_addr_in;
assign io_req_rw_out = io_req_rw_in;
assign io_req_byteen_out = io_req_byteen_in;
assign io_req_addr_out = io_req_addr_in;
assign io_req_data_out = io_req_data_in;
assign io_req_tag_out = io_req_tag_in;
assign io_req_ready_in = io_req_ready_out;
assign io_rsp_valid_in = io_rsp_valid_out;
assign io_rsp_data_in = io_rsp_data_out;
assign io_rsp_tag_in = io_rsp_tag_out;
assign io_rsp_data_in = io_rsp_data_out;
assign io_rsp_ready_out = io_rsp_ready_in;
end else begin
reg [REQS_BITS-1:0] bus_req_sel;
wire [NUM_REQUESTS-1:0] valid_requests;
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign valid_requests[i] = (| io_req_valid_in[i]);
end
VX_rr_arbiter #(
.N(NUM_REQUESTS)
) arbiter (
.clk (clk),
.reset (reset),
.requests (valid_requests),
.grant_index (bus_req_sel),
`UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_onehot)
);
assign io_req_valid_out = io_req_valid_in [bus_req_sel];
assign io_req_rw_out = io_req_rw_in [bus_req_sel];
assign io_req_byteen_out = io_req_byteen_in [bus_req_sel];
assign io_req_addr_out = io_req_addr_in [bus_req_sel];
assign io_req_data_out = io_req_data_in [bus_req_sel];
assign io_req_tag_out = {io_req_tag_in [bus_req_sel], REQS_BITS'(bus_req_sel)};
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign io_req_ready_in[i] = io_req_ready_out && (bus_req_sel == REQS_BITS'(i));
end
wire [REQS_BITS-1:0] bus_rsp_sel = io_rsp_tag_out[REQS_BITS-1:0];
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign io_rsp_valid_in[i] = io_rsp_valid_out && (bus_rsp_sel == REQS_BITS'(i));
assign io_rsp_data_in[i] = io_rsp_data_out;
assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
end
assign io_rsp_ready_out = io_rsp_ready_in[bus_rsp_sel];
end
endmodule

View file

@ -14,88 +14,98 @@ module VX_mem_arb #(
input wire reset,
// input requests
input wire [NUM_REQUESTS-1:0] mem_req_valid_in,
input wire [NUM_REQUESTS-1:0] mem_req_valid_in,
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_req_tag_in,
input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] mem_req_addr_in,
input wire [NUM_REQUESTS-1:0] mem_req_rw_in,
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] mem_req_byteen_in,
input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] mem_req_addr_in,
input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_req_data_in,
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_req_tag_in,
input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_req_data_in,
output wire [NUM_REQUESTS-1:0] mem_req_ready_in,
// input response
output wire [NUM_REQUESTS-1:0] mem_rsp_valid_in,
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_rsp_data_in,
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_rsp_tag_in,
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_rsp_data_in,
input wire [NUM_REQUESTS-1:0] mem_rsp_ready_in,
// output request
output wire mem_req_valid_out,
output wire [TAG_OUT_WIDTH-1:0] mem_req_tag_out,
output wire [ADDR_WIDTH-1:0] mem_req_addr_out,
output wire mem_req_rw_out,
output wire [WORD_SIZE-1:0] mem_req_byteen_out,
output wire [ADDR_WIDTH-1:0] mem_req_addr_out,
output wire [WORD_WIDTH-1:0] mem_req_data_out,
output wire [TAG_OUT_WIDTH-1:0] mem_req_tag_out,
input wire mem_req_ready_out,
// output response
input wire mem_rsp_valid_out,
input wire [WORD_WIDTH-1:0] mem_rsp_data_out,
input wire [TAG_OUT_WIDTH-1:0] mem_rsp_tag_out,
input wire [WORD_WIDTH-1:0] mem_rsp_data_out,
output wire mem_rsp_ready_out
);
if (NUM_REQUESTS == 1) begin
if (NUM_REQUESTS > 1) begin
wire [REQS_BITS-1:0] req_idx;
wire [NUM_REQUESTS-1:0] req_1hot;
VX_rr_arbiter #(
.N(NUM_REQUESTS)
) req_arb (
.clk (clk),
.reset (reset),
.requests (mem_req_valid_in),
`UNUSED_PIN (grant_valid),
.grant_index (req_idx),
.grant_onehot (req_1hot)
);
wire stall = mem_req_valid_out && ~mem_req_ready_out;
VX_generic_register #(
.N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + WORD_SIZE + WORD_WIDTH),
.PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({mem_req_valid_in[req_idx], {mem_req_tag_in[req_idx], REQS_BITS'(req_idx)}, mem_req_addr_in[req_idx], mem_req_rw_in[req_idx], mem_req_byteen_in[req_idx], mem_req_data_in[req_idx]}),
.out ({mem_req_valid_out, mem_req_tag_out, mem_req_addr_out, mem_req_rw_out, mem_req_byteen_out, mem_req_data_out})
);
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign mem_req_ready_in[i] = req_1hot[i] && ~stall;
end
///////////////////////////////////////////////////////////////////////
wire [REQS_BITS-1:0] rsp_sel = mem_rsp_tag_out[REQS_BITS-1:0];
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign mem_rsp_valid_in[i] = mem_rsp_valid_out && (rsp_sel == REQS_BITS'(i));
assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
assign mem_rsp_data_in[i] = mem_rsp_data_out;
end
assign mem_rsp_ready_out = mem_rsp_ready_in[rsp_sel];
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign mem_req_valid_out = mem_req_valid_in;
assign mem_req_tag_out = mem_req_tag_in;
assign mem_req_addr_out = mem_req_addr_in;
assign mem_req_rw_out = mem_req_rw_in;
assign mem_req_byteen_out = mem_req_byteen_in;
assign mem_req_addr_out = mem_req_addr_in;
assign mem_req_data_out = mem_req_data_in;
assign mem_req_tag_out = mem_req_tag_in;
assign mem_req_ready_in = mem_req_ready_out;
assign mem_rsp_valid_in = mem_rsp_valid_out;
assign mem_rsp_data_in = mem_rsp_data_out;
assign mem_rsp_tag_in = mem_rsp_tag_out;
assign mem_rsp_data_in = mem_rsp_data_out;
assign mem_rsp_ready_out = mem_rsp_ready_in;
end else begin
reg [REQS_BITS-1:0] bus_req_sel;
VX_rr_arbiter #(
.N(NUM_REQUESTS)
) arbiter (
.clk (clk),
.reset (reset),
.requests (mem_req_valid_in),
.grant_index (bus_req_sel),
`UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_onehot)
);
assign mem_req_valid_out = mem_req_valid_in [bus_req_sel];
assign mem_req_rw_out = mem_req_rw_in [bus_req_sel];
assign mem_req_byteen_out = mem_req_byteen_in [bus_req_sel];
assign mem_req_addr_out = mem_req_addr_in [bus_req_sel];
assign mem_req_data_out = mem_req_data_in [bus_req_sel];
assign mem_req_tag_out = {mem_req_tag_in [bus_req_sel], REQS_BITS'(bus_req_sel)};
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign mem_req_ready_in[i] = mem_req_ready_out && (bus_req_sel == REQS_BITS'(i));
end
wire [REQS_BITS-1:0] bus_rsp_sel = mem_rsp_tag_out[REQS_BITS-1:0];
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign mem_rsp_valid_in[i] = mem_rsp_valid_out && (bus_rsp_sel == REQS_BITS'(i));
assign mem_rsp_data_in[i] = mem_rsp_data_out;
assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
end
assign mem_rsp_ready_out = mem_rsp_ready_in[bus_rsp_sel];
end
endmodule

123
hw/rtl/cache/VX_bank.v vendored
View file

@ -216,27 +216,30 @@ module VX_bank #(
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) core_req_arb (
.clk (clk),
.reset (reset),
.clk (clk),
.reset (reset),
// Enqueue
.reqq_push (core_req_fire),
.bank_valids (core_req_valid),
.bank_rw (core_req_rw),
.bank_byteen (core_req_byteen),
.bank_addr (core_req_addr),
.bank_writedata (core_req_data),
.bank_tag (core_req_tag),
.push (core_req_fire),
.tag_in (core_req_tag),
.valids_in (core_req_valid),
.rw_in (core_req_rw),
.byteen_in (core_req_byteen),
.addr_in (core_req_addr),
.writedata_in (core_req_data),
// Dequeue
.reqq_pop (reqq_pop),
.reqq_tid_st0 (reqq_tid_st0),
.reqq_rw_st0 (reqq_rw_st0),
.reqq_byteen_st0 (reqq_byteen_st0),
.reqq_addr_st0 (reqq_addr_st0),
.reqq_writedata_st0(reqq_writeword_st0),
.reqq_tag_st0 (reqq_tag_st0),
.reqq_empty (reqq_empty),
.reqq_full (reqq_full)
.pop (reqq_pop),
.tag_out (reqq_tag_st0),
.tid_out (reqq_tid_st0),
.rw_out (reqq_rw_st0),
.byteen_out (reqq_byteen_st0),
.addr_out (reqq_addr_st0),
.writedata_out (reqq_writeword_st0),
// States
.empty (reqq_empty),
.full (reqq_full)
);
wire msrq_pop;
@ -252,7 +255,6 @@ module VX_bank #(
wire [WORD_SIZE-1:0] msrq_byteen_st0;
wire msrq_is_snp_st0;
wire msrq_snp_invalidate_st0;
wire msrq_pending_hazard_st1;
wire is_msrq_miss_st2;
wire is_msrq_miss_st3;
@ -299,7 +301,9 @@ module VX_bank #(
wire snp_invalidate_st1;
wire is_msrq_st1;
wire msrq_pending_hazard_st1;
wire[`LINE_ADDR_WIDTH-1:0] addr_st2;
wire miss_st3;
wire force_miss_st3;
wire [`LINE_ADDR_WIDTH-1:0] addr_st3;
assign is_msrq_st0 = msrq_pop_unqual;
@ -373,11 +377,11 @@ module VX_bank #(
wire writeen_st2;
wire miss_st1;
wire miss_st2;
wire miss_st3;
wire dirty_st1;
wire mem_rw_st1;
wire [WORD_SIZE-1:0] mem_byteen_st1;
wire force_miss_st2;
wire[`LINE_ADDR_WIDTH-1:0] addr_st2;
`DEBUG_BEGIN
wire [`REQ_TAG_WIDTH-1:0] tag_st1;
wire [`REQS_BITS-1:0] tid_st1;
@ -410,28 +414,28 @@ module VX_bank #(
.reset (reset),
`ifdef DBG_CORE_REQ_INFO
.debug_pc_st1 (debug_pc_st1),
.debug_rd_st1 (debug_rd_st1),
.debug_wid_st1 (debug_wid_st1),
.debug_tagid_st1(debug_tagid_st1),
.debug_pc (debug_pc_st1),
.debug_rd (debug_rd_st1),
.debug_wid (debug_wid_st1),
.debug_tagid (debug_tagid_st1),
`endif
.stall (pipeline_stall),
// Actual Read/Write
.valid_req_st1 (valid_st1),
.writefill_st1 (is_fill_st1),
.addr_st1 (addr_st1),
.mem_rw_st1 (mem_rw_st1),
.is_snp_st1 (is_snp_st1),
.snp_invalidate_st1(snp_invalidate_st1),
.force_miss_st1 (force_miss_st1),
// Inputs
.valid_in (valid_st1),
.addr_in (addr_st1),
.is_write_in (mem_rw_st1),
.is_fill_in (is_fill_st1),
.is_snp_in (is_snp_st1),
.snp_invalidate_in(snp_invalidate_st1),
.force_miss_in (force_miss_st1),
// Read Data
.readtag_st1 (readtag_st1),
.miss_st1 (miss_st1),
.dirty_st1 (dirty_st1),
.writeen_st1 (writeen_st1)
// Outputs
.readtag_out (readtag_st1),
.miss_out (miss_st1),
.dirty_out (dirty_st1),
.writeen_out (writeen_st1)
);
wire valid_st2;
@ -440,8 +444,7 @@ module VX_bank #(
wire [`WORD_WIDTH-1:0] readword_st2;
wire [`BANK_LINE_WIDTH-1:0] readdata_st2;
wire [`BANK_LINE_WIDTH-1:0] writedata_st2;
wire [WORD_SIZE-1:0] mem_byteen_st2;
wire miss_st2;
wire [WORD_SIZE-1:0] mem_byteen_st2;
wire dirty_st2;
wire [BANK_LINE_SIZE-1:0] dirtyb_st2;
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2;
@ -449,7 +452,6 @@ module VX_bank #(
wire is_fill_st2;
wire is_snp_st2;
wire snp_invalidate_st2;
wire force_miss_st2;
wire is_msrq_st2;
VX_generic_register #(
@ -486,37 +488,35 @@ module VX_bank #(
.reset (reset),
`ifdef DBG_CORE_REQ_INFO
.debug_pc_st2 (debug_pc_st2),
.debug_rd_st2 (debug_rd_st2),
.debug_wid_st2 (debug_wid_st2),
.debug_tagid_st2(debug_tagid_st2),
.debug_pc (debug_pc_st2),
.debug_rd (debug_rd_st2),
.debug_wid (debug_wid_st2),
.debug_tagid (debug_tagid_st2),
`endif
.stall (pipeline_stall),
// Actual Read/Write
.valid_req_st2 (valid_st2),
.writeen_st2 (writeen_st2),
.writefill_st2 (is_fill_st2),
.addr_st2 (addr_st2),
.wordsel_st2 (wsel_st2),
.mem_byteen_st2 (mem_byteen_st2),
.writeword_st2 (writeword_st2),
.writedata_st2 (writedata_st2),
// Inputs
.valid_in (valid_st2),
.addr_in (addr_st2),
.writeen_in (writeen_st2),
.is_fill_in (is_fill_st2),
.wordsel_in (wsel_st2),
.byteen_in (mem_byteen_st2),
.writeword_in (writeword_st2),
.writedata_in (writedata_st2),
// Read Data
.readword_st2 (readword_st2),
.readdata_st2 (readdata_st2),
.dirtyb_st2 (dirtyb_st2)
// Outputs
.readword_out (readword_st2),
.readdata_out (readdata_st2),
.dirtyb_out (dirtyb_st2)
);
wire valid_st3;
wire [`LINE_ADDR_WIDTH-1:0] addr_st3;
wire valid_st3;
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st3;
wire [`WORD_WIDTH-1:0] writeword_st3;
wire [`WORD_WIDTH-1:0] readword_st3;
wire [`BANK_LINE_WIDTH-1:0] readdata_st3;
wire miss_st3;
wire dirty_st3;
wire [BANK_LINE_SIZE-1:0] dirtyb_st3;
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st3;
@ -524,7 +524,6 @@ module VX_bank #(
wire is_fill_st3;
wire is_snp_st3;
wire snp_invalidate_st3;
wire force_miss_st3;
wire is_msrq_st3;
VX_generic_register #(

View file

@ -16,26 +16,26 @@ module VX_bank_core_req_arb #(
input wire reset,
// Enqueue Data
input wire reqq_push,
input wire [NUM_REQUESTS-1:0] bank_valids,
input wire [`CORE_REQ_TAG_COUNT-1:0] bank_rw,
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] bank_byteen,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] bank_writedata,
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] bank_addr,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] bank_tag,
input wire push,
input wire [NUM_REQUESTS-1:0] valids_in,
input wire [`CORE_REQ_TAG_COUNT-1:0] rw_in,
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] byteen_in,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] writedata_in,
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag_in,
// Dequeue Data
input wire reqq_pop,
output wire [`REQS_BITS-1:0] reqq_tid_st0,
output wire reqq_rw_st0,
output wire [WORD_SIZE-1:0] reqq_byteen_st0,
output wire [`WORD_ADDR_WIDTH-1:0] reqq_addr_st0,
output wire [`WORD_WIDTH-1:0] reqq_writedata_st0,
output wire [CORE_TAG_WIDTH-1:0] reqq_tag_st0,
input wire pop,
output wire [`REQS_BITS-1:0] tid_out,
output wire rw_out,
output wire [WORD_SIZE-1:0] byteen_out,
output wire [`WORD_ADDR_WIDTH-1:0] addr_out,
output wire [`WORD_WIDTH-1:0] writedata_out,
output wire [CORE_TAG_WIDTH-1:0] tag_out,
// State Data
output wire reqq_empty,
output wire reqq_full
output wire empty,
output wire full
);
wire [NUM_REQUESTS-1:0] out_per_valids;
@ -64,21 +64,21 @@ module VX_bank_core_req_arb #(
wire use_empty = !(| use_per_valids);
wire out_empty = !(| out_per_valids) || o_empty;
wire push_qual = reqq_push && !reqq_full;
wire push_qual = push && !full;
wire pop_qual = !out_empty && use_empty;
VX_generic_queue #(
.DATAW($bits(bank_valids) + $bits(bank_addr) + $bits(bank_writedata) + $bits(bank_tag) + $bits(bank_rw) + $bits(bank_byteen)),
.DATAW($bits(valids_in) + $bits(addr_in) + $bits(writedata_in) + $bits(tag_in) + $bits(rw_in) + $bits(byteen_in)),
.SIZE(CREQ_SIZE)
) reqq_queue (
.clk (clk),
.reset (reset),
.push (push_qual),
.data_in ({bank_valids, bank_rw, bank_byteen, bank_addr, bank_writedata, bank_tag}),
.data_in ({valids_in, rw_in, byteen_in, addr_in, writedata_in, tag_in}),
.pop (pop_qual),
.data_out ({out_per_valids, out_per_rw, out_per_byteen, out_per_addr, out_per_writedata, out_per_tag}),
.empty (o_empty),
.full (reqq_full),
.full (full),
`UNUSED_PIN (size)
);
@ -91,43 +91,33 @@ module VX_bank_core_req_arb #(
assign qual_rw = use_per_rw;
assign qual_byteen = use_per_byteen;
wire[`REQS_BITS-1:0] qual_request_index;
wire qual_has_request;
wire sel_valid;
wire[`REQS_BITS-1:0] sel_idx;
VX_fixed_arbiter #(
.N(NUM_REQUESTS)
) sel_bank (
.clk (clk),
.reset (reset),
.requests (qual_valids),
.grant_index (qual_request_index),
.grant_valid (qual_has_request),
.grant_valid (sel_valid),
.grant_index (sel_idx),
`UNUSED_PIN (grant_onehot)
);
assign reqq_empty = !qual_has_request;
assign reqq_tid_st0 = qual_request_index;
assign reqq_byteen_st0 = qual_byteen[qual_request_index];
assign reqq_addr_st0 = qual_addr[qual_request_index];
assign reqq_writedata_st0 = qual_writedata[qual_request_index];
assign empty = !sel_valid;
assign tid_out = sel_idx;
assign byteen_out = qual_byteen[sel_idx];
assign addr_out = qual_addr[sel_idx];
assign writedata_out = qual_writedata[sel_idx];
if (CORE_TAG_ID_BITS != 0) begin
assign reqq_tag_st0 = qual_tag;
assign reqq_rw_st0 = qual_rw;
assign tag_out = qual_tag;
assign rw_out = qual_rw;
end else begin
assign reqq_tag_st0 = qual_tag[qual_request_index];
assign reqq_rw_st0 = qual_rw[qual_request_index];
end
`DEBUG_BLOCK(
reg [NUM_REQUESTS-1:0] updated_valids;
always @(*) begin
updated_valids = qual_valids;
if (qual_has_request) begin
updated_valids[qual_request_index] = 0;
end
assign tag_out = qual_tag[sel_idx];
assign rw_out = qual_rw[sel_idx];
end
)
always @(posedge clk) begin
if (reset) begin
@ -140,8 +130,8 @@ module VX_bank_core_req_arb #(
use_per_addr <= out_per_addr;
use_per_writedata <= out_per_writedata;
use_per_tag <= out_per_tag;
end else if (reqq_pop) begin
use_per_valids[qual_request_index] <= 0;
end else if (pop) begin
use_per_valids[sel_idx] <= 0;
end
end
end

View file

@ -46,7 +46,7 @@ module VX_cache #(
parameter CORE_TAG_WIDTH = 4,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 4,
parameter CORE_TAG_ID_BITS = 0,
// dram request tag size
parameter DRAM_TAG_WIDTH = 28,
@ -407,15 +407,15 @@ module VX_cache #(
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) cache_core_rsp_merge (
.clk (clk),
.reset (reset),
.per_bank_core_rsp_tid (per_bank_core_rsp_tid),
.reset (reset),
.per_bank_core_rsp_valid (per_bank_core_rsp_valid),
.per_bank_core_rsp_data (per_bank_core_rsp_data),
.per_bank_core_rsp_tag (per_bank_core_rsp_tag),
.per_bank_core_rsp_tid (per_bank_core_rsp_tid),
.per_bank_core_rsp_data (per_bank_core_rsp_data),
.per_bank_core_rsp_ready (per_bank_core_rsp_ready),
.core_rsp_valid (core_rsp_valid),
.core_rsp_data (core_rsp_data),
.core_rsp_valid (core_rsp_valid),
.core_rsp_tag (core_rsp_tag),
.core_rsp_data (core_rsp_data),
.core_rsp_ready (core_rsp_ready)
);

View file

@ -18,29 +18,22 @@ module VX_cache_core_req_bank_sel #(
output wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
output wire core_req_ready
);
reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r;
if (NUM_BANKS == 1) begin
always @(*) begin
per_bank_valid_r = 0;
for (integer i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid_r[0][i] = core_req_valid[i];
end
end
assign core_req_ready = per_bank_ready;
end else begin
if (NUM_BANKS > 1) begin
reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid_r;
reg [NUM_BANKS-1:0] per_bank_ready_sel;
always @(*) begin
per_bank_valid_r = 0;
per_bank_valid_r = 0;
per_bank_ready_sel = {NUM_BANKS{1'b1}};
for (integer i = 0; i < NUM_REQUESTS; i++) begin
per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0;
end
end
assign core_req_ready = & (per_bank_ready | per_bank_ready_sel);
end
assign per_bank_valid = per_bank_valid_r;
assign per_bank_valid = per_bank_valid_r;
assign core_req_ready = & (per_bank_ready | per_bank_ready_sel);
end else begin
assign per_bank_valid = core_req_valid;
assign core_req_ready = per_bank_ready;
end
endmodule

View file

@ -16,86 +16,101 @@ module VX_cache_core_rsp_merge #(
input wire reset,
// Per Bank WB
input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
// Core Writeback
output wire [NUM_REQUESTS-1:0] core_rsp_valid,
output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
output wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
input wire core_rsp_ready
);
if (NUM_REQUESTS > 1) begin
wire [`BANK_BITS-1:0] sel_idx;
wire [`BANK_BITS-1:0] main_bank_index;
VX_fair_arbiter #(
.N(NUM_BANKS)
) sel_bank (
.clk (clk),
.reset (reset),
.requests (per_bank_core_rsp_valid),
.grant_index (main_bank_index),
`UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_onehot)
);
VX_rr_arbiter #(
.N(NUM_BANKS)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (per_bank_core_rsp_valid),
`UNUSED_PIN (grant_valid),
.grant_index (sel_idx),
`UNUSED_PIN (grant_onehot)
);
reg [NUM_REQUESTS-1:0] core_rsp_valid_unqual;
reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_BANKS-1:0] core_rsp_bank_select;
wire stall = ~core_rsp_ready && (| core_rsp_valid);
reg [NUM_REQUESTS-1:0] core_rsp_valid_unqual;
reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_BANKS-1:0] core_rsp_bank_select;
if (CORE_TAG_ID_BITS != 0) begin
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_bank_select = 0;
core_rsp_data_unqual = 'x;
core_rsp_tag_unqual = per_bank_core_rsp_tag[sel_idx];
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[sel_idx][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_bank_select[i] = 1;
end
end
end
end else begin
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_valid_unqual[per_bank_core_rsp_tid[sel_idx]] = 1;
core_rsp_bank_select = 0;
core_rsp_bank_select[sel_idx] = 1;
if (CORE_TAG_ID_BITS != 0) begin
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 0;
core_rsp_tag_unqual = per_bank_core_rsp_tag[main_bank_index];
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_bank_select[i] = 1;
end else begin
core_rsp_bank_select[i] = 0;
end
end
end
core_rsp_data_unqual = 'x;
core_rsp_data_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_data[sel_idx];
core_rsp_tag_unqual = 'x;
core_rsp_tag_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_tag[sel_idx];
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i] && !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_bank_select[i] = 1;
end
end
end
end
wire stall = ~core_rsp_ready && (| core_rsp_valid);
VX_generic_register #(
.N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)),
.PASSTHRU(NUM_BANKS <= 2)
) core_wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
);
assign per_bank_core_rsp_ready = core_rsp_bank_select & {NUM_BANKS{~stall}};
end else begin
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 0;
core_rsp_tag_unqual = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `BANK_BITS'(i))
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
core_rsp_bank_select[i] = 1;
end else begin
core_rsp_bank_select[i] = 0;
end
end
end
end
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (per_bank_core_rsp_tid)
VX_generic_register #(
.N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH))
) core_wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
);
assign per_bank_core_rsp_ready = core_rsp_bank_select & {NUM_BANKS{~stall}};
assign core_rsp_valid = per_bank_core_rsp_valid;
assign core_rsp_tag = per_bank_core_rsp_tag;
assign core_rsp_data = per_bank_core_rsp_data;
assign per_bank_core_rsp_ready = core_rsp_ready;
end
endmodule

View file

@ -13,13 +13,13 @@ module VX_cache_dram_req_arb #(
// Inputs
input wire [NUM_BANKS-1:0] per_bank_dram_req_valid,
input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr,
input wire [NUM_BANKS-1:0] per_bank_dram_req_rw,
input wire [NUM_BANKS-1:0][BANK_LINE_SIZE-1:0] per_bank_dram_req_byteen,
input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr,
input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_req_data,
output wire [NUM_BANKS-1:0] per_bank_dram_req_ready,
// Output
// Outputs
output wire dram_req_valid,
output wire dram_req_rw,
output wire [BANK_LINE_SIZE-1:0] dram_req_byteen,
@ -28,36 +28,49 @@ module VX_cache_dram_req_arb #(
input wire dram_req_ready
);
// NOTE(review): this span is a rendered diff hunk; removed and added lines
// are interleaved without +/- markers, so some statements appear twice.
// Apparently the removed version: an unconditional VX_fixed_arbiter that
// selects among the per-bank DRAM requests.
wire sel_valid;
wire [`BANK_BITS-1:0] sel_idx;
wire [NUM_BANKS-1:0] sel_1hot;
VX_fixed_arbiter #(
.N(NUM_BANKS)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (per_bank_dram_req_valid),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot(sel_1hot)
);
// Apparently the added version: arbitration logic is generated only when
// there is more than one bank; a single bank is wired straight through.
if (NUM_BANKS > 1) begin
wire sel_valid;
wire [`BANK_BITS-1:0] sel_idx;
wire [NUM_BANKS-1:0] sel_1hot;
// Selects one requesting bank; sel_idx / sel_1hot identify the granted bank.
// VX_rr_arbiter is presumably a round-robin arbiter — confirm against its source.
VX_rr_arbiter #(
.N(NUM_BANKS)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (per_bank_dram_req_valid),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot(sel_1hot)
);
// Asserted when the output request is valid but dram_req_ready is low.
// Drives the register's stall input and gates the per-bank ready signals.
wire stall = ~dram_req_ready && dram_req_valid;
wire stall = ~dram_req_ready && dram_req_valid;
VX_generic_register #(
.N(1 + 1 + BANK_LINE_SIZE + `DRAM_ADDR_WIDTH + `BANK_LINE_WIDTH)
) core_wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({sel_valid, per_bank_dram_req_rw[sel_idx], per_bank_dram_req_byteen[sel_idx], per_bank_dram_req_addr[sel_idx], per_bank_dram_req_data[sel_idx]}),
.out ({dram_req_valid, dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data})
);
// Carries {valid, addr, rw, byteen, data} of the granted bank to the
// dram_req_* outputs. PASSTHRU is set when NUM_BANKS <= 2.
VX_generic_register #(
.N(1 + `DRAM_ADDR_WIDTH + 1 + BANK_LINE_SIZE + `BANK_LINE_WIDTH),
.PASSTHRU(NUM_BANKS <= 2)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({sel_valid, per_bank_dram_req_addr[sel_idx], per_bank_dram_req_rw[sel_idx], per_bank_dram_req_byteen[sel_idx], per_bank_dram_req_data[sel_idx]}),
.out ({dram_req_valid, dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data})
);
// A bank sees ready only when it holds the grant and the output is not stalled.
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_dram_req_ready[i] = sel_1hot[i] && !stall;
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_dram_req_ready[i] = sel_1hot[i] && !stall;
end
end else begin
// Single-bank case: no arbiter or register is instantiated, so clk/reset
// are marked unused and the request/ready signals connect directly.
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign dram_req_valid = per_bank_dram_req_valid;
assign dram_req_rw = per_bank_dram_req_rw;
assign dram_req_byteen = per_bank_dram_req_byteen;
assign dram_req_addr = per_bank_dram_req_addr;
assign dram_req_data = per_bank_dram_req_data;
assign per_bank_dram_req_ready = dram_req_ready;
end
endmodule

View file

@ -27,42 +27,43 @@ module VX_data_access #(
`ifdef DBG_CORE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc_st2,
input wire[`NR_BITS-1:0] debug_rd_st2,
input wire[`NW_BITS-1:0] debug_wid_st2,
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2,
input wire[31:0] debug_pc,
input wire[`NR_BITS-1:0] debug_rd,
input wire[`NW_BITS-1:0] debug_wid,
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid,
`IGNORE_WARNINGS_END
`endif
input wire stall,
input wire valid_req_st2,
input wire writeen_st2,
// Inputs
input wire valid_in,
`IGNORE_WARNINGS_BEGIN
input wire[`LINE_ADDR_WIDTH-1:0] addr_st2,
input wire[`LINE_ADDR_WIDTH-1:0] addr_in,
`IGNORE_WARNINGS_END
input wire writefill_st2,
input wire[`WORD_WIDTH-1:0] writeword_st2,
input wire[`BANK_LINE_WIDTH-1:0] writedata_st2,
input wire writeen_in,
input wire is_fill_in,
input wire[`WORD_WIDTH-1:0] writeword_in,
input wire[`BANK_LINE_WIDTH-1:0] writedata_in,
input wire[WORD_SIZE-1:0] byteen_in,
input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_in,
input wire[WORD_SIZE-1:0] mem_byteen_st2,
input wire[`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_st2,
output wire[`WORD_WIDTH-1:0] readword_st2,
output wire[`BANK_LINE_WIDTH-1:0] readdata_st2,
output wire[BANK_LINE_SIZE-1:0] dirtyb_st2
// Outputs
output wire[`WORD_WIDTH-1:0] readword_out,
output wire[`BANK_LINE_WIDTH-1:0] readdata_out,
output wire[BANK_LINE_SIZE-1:0] dirtyb_out
);
wire[BANK_LINE_SIZE-1:0] qual_read_dirtyb_st2;
wire[`BANK_LINE_WIDTH-1:0] qual_read_data_st2;
wire[BANK_LINE_SIZE-1:0] qual_read_dirtyb_out;
wire[`BANK_LINE_WIDTH-1:0] qual_read_data;
wire[BANK_LINE_SIZE-1:0] use_read_dirtyb_st2;
wire[`BANK_LINE_WIDTH-1:0] use_read_data_st2;
wire[BANK_LINE_SIZE-1:0] use_read_dirtyb_out;
wire[`BANK_LINE_WIDTH-1:0] use_read_data;
wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] use_byte_enable;
wire[`BANK_LINE_WIDTH-1:0] use_write_data;
wire use_write_enable;
wire[`LINE_SELECT_BITS-1:0] addrline_st2 = addr_st2[`LINE_SELECT_BITS-1:0];
wire[`LINE_SELECT_BITS-1:0] addrline = addr_in[`LINE_SELECT_BITS-1:0];
VX_data_store #(
.CACHE_SIZE (CACHE_SIZE),
@ -74,28 +75,28 @@ module VX_data_access #(
.reset (reset),
.read_addr (addrline_st2),
.read_dirtyb (qual_read_dirtyb_st2),
.read_data (qual_read_data_st2),
.read_addr (addrline),
.read_dirtyb (qual_read_dirtyb_out),
.read_data (qual_read_data),
.write_enable(use_write_enable),
.write_fill (writefill_st2),
.write_fill (is_fill_in),
.byte_enable (use_byte_enable),
.write_addr (addrline_st2),
.write_addr (addrline),
.write_data (use_write_data)
);
assign use_read_dirtyb_st2= qual_read_dirtyb_st2;
assign use_read_data_st2 = qual_read_data_st2;
assign use_read_dirtyb_out= qual_read_dirtyb_out;
assign use_read_data = qual_read_data;
// Read-word extraction. Diff rendering: each statement appears in its
// removed (*_st2) and added (*_in / *_out) form on consecutive lines.
// When the line holds several words, wordsel picks the word to return;
// otherwise the low word of the line is used directly.
if (`WORD_SELECT_WIDTH != 0) begin
wire [`WORD_WIDTH-1:0] readword = use_read_data_st2[wordsel_st2 * `WORD_WIDTH +: `WORD_WIDTH];
wire [`WORD_WIDTH-1:0] readword = use_read_data[wordsel_in * `WORD_WIDTH +: `WORD_WIDTH];
// Bytes whose byte-enable bit is clear are forced to zero in the output word.
for (genvar i = 0; i < WORD_SIZE; i++) begin
assign readword_st2[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st2[i]}};
assign readword_out[i * 8 +: 8] = readword[i * 8 +: 8] & {8{byteen_in[i]}};
end
end else begin
// No word-select bits: mask the low bytes of the line with the byte enables.
for (genvar i = 0; i < WORD_SIZE; i++) begin
assign readword_st2[i * 8 +: 8] = use_read_data_st2[i * 8 +: 8] & {8{mem_byteen_st2[i]}};
assign readword_out[i * 8 +: 8] = use_read_data[i * 8 +: 8] & {8{byteen_in[i]}};
end
end
@ -103,33 +104,33 @@ module VX_data_access #(
// Write-path formation. Diff rendering: removed (*_st2) and added (*_in /
// *_out) forms of each statement are interleaved.
wire [`BANK_LINE_WIDTH-1:0] data_write;
for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin
// word_sel is true for the word slot addressed by wordsel (always true
// when there are no word-select bits).
wire word_sel = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st2 == `UP(`WORD_SELECT_WIDTH)'(i)));
wire word_sel = ((`WORD_SELECT_WIDTH == 0) || (wordsel_in == `UP(`WORD_SELECT_WIDTH)'(i)));
// Fill: every byte of every word is enabled. Otherwise only the selected
// word takes the request's byte enables; all other words are disabled.
assign byte_enable[i] = writefill_st2 ? {WORD_SIZE{1'b1}} :
word_sel ? mem_byteen_st2 :
assign byte_enable[i] = is_fill_in ? {WORD_SIZE{1'b1}} :
word_sel ? byteen_in :
{WORD_SIZE{1'b0}};
// Fill: each slot takes its word from the fill line. Otherwise the single
// write word is replicated into every slot (byte_enable picks the target).
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = writefill_st2 ? writedata_st2[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_st2;
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = is_fill_in ? writedata_in[i * `WORD_WIDTH +: `WORD_WIDTH] : writeword_in;
end
// The data store is written only for a valid request with write-enable set
// while the stage is not stalled.
assign use_write_enable = valid_req_st2 && writeen_st2 && !stall;
assign use_write_enable = valid_in && writeen_in && !stall;
assign use_byte_enable = byte_enable;
assign use_write_data = data_write;
// Dirty-byte mask and full line read from the store are exported unchanged.
assign dirtyb_st2 = use_read_dirtyb_st2;
assign readdata_st2 = use_read_data_st2;
assign dirtyb_out = use_read_dirtyb_out;
assign readdata_out = use_read_data;
`ifdef DBG_PRINT_CACHE_DATA
always @(posedge clk) begin
if (valid_req_st2 && !stall) begin
if (valid_in && !stall) begin
if (use_write_enable) begin
if (writefill_st2) begin
$display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), dirtyb_st2, addrline_st2, use_write_data);
if (is_fill_in) begin
$display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), dirtyb_out, addrline, use_write_data);
end else begin
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2, dirtyb_st2, addrline_st2, wordsel_st2, writeword_st2);
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, writeword_in);
end
end else begin
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), debug_wid_st2, debug_pc_st2, dirtyb_st2, addrline_st2, wordsel_st2, qual_read_data_st2);
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, qual_read_data);
end
end
end

View file

@ -93,24 +93,46 @@ module VX_snp_forwarder #(
assign snp_req_ready = !sfq_full && fwdout_ready;
reg [`REQS_BITS-1:0] fwdin_sel;
// Forward-in arbitration across the snoop forward-in ports.
// NOTE(review): diff rendering — the removed arbiter ("arbiter", indexed by
// fwdin_sel) and the added one ("sel_arb" + pipe_reg) are interleaved below.
if (NUM_REQUESTS > 1) begin
wire sel_valid;
wire [`REQS_BITS-1:0] sel_idx;
wire [NUM_REQUESTS-1:0] sel_1hot;
VX_fixed_arbiter #(
.N(NUM_REQUESTS)
) arbiter (
.clk (clk),
.reset (reset),
.requests (snp_fwdin_valid),
.grant_index (fwdin_sel),
`UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_onehot)
);
// Picks one of the valid forward-in ports; sel_idx / sel_1hot identify it.
VX_fixed_arbiter #(
.N(NUM_REQUESTS)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (snp_fwdin_valid),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
);
assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
assign fwdin_valid = snp_fwdin_valid[sel_idx];
assign fwdin_tag = snp_fwdin_tag[sel_idx];
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
// Asserted when the forwarded response is valid but fwdin_ready is low.
wire stall = fwdin_valid && ~fwdin_ready;
// Carries {valid, tag} of the granted port to fwdin_valid / fwdin_tag.
// PASSTHRU is set when NUM_REQUESTS <= 2.
VX_generic_register #(
.N(1 + `LOG2UP(SNRQ_SIZE)),
.PASSTHRU(NUM_REQUESTS <= 2)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({sel_valid, snp_fwdin_tag[sel_idx]}),
.out ({fwdin_valid, fwdin_tag})
);
// A port sees ready only when it holds the grant and the output is not stalled.
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
assign snp_fwdin_ready[i] = sel_1hot[i] && !stall;
end
end else begin
// Single requester: valid, tag and ready connect straight through.
assign fwdin_valid = snp_fwdin_valid;
assign fwdin_tag = snp_fwdin_tag;
assign snp_fwdin_ready = fwdin_ready;
end
`ifdef DBG_PRINT_CACHE_SNP
@ -122,7 +144,7 @@ module VX_snp_forwarder #(
$display("%t: cache%0d snp-fwd-out: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_fwdout_addr[0]), snp_fwdout_invalidate[0], snp_fwdout_tag[0]);
end
if (fwdin_valid && fwdin_ready) begin
$display("%t: cache%0d snp-fwd-in[%0d]: tag=%0h", $time, CACHE_ID, fwdin_sel, fwdin_tag);
$display("%t: cache%0d snp-fwd-in: tag=%0h", $time, CACHE_ID, fwdin_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: cache%0d snp-fwd-rsp: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag);

View file

@ -16,37 +16,43 @@ module VX_snp_rsp_arb #(
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready
);
// Merges the per-bank snoop responses onto the single snp_rsp_* output.
// NOTE(review): diff rendering — removed and added copies of the same
// declarations and instances are interleaved below.
if (NUM_BANKS > 1) begin
wire sel_valid;
wire [`BANK_BITS-1:0] sel_idx;
wire [NUM_BANKS-1:0] sel_1hot;
wire sel_valid;
wire [`BANK_BITS-1:0] sel_idx;
wire [NUM_BANKS-1:0] sel_1hot;
// Picks one bank with a valid snoop response; sel_idx / sel_1hot identify it.
VX_fixed_arbiter #(
.N(NUM_BANKS)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (per_bank_snp_rsp_valid),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot(sel_1hot)
);
VX_fixed_arbiter #(
.N(NUM_BANKS)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (per_bank_snp_rsp_valid),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot(sel_1hot)
);
// Asserted when the output response is valid but snp_rsp_ready is low.
wire stall = ~snp_rsp_ready && snp_rsp_valid;
wire stall = ~snp_rsp_ready && snp_rsp_valid;
// Carries {valid, tag} of the granted bank to snp_rsp_valid / snp_rsp_tag.
// PASSTHRU is set when NUM_BANKS <= 2.
VX_generic_register #(
.N(1 + SNP_REQ_TAG_WIDTH),
.PASSTHRU(NUM_BANKS <= 2)
) pipe_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({sel_valid, per_bank_snp_rsp_tag[sel_idx]}),
.out ({snp_rsp_valid, snp_rsp_tag})
);
VX_generic_register #(
.N(1 + SNP_REQ_TAG_WIDTH)
) core_wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({sel_valid, per_bank_snp_rsp_tag[sel_idx]}),
.out ({snp_rsp_valid, snp_rsp_tag})
);
// A bank sees ready only when it holds the grant and the output is not stalled.
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_snp_rsp_ready[i] = sel_1hot[i] && !stall;
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_snp_rsp_ready[i] = sel_1hot[i] && !stall;
end
end else begin
// Single bank: valid, tag and ready connect straight through.
// NOTE(review): clk and reset are not referenced in this branch and are not
// marked with `UNUSED_VAR here — may raise unused-signal lint warnings; confirm.
assign snp_rsp_valid = per_bank_snp_rsp_valid;
assign snp_rsp_tag = per_bank_snp_rsp_tag;
assign per_bank_snp_rsp_ready = snp_rsp_ready;
end
endmodule

View file

@ -27,46 +27,44 @@ module VX_tag_access #(
`ifdef DBG_CORE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc_st1,
input wire[`NR_BITS-1:0] debug_rd_st1,
input wire[`NW_BITS-1:0] debug_wid_st1,
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1,
input wire[31:0] debug_pc,
input wire[`NR_BITS-1:0] debug_rd,
input wire[`NW_BITS-1:0] debug_wid,
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid,
`IGNORE_WARNINGS_END
`endif
input wire stall,
input wire is_snp_st1,
input wire snp_invalidate_st1,
// Inputs
input wire valid_in,
input wire[`LINE_ADDR_WIDTH-1:0] addr_in,
input wire is_write_in,
input wire is_fill_in,
input wire is_snp_in,
input wire snp_invalidate_in,
input wire force_miss_in,
input wire[`LINE_ADDR_WIDTH-1:0] addr_st1,
input wire valid_req_st1,
input wire writefill_st1,
input wire mem_rw_st1,
input wire force_miss_st1,
output wire[`TAG_SELECT_BITS-1:0] readtag_st1,
output wire miss_st1,
output wire dirty_st1,
output wire writeen_st1
// Outputs
output wire[`TAG_SELECT_BITS-1:0] readtag_out,
output wire miss_out,
output wire dirty_out,
output wire writeen_out
);
wire qual_read_valid_st1;
wire qual_read_dirty_st1;
wire[`TAG_SELECT_BITS-1:0] qual_read_tag_st1;
wire qual_read_valid;
wire qual_read_dirty;
wire[`TAG_SELECT_BITS-1:0] qual_read_tag;
wire use_read_valid_st1;
wire use_read_dirty_st1;
wire[`TAG_SELECT_BITS-1:0] use_read_tag_st1;
wire use_read_valid;
wire use_read_dirty;
wire[`TAG_SELECT_BITS-1:0] use_read_tag;
wire use_write_enable;
wire use_invalidate;
wire[`TAG_SELECT_BITS-1:0] addrtag_st1 = addr_st1[`TAG_LINE_ADDR_RNG];
wire[`LINE_SELECT_BITS-1:0] addrline_st1 = addr_st1[`LINE_SELECT_BITS-1:0];
wire[`TAG_SELECT_BITS-1:0] addrtag = addr_in[`TAG_LINE_ADDR_RNG];
wire[`LINE_SELECT_BITS-1:0] addrline = addr_in[`LINE_SELECT_BITS-1:0];
VX_tag_store #(
.CACHE_SIZE (CACHE_SIZE),
@ -77,69 +75,69 @@ module VX_tag_access #(
.clk (clk),
.reset (reset),
.read_addr (addrline_st1),
.read_valid (qual_read_valid_st1),
.read_dirty (qual_read_dirty_st1),
.read_tag (qual_read_tag_st1),
.read_addr (addrline),
.read_valid (qual_read_valid),
.read_dirty (qual_read_dirty),
.read_tag (qual_read_tag),
.invalidate (use_invalidate),
.write_enable(use_write_enable),
.write_fill (writefill_st1),
.write_addr (addrline_st1),
.write_tag (addrtag_st1)
.write_fill (is_fill_in),
.write_addr (addrline),
.write_tag (addrtag)
);
assign use_read_valid_st1 = qual_read_valid_st1 || !DRAM_ENABLE; // If shared memory, always valid
assign use_read_dirty_st1 = qual_read_dirty_st1 && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache
assign use_read_tag_st1 = DRAM_ENABLE ? qual_read_tag_st1 : addrtag_st1; // Tag is always the same in SM
assign use_read_valid = qual_read_valid || !DRAM_ENABLE; // If shared memory, always valid
assign use_read_dirty = qual_read_dirty && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache
assign use_read_tag = DRAM_ENABLE ? qual_read_tag : addrtag; // Tag is always the same in SM
// use "case equality" to handle uninitialized tag when block entry is not valid
wire tags_match = use_read_valid_st1 && (addrtag_st1 === use_read_tag_st1);
wire tags_match = use_read_valid && (addrtag === use_read_tag);
wire normal_write = valid_req_st1
&& mem_rw_st1
&& use_read_valid_st1
&& !writefill_st1
&& !is_snp_st1
&& !miss_st1
&& !force_miss_st1;
wire normal_write = valid_in
&& is_write_in
&& use_read_valid
&& !is_fill_in
&& !is_snp_in
&& !miss_out
&& !force_miss_in;
wire fill_write = valid_req_st1 && writefill_st1
wire fill_write = valid_in && is_fill_in
&& !tags_match; // discard redundant fills because the block could be dirty
assign use_write_enable = (normal_write || fill_write)
&& !stall;
assign use_invalidate = valid_req_st1 && is_snp_st1
assign use_invalidate = valid_in && is_snp_in
&& tags_match
&& (use_read_dirty_st1 || snp_invalidate_st1) // block is dirty or should invalidate
&& !force_miss_st1
&& (use_read_dirty || snp_invalidate_in) // block is dirty or should invalidate
&& !force_miss_in
&& !stall;
wire core_req_miss = valid_req_st1 && !is_snp_st1 && !writefill_st1
wire core_req_miss = valid_in && !is_snp_in && !is_fill_in
&& !tags_match;
assign miss_st1 = core_req_miss;
assign dirty_st1 = valid_req_st1 && use_read_valid_st1 && use_read_dirty_st1;
assign readtag_st1 = use_read_tag_st1;
assign writeen_st1 = use_write_enable;
assign miss_out = core_req_miss;
assign dirty_out = valid_in && use_read_valid && use_read_dirty;
assign readtag_out = use_read_tag;
assign writeen_out = use_write_enable;
`ifdef DBG_PRINT_CACHE_DATA
// Debug trace: on each clock edge with a valid, non-stalled request, prints
// one of tag-miss / tag-fill / tag-write / tag-read, plus a warning when a
// fill targets a block whose tag already matches.
// NOTE(review): diff rendering — removed (*_st1) and added (*_in / *_out)
// forms of each statement are interleaved below.
always @(posedge clk) begin
if (valid_req_st1 && !stall) begin
if (writefill_st1 && use_read_valid_st1 && tags_match) begin
$display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
if (valid_in && !stall) begin
if (is_fill_in && use_read_valid && tags_match) begin
$display("%t: warning: redundant fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID));
end
// NOTE(review): the "valid=%b" field in the tag-miss message is passed
// use_read_dirty, not a valid flag — confirm whether use_read_valid was intended.
if (miss_st1) begin
$display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, valid=%b, blk_tag_id=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, use_read_dirty_st1, qual_read_tag_st1, addrline_st1, addrtag_st1);
if (miss_out) begin
$display("%t: cache%0d:%0d tag-miss: addr=%0h, wid=%0d, PC=%0h, valid=%b, blk_tag_id=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, use_read_dirty, qual_read_tag, addrline, addrtag);
end else if ((| use_write_enable)) begin
// Tag store is being written: distinguish a fill from a core write.
if (writefill_st1) begin
$display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), addrline_st1, addrtag_st1);
if (is_fill_in) begin
$display("%t: cache%0d:%0d tag-fill: addr=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), addrline, addrtag);
end else begin
$display("%t: cache%0d:%0d tag-write: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, addrline_st1, addrtag_st1);
$display("%t: cache%0d:%0d tag-write: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, addrline, addrtag);
end
end else begin
// Neither a miss nor a tag write: reported as a plain tag read.
$display("%t: cache%0d:%0d tag-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), debug_wid_st1, debug_pc_st1, addrline_st1, qual_read_tag_st1);
$display("%t: cache%0d:%0d tag-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, tag_id=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, addrline, qual_read_tag);
end
end
end