cache bank refactoring - removing unnecessary core response fifo & restoring single port data access

This commit is contained in:
Blaise Tine 2021-02-21 21:47:46 -08:00
parent ccb74ef286
commit 7560202f8b
12 changed files with 129 additions and 294 deletions

View file

@ -170,7 +170,6 @@ module VX_cluster #(
.CREQ_SIZE (`L2CREQ_SIZE),
.MSHR_SIZE (`L2MSHR_SIZE),
.DRSQ_SIZE (`L2DRSQ_SIZE),
.CRSQ_SIZE (`L2CRSQ_SIZE),
.DREQ_SIZE (`L2DREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`XDRAM_TAG_WIDTH),

View file

@ -264,11 +264,6 @@
`define ICREQ_SIZE 4
`endif
// Core Response Queue Size
`ifndef ICRSQ_SIZE
`define ICRSQ_SIZE 4
`endif
// Miss Handling Register Size
`ifndef IMSHR_SIZE
`define IMSHR_SIZE `NUM_WARPS
@ -306,11 +301,6 @@
`define DCREQ_SIZE 4
`endif
// Core Response Queue Size
`ifndef DCRSQ_SIZE
`define DCRSQ_SIZE 4
`endif
// Miss Handling Register Size
`ifndef DMSHR_SIZE
`define DMSHR_SIZE `LSUQ_SIZE
@ -348,11 +338,6 @@
`define SCREQ_SIZE 4
`endif
// Core Response Queue Size
`ifndef SCRSQ_SIZE
`define SCRSQ_SIZE 4
`endif
// L2cache Configurable Knobs /////////////////////////////////////////////////
// Size of cache in bytes
@ -370,11 +355,6 @@
`define L2CREQ_SIZE 4
`endif
// Core Response Queue Size
`ifndef L2CRSQ_SIZE
`define L2CRSQ_SIZE 4
`endif
// Miss Handling Register Size
`ifndef L2MSHR_SIZE
`define L2MSHR_SIZE 16
@ -407,11 +387,6 @@
`define L3CREQ_SIZE 4
`endif
// Core Response Queue Size
`ifndef L3CRSQ_SIZE
`define L3CRSQ_SIZE 4
`endif
// Miss Handling Register Size
`ifndef L3MSHR_SIZE
`define L3MSHR_SIZE 16

View file

@ -101,7 +101,6 @@ module VX_mem_unit # (
.CREQ_SIZE (`ICREQ_SIZE),
.MSHR_SIZE (`IMSHR_SIZE),
.DRSQ_SIZE (`IDRSQ_SIZE),
.CRSQ_SIZE (`ICRSQ_SIZE),
.DREQ_SIZE (`IDREQ_SIZE),
.WRITE_ENABLE (0),
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
@ -161,7 +160,6 @@ module VX_mem_unit # (
.CREQ_SIZE (`DCREQ_SIZE),
.MSHR_SIZE (`DMSHR_SIZE),
.DRSQ_SIZE (`DDRSQ_SIZE),
.CRSQ_SIZE (`DCRSQ_SIZE),
.DREQ_SIZE (`DDREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
@ -227,7 +225,6 @@ module VX_mem_unit # (
.WORD_SIZE (`SWORD_SIZE),
.NUM_REQS (`SNUM_REQUESTS),
.CREQ_SIZE (`SCREQ_SIZE),
.CRSQ_SIZE (`SCRSQ_SIZE),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET)

View file

@ -7,7 +7,7 @@
`define SCOPE_ASSIGN(d,s) assign scope_``d = s
`define SCOPE_SIZE 16384
`define SCOPE_SIZE 4096
`else

View file

@ -172,7 +172,6 @@ module Vortex (
.CREQ_SIZE (`L3CREQ_SIZE),
.MSHR_SIZE (`L3MSHR_SIZE),
.DRSQ_SIZE (`L3DRSQ_SIZE),
.CRSQ_SIZE (`L3CRSQ_SIZE),
.DREQ_SIZE (`L3DREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),

158
hw/rtl/cache/VX_bank.v vendored
View file

@ -24,9 +24,6 @@ module VX_bank #(
parameter MSHR_SIZE = 1,
// DRAM Response Queue Size
parameter DRSQ_SIZE = 1,
// Core Response Queue Size
parameter CRSQ_SIZE = 1,
// DRAM Request Queue Size
parameter DREQ_SIZE = 1,
@ -154,7 +151,7 @@ module VX_bank #(
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] rdata_st0, rdata_st1;
wire [`CACHE_LINE_WIDTH-1:0] rdata_st1;
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
wire [CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
wire valid_st0, valid_st1;
@ -169,9 +166,11 @@ module VX_bank #(
wire mshr_pending_st0;
wire is_flush_st0;
wire crsq_alm_full, crsq_push, crsq_pop;
wire dreq_alm_full, dreq_push, dreq_pop;
wire crsq_in_valid, crsq_in_ready, crsq_in_stall;
wire dreq_alm_full;
wire drsq_pop;
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
VX_pending_size #(
.SIZE (MSHR_SIZE)
@ -179,7 +178,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
.push (creq_pop && !creq_rw),
.pop (crsq_push),
.pop (crsq_in_fire),
.full (mshr_alm_full),
`UNUSED_PIN (empty),
`UNUSED_PIN (size)
@ -193,15 +192,16 @@ module VX_bank #(
wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1);
assign mshr_pop = mshr_pop_unqual
&& !crsq_alm_full // ensure core response ready
&& !(!IN_ORDER_DRAM && is_miss_st1 && is_mshr_st1); // do not schedule another mshr request if the previous one missed
&& !(!IN_ORDER_DRAM && is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed
&& !crsq_in_stall; // ensure core response ready
assign drsq_pop = drsq_pop_unqual;
assign drsq_pop = drsq_pop_unqual
&& !crsq_in_stall; // ensure core response ready
assign creq_pop = creq_pop_unqual
&& !dreq_alm_full // ensure dram request ready
&& !crsq_alm_full // ensure core response ready
&& !mshr_alm_full; // ensure mshr enqueue ready
&& !dreq_alm_full // ensure dram request ready
&& !mshr_alm_full // ensure mshr enqueue ready
&& !crsq_in_stall; // ensure core response ready
assign dram_rsp_ready = drsq_pop;
@ -252,7 +252,7 @@ module VX_bank #(
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (1'b1),
.enable (!crsq_in_stall),
.data_in ({
flush_enable || mshr_pop || drsq_pop || creq_pop,
flush_enable,
@ -326,52 +326,15 @@ module VX_bank #(
assign fill_req_unqual_st0 = !mem_rw_st0 && (!force_miss_st0 || (!IN_ORDER_DRAM && is_mshr_st0 && !prev_miss_dep_st0));
wire [`CACHE_LINE_WIDTH-1:0] rdata_unqual;
wire writeen_st1 = writeen_unqual_st1 && (is_fill_st1 || !force_miss_st1);
wire rw_hazard = valid_st1 && writeen_st1 && (addr_st0 == addr_st1)
&& ((`WORDS_PER_LINE == 1) || (is_fill_st1 || (wsel_st0 == wsel_st1)));
if (`WORDS_PER_LINE > 1) begin
for (genvar p = 0; p < NUM_PORTS; p++) begin
reg [`WORD_WIDTH-1:0] read_data_r;
wire [`WORD_WIDTH-1:0] write_data = wdata_st1[wsel_st0 * `WORD_WIDTH +: `WORD_WIDTH];
always @(*) begin
read_data_r = rdata_unqual[wsel_st0[p] * `WORD_WIDTH +: `WORD_WIDTH];
for (integer i = 0; i < WORD_SIZE; i++) begin
if (rw_hazard
&& (is_fill_st1 || (WRITE_ENABLE && byteen_st1[p][i]))
&& ((NUM_PORTS == 1) || pmask_st1[p])) begin
read_data_r[i * 8 +: 8] = write_data[i * 8 +: 8];
end
end
end
assign rdata_st0[p] = read_data_r;
end
end else begin
reg [`WORD_WIDTH-1:0] read_data_r;
always @(*) begin
read_data_r = rdata_unqual;
for (integer i = 0; i < WORD_SIZE; i++) begin
if (rw_hazard
&& (is_fill_st1 || (WRITE_ENABLE && byteen_st1[0][i]))) begin
read_data_r[i * 8 +: 8] = wdata_st1[i * 8 +: 8];
end
end
end
assign rdata_st0[0] = read_data_r;
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`WORD_WIDTH + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (1'b1),
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, fill_req_unqual_st0, incoming_fill_st0, miss_st0, force_miss_st0, mem_rw_st0, addr_st0, rdata_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}),
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, fill_req_unqual_st1, incoming_fill_st1, miss_st1, force_miss_st1, mem_rw_st1, addr_st1, rdata_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1})
.enable (!crsq_in_stall),
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, fill_req_unqual_st0, incoming_fill_st0, miss_st0, force_miss_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}),
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, fill_req_unqual_st1, incoming_fill_st1, miss_st1, force_miss_st1, mem_rw_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1})
);
`ifdef DBG_CACHE_REQ_INFO
@ -382,6 +345,8 @@ module VX_bank #(
end
`endif
wire writeen_st1 = writeen_unqual_st1 && (is_fill_st1 || !force_miss_st1);
wire crsq_push_st1 = !is_fill_st1 && !mem_rw_st1 && !miss_st1 && !force_miss_st1;
wire mshr_push_st1 = !is_fill_st1 && !mem_rw_st1 && (miss_st1 || force_miss_st1);
@ -424,27 +389,25 @@ module VX_bank #(
.reset (reset),
`ifdef DBG_CACHE_REQ_INFO
.debug_pc_r (debug_pc_st0),
.debug_wid_r (debug_wid_st0),
.debug_pc_w (debug_pc_st1),
.debug_wid_w (debug_wid_st1),
.debug_pc (debug_pc_st1),
.debug_wid (debug_wid_st1),
`endif
.addr (addr_st1),
// reading
.readen (valid_st0 && !is_fill_st0 && !mem_rw_st0),
.raddr (addr_st0),
.rdata (rdata_unqual),
.readen (valid_st1 && !is_fill_st1 && !mem_rw_st1),
.rdata (rdata_st1),
// writing
.writeen (valid_st1 && writeen_st1),
.is_fill (is_fill_st1),
.byteen (line_byteen_st1),
.waddr (addr_st1),
.byteen (line_byteen_st1),
.wdata (wdata_st1)
);
assign mshr_push = valid_st1 && mshr_push_st1;
wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1;
wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1 && crsq_in_ready;
wire mshr_restore = !IN_ORDER_DRAM && is_mshr_st1;
`RUNTIME_ASSERT(!IN_ORDER_DRAM || !(mshr_push && mshr_restore), ("Oops!"))
@ -508,50 +471,49 @@ module VX_bank #(
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
wire [CORE_TAG_WIDTH-1:0] crsq_tag;
wire crsq_empty;
assign crsq_push = valid_st1 && crsq_push_st1;
assign crsq_pop = core_rsp_valid && core_rsp_ready;
assign crsq_data = rdata_st1;
assign crsq_in_valid = valid_st1 && crsq_push_st1;
assign crsq_in_stall = crsq_in_valid && !crsq_in_ready;
assign crsq_pmask = pmask_st1;
assign crsq_tid = req_tid_st1;
assign crsq_tag = tag_st1;
VX_fifo_queue #(
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.SIZE (CRSQ_SIZE),
.ALM_FULL (CRSQ_SIZE-2),
.BUFFERED (1)
) core_rsp_queue (
.clk (clk),
.reset (reset),
.push (crsq_push),
.pop (crsq_pop),
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}),
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}),
.empty (crsq_empty),
.alm_full (crsq_alm_full),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (size)
);
if (`WORDS_PER_LINE > 1) begin
for (genvar p = 0; p < NUM_PORTS; ++p) begin
assign crsq_data[p] = rdata_st1[wsel_st1[p] * `WORD_WIDTH +: `WORD_WIDTH];
end
end else begin
assign crsq_data = rdata_st1;
end
assign core_rsp_valid = !crsq_empty;
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.BUFFERED (1)
) core_rsp_req (
.clk (clk),
.reset (reset),
.valid_in (crsq_in_valid),
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}),
.ready_in (crsq_in_ready),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}),
.ready_out (core_rsp_ready)
);
// Enqueue DRAM request
wire [CACHE_LINE_SIZE-1:0] dreq_byteen;
wire [`LINE_ADDR_WIDTH-1:0] dreq_addr;
wire [`CACHE_LINE_WIDTH-1:0] dreq_data;
wire dreq_empty, writeback;
wire dreq_push, dreq_pop, dreq_empty, dreq_rw;
assign dreq_push = valid_st1 && dreq_push_st1;
assign dreq_pop = dram_req_valid && dram_req_ready;
assign writeback = WRITE_ENABLE && do_writeback_st1;
assign dreq_byteen = writeback ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}};
assign dreq_rw = WRITE_ENABLE && do_writeback_st1;
assign dreq_byteen = dreq_rw ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}};
assign dreq_addr = addr_st1;
assign dreq_data = wdata_st1;
@ -564,7 +526,7 @@ module VX_bank #(
.reset (reset),
.push (dreq_push),
.pop (dreq_pop),
.data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}),
.data_in ({dreq_rw, dreq_byteen, dreq_addr, dreq_data}),
.data_out ({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
.empty (dreq_empty),
.alm_full (dreq_alm_full),
@ -582,7 +544,7 @@ module VX_bank #(
`SCOPE_ASSIGN (miss_st0, miss_st0);
`SCOPE_ASSIGN (force_miss_st0, force_miss_st0);
`SCOPE_ASSIGN (mshr_push, mshr_push);
`SCOPE_ASSIGN (crsq_alm_full, crsq_alm_full);
`SCOPE_ASSIGN (crsq_in_stall, crsq_in_stall);
`SCOPE_ASSIGN (dreq_alm_full, dreq_alm_full);
`SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full);
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
@ -591,7 +553,7 @@ module VX_bank #(
`ifdef PERF_ENABLE
assign perf_read_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && !mem_rw_st1;
assign perf_write_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && mem_rw_st1;
assign perf_pipe_stalls = crsq_alm_full || dreq_alm_full || mshr_alm_full;
assign perf_pipe_stalls = crsq_in_stall || dreq_alm_full || mshr_alm_full;
assign perf_mshr_stalls = mshr_alm_full;
`endif
@ -604,8 +566,8 @@ module VX_bank #(
$display("%t: cache%0d:%0d miss with incoming fill - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
assert(!is_mshr_st1);
end
if (crsq_alm_full || dreq_alm_full || mshr_alm_full) begin
$display("%t: cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_alm_full, dreq_alm_full, mshr_alm_full);
if (crsq_in_stall || dreq_alm_full || mshr_alm_full) begin
$display("%t: cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_in_stall, dreq_alm_full, mshr_alm_full);
end
if (flush_enable) begin
$display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID));
@ -622,7 +584,7 @@ module VX_bank #(
else
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel);
end
if (crsq_push) begin
if (crsq_in_fire) begin
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
end
if (dreq_push) begin

View file

@ -23,9 +23,6 @@ module VX_cache #(
parameter MSHR_SIZE = 16,
// DRAM Response Queue Size
parameter DRSQ_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 4,
// DRAM Request Queue Size
parameter DREQ_SIZE = 4,
@ -298,7 +295,6 @@ module VX_cache #(
.CREQ_SIZE (CREQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.DRSQ_SIZE (DRSQ_SIZE),
.CRSQ_SIZE (CRSQ_SIZE),
.DREQ_SIZE (DREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),

View file

@ -19,49 +19,43 @@ module VX_data_access #(
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
input wire[31:0] debug_pc_r,
input wire[`NW_BITS-1:0] debug_wid_r,
input wire[31:0] debug_pc_w,
input wire[`NW_BITS-1:0] debug_wid_w,
input wire[31:0] debug_pc,
input wire[`NW_BITS-1:0] debug_wid,
`IGNORE_WARNINGS_END
`endif
`IGNORE_WARNINGS_BEGIN
input wire[`LINE_ADDR_WIDTH-1:0] addr,
`IGNORE_WARNINGS_END
// reading
input wire readen,
`IGNORE_WARNINGS_BEGIN
input wire[`LINE_ADDR_WIDTH-1:0] raddr,
`IGNORE_WARNINGS_END
output wire [`CACHE_LINE_WIDTH-1:0] rdata,
// writing
input wire writeen,
input wire is_fill,
input wire [CACHE_LINE_SIZE-1:0] byteen,
`IGNORE_WARNINGS_BEGIN
input wire[`LINE_ADDR_WIDTH-1:0] waddr,
`IGNORE_WARNINGS_END
input wire [CACHE_LINE_SIZE-1:0] byteen,
input wire [`CACHE_LINE_WIDTH-1:0] wdata
);
`UNUSED_VAR (reset)
`UNUSED_VAR (readen)
wire [`LINE_SELECT_BITS-1:0] line_raddr, line_waddr;
wire [`LINE_SELECT_BITS-1:0] line_addr;
wire [CACHE_LINE_SIZE-1:0] byte_enable;
assign line_raddr = raddr[`LINE_SELECT_BITS-1:0];
assign line_waddr = waddr[`LINE_SELECT_BITS-1:0];
assign line_addr = addr[`LINE_SELECT_BITS-1:0];
assign byte_enable = (WRITE_ENABLE && !is_fill) ? byteen : {CACHE_LINE_SIZE{1'b1}};
VX_dp_ram #(
.DATAW(CACHE_LINE_SIZE * 8),
.SIZE(`LINES_PER_BANK),
.BYTEENW(CACHE_LINE_SIZE),
.RWCHECK(1)
VX_sp_ram #(
.DATAW (CACHE_LINE_SIZE * 8),
.SIZE (`LINES_PER_BANK),
.BYTEENW (CACHE_LINE_SIZE),
.RWCHECK (1)
) data_store (
.clk(clk),
.raddr(line_raddr),
.waddr(line_waddr),
.wren(writeen),
.addr(line_addr),
.wren(writeen),
.byteen(byte_enable),
.rden(1'b1),
.din(wdata),
@ -72,13 +66,13 @@ module VX_data_access #(
always @(posedge clk) begin
if (writeen) begin
if (is_fill) begin
$display("%t: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(waddr, BANK_ID), line_waddr, wdata);
$display("%t: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, wdata);
end else begin
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(waddr, BANK_ID), debug_wid_w, debug_pc_w, byte_enable, line_waddr, wdata);
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, wdata);
end
end
if (readen) begin
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr, BANK_ID), debug_wid_r, debug_pc_r, line_raddr, rdata);
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, rdata);
end
end
`endif

View file

@ -15,9 +15,6 @@ module VX_shared_mem #(
// Core Request Queue Size
parameter CREQ_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 4,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
@ -113,10 +110,10 @@ module VX_shared_mem #(
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
wire creq_push, creq_pop, creq_empty, creq_full;
wire crsq_full;
wire crsq_in_ready;
assign creq_push = (| core_req_valid) && !creq_full;
assign creq_pop = ~creq_empty && ~crsq_full;
assign creq_pop = ~creq_empty && crsq_in_ready;
assign per_bank_core_req_ready_unqual = ~creq_full;
@ -167,7 +164,7 @@ module VX_shared_mem #(
) data (
.clk (clk),
.addr (per_bank_core_req_addr[i]),
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i] && ~crsq_full),
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i] && crsq_in_ready),
.byteen (per_bank_core_req_byteen[i]),
.rden (1'b1),
.din (per_bank_core_req_data[i]),
@ -175,58 +172,53 @@ module VX_shared_mem #(
);
end
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
reg [NUM_REQS-1:0] core_rsp_valids_in;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
always @(*) begin
core_rsp_valid_unqual = 0;
core_rsp_data_unqual = 'x;
core_rsp_tag_unqual = 'x;
core_rsp_valids_in = 0;
core_rsp_data_in = 'x;
core_rsp_tag_in = 'x;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_req_valid[i]) begin
core_rsp_valid_unqual[per_bank_core_req_tid[i]] = 1;
core_rsp_data_unqual[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag_unqual = per_bank_core_req_tag[i];
core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag_in = per_bank_core_req_tag[i];
end
end
end
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st0, debug_wid_st0} = core_rsp_tag_unqual[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
assign {debug_pc_st0, debug_wid_st0} = core_rsp_tag_in[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
end else begin
assign {debug_pc_st0, debug_wid_st0} = 0;
end
`endif
wire [NUM_REQS-1:0] core_rsp_valid_tmask;
wire crsq_push, crsq_pop, crsq_empty;
wire [NUM_REQS-1:0] core_rsp_valids_out;
wire core_rsp_valid_out;
wire core_rsp_rw = | (per_bank_core_req_valid & per_bank_core_req_rw);
assign crsq_push = ~creq_empty && ~core_rsp_rw && ~crsq_full;
assign crsq_pop = ~crsq_empty && core_rsp_ready;
VX_fifo_queue #(
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
.SIZE (CRSQ_SIZE),
wire crsq_in_valid = ~creq_empty && ~core_rsp_rw;
VX_skid_buffer #(
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
.BUFFERED (1)
) core_rsp_queue (
.clk (clk),
.reset (reset),
.push (crsq_push),
.pop (crsq_pop),
.data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.data_out ({core_rsp_valid_tmask, core_rsp_data, core_rsp_tag}),
.empty (crsq_empty),
.full (crsq_full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
) core_rsp_req (
.clk (clk),
.reset (reset),
.valid_in (crsq_in_valid),
.data_in ({core_rsp_valids_in, core_rsp_data_in, core_rsp_tag_in}),
.ready_in (crsq_in_ready),
.valid_out (core_rsp_valid_out),
.data_out ({core_rsp_valids_out, core_rsp_data, core_rsp_tag}),
.ready_out (core_rsp_ready)
);
assign core_rsp_valid = core_rsp_valid_tmask & {NUM_REQS{~crsq_empty}};
assign core_rsp_valid = core_rsp_valids_out & {NUM_REQS{core_rsp_valid_out}};
`ifdef DBG_PRINT_CACHE_BANK
always @(posedge clk) begin
@ -280,4 +272,4 @@ module VX_shared_mem #(
assign perf_cache_if.crsp_stalls = perf_crsp_stalls;
`endif
endmodule
endmodule

View file

@ -212,7 +212,7 @@
"miss_st0": 1,
"force_miss_st0": 1,
"mshr_push": 1,
"?crsq_alm_full": 1,
"?crsq_in_stall": 1,
"?dreq_alm_full": 1,
"?mshr_alm_full": 1
}

View file

@ -41,25 +41,25 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
set_global_assignment -name USE_HIGH_SPEED_ADDER ON
set_global_assignment -name MUX_RESTRUCTURE ON
#set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
#set_global_assignment -name MUX_RESTRUCTURE ON
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
set_global_assignment -name SEED 1
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
#set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
#set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
#set_global_assignment -name SEED 1
switch $opts(family) {
"Arria 10" {

View file

@ -1,79 +0,0 @@
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox