mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
cache bank refactoring - removing unnecessary core response fifo & restoring single port data access
This commit is contained in:
parent
ccb74ef286
commit
7560202f8b
12 changed files with 129 additions and 294 deletions
|
@ -170,7 +170,6 @@ module VX_cluster #(
|
|||
.CREQ_SIZE (`L2CREQ_SIZE),
|
||||
.MSHR_SIZE (`L2MSHR_SIZE),
|
||||
.DRSQ_SIZE (`L2DRSQ_SIZE),
|
||||
.CRSQ_SIZE (`L2CRSQ_SIZE),
|
||||
.DREQ_SIZE (`L2DREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`XDRAM_TAG_WIDTH),
|
||||
|
|
|
@ -264,11 +264,6 @@
|
|||
`define ICREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef ICRSQ_SIZE
|
||||
`define ICRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef IMSHR_SIZE
|
||||
`define IMSHR_SIZE `NUM_WARPS
|
||||
|
@ -306,11 +301,6 @@
|
|||
`define DCREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef DCRSQ_SIZE
|
||||
`define DCRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef DMSHR_SIZE
|
||||
`define DMSHR_SIZE `LSUQ_SIZE
|
||||
|
@ -348,11 +338,6 @@
|
|||
`define SCREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef SCRSQ_SIZE
|
||||
`define SCRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// L2cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
|
@ -370,11 +355,6 @@
|
|||
`define L2CREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef L2CRSQ_SIZE
|
||||
`define L2CRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef L2MSHR_SIZE
|
||||
`define L2MSHR_SIZE 16
|
||||
|
@ -407,11 +387,6 @@
|
|||
`define L3CREQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Core Response Queue Size
|
||||
`ifndef L3CRSQ_SIZE
|
||||
`define L3CRSQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
`ifndef L3MSHR_SIZE
|
||||
`define L3MSHR_SIZE 16
|
||||
|
|
|
@ -101,7 +101,6 @@ module VX_mem_unit # (
|
|||
.CREQ_SIZE (`ICREQ_SIZE),
|
||||
.MSHR_SIZE (`IMSHR_SIZE),
|
||||
.DRSQ_SIZE (`IDRSQ_SIZE),
|
||||
.CRSQ_SIZE (`ICRSQ_SIZE),
|
||||
.DREQ_SIZE (`IDREQ_SIZE),
|
||||
.WRITE_ENABLE (0),
|
||||
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
|
||||
|
@ -161,7 +160,6 @@ module VX_mem_unit # (
|
|||
.CREQ_SIZE (`DCREQ_SIZE),
|
||||
.MSHR_SIZE (`DMSHR_SIZE),
|
||||
.DRSQ_SIZE (`DDRSQ_SIZE),
|
||||
.CRSQ_SIZE (`DCRSQ_SIZE),
|
||||
.DREQ_SIZE (`DDREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
|
@ -227,7 +225,6 @@ module VX_mem_unit # (
|
|||
.WORD_SIZE (`SWORD_SIZE),
|
||||
.NUM_REQS (`SNUM_REQUESTS),
|
||||
.CREQ_SIZE (`SCREQ_SIZE),
|
||||
.CRSQ_SIZE (`SCRSQ_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.BANK_ADDR_OFFSET (`SBANK_ADDR_OFFSET)
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
`define SCOPE_ASSIGN(d,s) assign scope_``d = s
|
||||
|
||||
`define SCOPE_SIZE 16384
|
||||
`define SCOPE_SIZE 4096
|
||||
|
||||
`else
|
||||
|
||||
|
|
|
@ -172,7 +172,6 @@ module Vortex (
|
|||
.CREQ_SIZE (`L3CREQ_SIZE),
|
||||
.MSHR_SIZE (`L3MSHR_SIZE),
|
||||
.DRSQ_SIZE (`L3DRSQ_SIZE),
|
||||
.CRSQ_SIZE (`L3CRSQ_SIZE),
|
||||
.DREQ_SIZE (`L3DREQ_SIZE),
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||
|
|
158
hw/rtl/cache/VX_bank.v
vendored
158
hw/rtl/cache/VX_bank.v
vendored
|
@ -24,9 +24,6 @@ module VX_bank #(
|
|||
parameter MSHR_SIZE = 1,
|
||||
// DRAM Response Queue Size
|
||||
parameter DRSQ_SIZE = 1,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 1,
|
||||
// DRAM Request Queue Size
|
||||
parameter DREQ_SIZE = 1,
|
||||
|
||||
|
@ -154,7 +151,7 @@ module VX_bank #(
|
|||
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] req_tid_st0, req_tid_st1;
|
||||
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
|
||||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] rdata_st0, rdata_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] rdata_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] wdata_st0, wdata_st1;
|
||||
wire [CORE_TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire valid_st0, valid_st1;
|
||||
|
@ -169,9 +166,11 @@ module VX_bank #(
|
|||
wire mshr_pending_st0;
|
||||
wire is_flush_st0;
|
||||
|
||||
wire crsq_alm_full, crsq_push, crsq_pop;
|
||||
wire dreq_alm_full, dreq_push, dreq_pop;
|
||||
wire crsq_in_valid, crsq_in_ready, crsq_in_stall;
|
||||
wire dreq_alm_full;
|
||||
wire drsq_pop;
|
||||
|
||||
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
|
@ -179,7 +178,7 @@ module VX_bank #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (creq_pop && !creq_rw),
|
||||
.pop (crsq_push),
|
||||
.pop (crsq_in_fire),
|
||||
.full (mshr_alm_full),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (size)
|
||||
|
@ -193,15 +192,16 @@ module VX_bank #(
|
|||
|
||||
wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1);
|
||||
assign mshr_pop = mshr_pop_unqual
|
||||
&& !crsq_alm_full // ensure core response ready
|
||||
&& !(!IN_ORDER_DRAM && is_miss_st1 && is_mshr_st1); // do not schedule another mshr request if the previous one missed
|
||||
&& !(!IN_ORDER_DRAM && is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed
|
||||
&& !crsq_in_stall; // ensure core response ready
|
||||
|
||||
assign drsq_pop = drsq_pop_unqual;
|
||||
assign drsq_pop = drsq_pop_unqual
|
||||
&& !crsq_in_stall; // ensure core response ready
|
||||
|
||||
assign creq_pop = creq_pop_unqual
|
||||
&& !dreq_alm_full // ensure dram request ready
|
||||
&& !crsq_alm_full // ensure core response ready
|
||||
&& !mshr_alm_full; // ensure mshr enqueue ready
|
||||
&& !dreq_alm_full // ensure dram request ready
|
||||
&& !mshr_alm_full // ensure mshr enqueue ready
|
||||
&& !crsq_in_stall; // ensure core response ready
|
||||
|
||||
assign dram_rsp_ready = drsq_pop;
|
||||
|
||||
|
@ -252,7 +252,7 @@ module VX_bank #(
|
|||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.enable (!crsq_in_stall),
|
||||
.data_in ({
|
||||
flush_enable || mshr_pop || drsq_pop || creq_pop,
|
||||
flush_enable,
|
||||
|
@ -326,52 +326,15 @@ module VX_bank #(
|
|||
|
||||
assign fill_req_unqual_st0 = !mem_rw_st0 && (!force_miss_st0 || (!IN_ORDER_DRAM && is_mshr_st0 && !prev_miss_dep_st0));
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] rdata_unqual;
|
||||
|
||||
wire writeen_st1 = writeen_unqual_st1 && (is_fill_st1 || !force_miss_st1);
|
||||
|
||||
wire rw_hazard = valid_st1 && writeen_st1 && (addr_st0 == addr_st1)
|
||||
&& ((`WORDS_PER_LINE == 1) || (is_fill_st1 || (wsel_st0 == wsel_st1)));
|
||||
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
for (genvar p = 0; p < NUM_PORTS; p++) begin
|
||||
reg [`WORD_WIDTH-1:0] read_data_r;
|
||||
wire [`WORD_WIDTH-1:0] write_data = wdata_st1[wsel_st0 * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
always @(*) begin
|
||||
read_data_r = rdata_unqual[wsel_st0[p] * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
for (integer i = 0; i < WORD_SIZE; i++) begin
|
||||
if (rw_hazard
|
||||
&& (is_fill_st1 || (WRITE_ENABLE && byteen_st1[p][i]))
|
||||
&& ((NUM_PORTS == 1) || pmask_st1[p])) begin
|
||||
read_data_r[i * 8 +: 8] = write_data[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_st0[p] = read_data_r;
|
||||
end
|
||||
end else begin
|
||||
reg [`WORD_WIDTH-1:0] read_data_r;
|
||||
always @(*) begin
|
||||
read_data_r = rdata_unqual;
|
||||
for (integer i = 0; i < WORD_SIZE; i++) begin
|
||||
if (rw_hazard
|
||||
&& (is_fill_st1 || (WRITE_ENABLE && byteen_st1[0][i]))) begin
|
||||
read_data_r[i * 8 +: 8] = wdata_st1[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign rdata_st0[0] = read_data_r;
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`WORD_WIDTH + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, fill_req_unqual_st0, incoming_fill_st0, miss_st0, force_miss_st0, mem_rw_st0, addr_st0, rdata_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}),
|
||||
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, fill_req_unqual_st1, incoming_fill_st1, miss_st1, force_miss_st1, mem_rw_st1, addr_st1, rdata_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1})
|
||||
.enable (!crsq_in_stall),
|
||||
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, fill_req_unqual_st0, incoming_fill_st0, miss_st0, force_miss_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0}),
|
||||
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, fill_req_unqual_st1, incoming_fill_st1, miss_st1, force_miss_st1, mem_rw_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_tid_st1, pmask_st1, tag_st1})
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
|
@ -382,6 +345,8 @@ module VX_bank #(
|
|||
end
|
||||
`endif
|
||||
|
||||
wire writeen_st1 = writeen_unqual_st1 && (is_fill_st1 || !force_miss_st1);
|
||||
|
||||
wire crsq_push_st1 = !is_fill_st1 && !mem_rw_st1 && !miss_st1 && !force_miss_st1;
|
||||
|
||||
wire mshr_push_st1 = !is_fill_st1 && !mem_rw_st1 && (miss_st1 || force_miss_st1);
|
||||
|
@ -424,27 +389,25 @@ module VX_bank #(
|
|||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.debug_pc_r (debug_pc_st0),
|
||||
.debug_wid_r (debug_wid_st0),
|
||||
.debug_pc_w (debug_pc_st1),
|
||||
.debug_wid_w (debug_wid_st1),
|
||||
.debug_pc (debug_pc_st1),
|
||||
.debug_wid (debug_wid_st1),
|
||||
`endif
|
||||
|
||||
.addr (addr_st1),
|
||||
|
||||
// reading
|
||||
.readen (valid_st0 && !is_fill_st0 && !mem_rw_st0),
|
||||
.raddr (addr_st0),
|
||||
.rdata (rdata_unqual),
|
||||
.readen (valid_st1 && !is_fill_st1 && !mem_rw_st1),
|
||||
.rdata (rdata_st1),
|
||||
|
||||
// writing
|
||||
.writeen (valid_st1 && writeen_st1),
|
||||
.is_fill (is_fill_st1),
|
||||
.byteen (line_byteen_st1),
|
||||
.waddr (addr_st1),
|
||||
.byteen (line_byteen_st1),
|
||||
.wdata (wdata_st1)
|
||||
);
|
||||
|
||||
assign mshr_push = valid_st1 && mshr_push_st1;
|
||||
wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1;
|
||||
wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1 && crsq_in_ready;
|
||||
wire mshr_restore = !IN_ORDER_DRAM && is_mshr_st1;
|
||||
`RUNTIME_ASSERT(!IN_ORDER_DRAM || !(mshr_push && mshr_restore), ("Oops!"))
|
||||
|
||||
|
@ -508,50 +471,49 @@ module VX_bank #(
|
|||
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
|
||||
wire [NUM_PORTS-1:0][`REQS_BITS-1:0] crsq_tid;
|
||||
wire [CORE_TAG_WIDTH-1:0] crsq_tag;
|
||||
wire crsq_empty;
|
||||
|
||||
assign crsq_push = valid_st1 && crsq_push_st1;
|
||||
assign crsq_pop = core_rsp_valid && core_rsp_ready;
|
||||
assign crsq_data = rdata_st1;
|
||||
assign crsq_in_valid = valid_st1 && crsq_push_st1;
|
||||
assign crsq_in_stall = crsq_in_valid && !crsq_in_ready;
|
||||
|
||||
assign crsq_pmask = pmask_st1;
|
||||
assign crsq_tid = req_tid_st1;
|
||||
assign crsq_tag = tag_st1;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
.ALM_FULL (CRSQ_SIZE-2),
|
||||
.BUFFERED (1)
|
||||
) core_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (crsq_push),
|
||||
.pop (crsq_pop),
|
||||
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}),
|
||||
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}),
|
||||
.empty (crsq_empty),
|
||||
.alm_full (crsq_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
if (`WORDS_PER_LINE > 1) begin
|
||||
for (genvar p = 0; p < NUM_PORTS; ++p) begin
|
||||
assign crsq_data[p] = rdata_st1[wsel_st1[p] * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
assign crsq_data = rdata_st1;
|
||||
end
|
||||
|
||||
assign core_rsp_valid = !crsq_empty;
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||
.BUFFERED (1)
|
||||
) core_rsp_req (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (crsq_in_valid),
|
||||
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_tid}),
|
||||
.ready_in (crsq_in_ready),
|
||||
.valid_out (core_rsp_valid),
|
||||
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_tid}),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
// Enqueue DRAM request
|
||||
|
||||
wire [CACHE_LINE_SIZE-1:0] dreq_byteen;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] dreq_addr;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] dreq_data;
|
||||
wire dreq_empty, writeback;
|
||||
wire dreq_push, dreq_pop, dreq_empty, dreq_rw;
|
||||
|
||||
assign dreq_push = valid_st1 && dreq_push_st1;
|
||||
|
||||
assign dreq_pop = dram_req_valid && dram_req_ready;
|
||||
|
||||
assign writeback = WRITE_ENABLE && do_writeback_st1;
|
||||
|
||||
assign dreq_byteen = writeback ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}};
|
||||
assign dreq_rw = WRITE_ENABLE && do_writeback_st1;
|
||||
assign dreq_byteen = dreq_rw ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}};
|
||||
assign dreq_addr = addr_st1;
|
||||
assign dreq_data = wdata_st1;
|
||||
|
||||
|
@ -564,7 +526,7 @@ module VX_bank #(
|
|||
.reset (reset),
|
||||
.push (dreq_push),
|
||||
.pop (dreq_pop),
|
||||
.data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}),
|
||||
.data_in ({dreq_rw, dreq_byteen, dreq_addr, dreq_data}),
|
||||
.data_out ({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
|
||||
.empty (dreq_empty),
|
||||
.alm_full (dreq_alm_full),
|
||||
|
@ -582,7 +544,7 @@ module VX_bank #(
|
|||
`SCOPE_ASSIGN (miss_st0, miss_st0);
|
||||
`SCOPE_ASSIGN (force_miss_st0, force_miss_st0);
|
||||
`SCOPE_ASSIGN (mshr_push, mshr_push);
|
||||
`SCOPE_ASSIGN (crsq_alm_full, crsq_alm_full);
|
||||
`SCOPE_ASSIGN (crsq_in_stall, crsq_in_stall);
|
||||
`SCOPE_ASSIGN (dreq_alm_full, dreq_alm_full);
|
||||
`SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full);
|
||||
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
|
||||
|
@ -591,7 +553,7 @@ module VX_bank #(
|
|||
`ifdef PERF_ENABLE
|
||||
assign perf_read_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && !mem_rw_st1;
|
||||
assign perf_write_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && mem_rw_st1;
|
||||
assign perf_pipe_stalls = crsq_alm_full || dreq_alm_full || mshr_alm_full;
|
||||
assign perf_pipe_stalls = crsq_in_stall || dreq_alm_full || mshr_alm_full;
|
||||
assign perf_mshr_stalls = mshr_alm_full;
|
||||
`endif
|
||||
|
||||
|
@ -604,8 +566,8 @@ module VX_bank #(
|
|||
$display("%t: cache%0d:%0d miss with incoming fill - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
assert(!is_mshr_st1);
|
||||
end
|
||||
if (crsq_alm_full || dreq_alm_full || mshr_alm_full) begin
|
||||
$display("%t: cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_alm_full, dreq_alm_full, mshr_alm_full);
|
||||
if (crsq_in_stall || dreq_alm_full || mshr_alm_full) begin
|
||||
$display("%t: cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_in_stall, dreq_alm_full, mshr_alm_full);
|
||||
end
|
||||
if (flush_enable) begin
|
||||
$display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID));
|
||||
|
@ -622,7 +584,7 @@ module VX_bank #(
|
|||
else
|
||||
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel);
|
||||
end
|
||||
if (crsq_push) begin
|
||||
if (crsq_in_fire) begin
|
||||
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
|
||||
end
|
||||
if (dreq_push) begin
|
||||
|
|
4
hw/rtl/cache/VX_cache.v
vendored
4
hw/rtl/cache/VX_cache.v
vendored
|
@ -23,9 +23,6 @@ module VX_cache #(
|
|||
parameter MSHR_SIZE = 16,
|
||||
// DRAM Response Queue Size
|
||||
parameter DRSQ_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 4,
|
||||
// DRAM Request Queue Size
|
||||
parameter DREQ_SIZE = 4,
|
||||
|
||||
|
@ -298,7 +295,6 @@ module VX_cache #(
|
|||
.CREQ_SIZE (CREQ_SIZE),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.DRSQ_SIZE (DRSQ_SIZE),
|
||||
.CRSQ_SIZE (CRSQ_SIZE),
|
||||
.DREQ_SIZE (DREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
|
|
44
hw/rtl/cache/VX_data_access.v
vendored
44
hw/rtl/cache/VX_data_access.v
vendored
|
@ -19,49 +19,43 @@ module VX_data_access #(
|
|||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[31:0] debug_pc_r,
|
||||
input wire[`NW_BITS-1:0] debug_wid_r,
|
||||
input wire[31:0] debug_pc_w,
|
||||
input wire[`NW_BITS-1:0] debug_wid_w,
|
||||
input wire[31:0] debug_pc,
|
||||
input wire[`NW_BITS-1:0] debug_wid,
|
||||
`IGNORE_WARNINGS_END
|
||||
`endif
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] addr,
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
// reading
|
||||
input wire readen,
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] raddr,
|
||||
`IGNORE_WARNINGS_END
|
||||
output wire [`CACHE_LINE_WIDTH-1:0] rdata,
|
||||
|
||||
// writing
|
||||
input wire writeen,
|
||||
input wire is_fill,
|
||||
input wire [CACHE_LINE_SIZE-1:0] byteen,
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] waddr,
|
||||
`IGNORE_WARNINGS_END
|
||||
input wire [CACHE_LINE_SIZE-1:0] byteen,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] wdata
|
||||
);
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (readen)
|
||||
|
||||
wire [`LINE_SELECT_BITS-1:0] line_raddr, line_waddr;
|
||||
wire [`LINE_SELECT_BITS-1:0] line_addr;
|
||||
wire [CACHE_LINE_SIZE-1:0] byte_enable;
|
||||
|
||||
assign line_raddr = raddr[`LINE_SELECT_BITS-1:0];
|
||||
assign line_waddr = waddr[`LINE_SELECT_BITS-1:0];
|
||||
assign line_addr = addr[`LINE_SELECT_BITS-1:0];
|
||||
assign byte_enable = (WRITE_ENABLE && !is_fill) ? byteen : {CACHE_LINE_SIZE{1'b1}};
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(CACHE_LINE_SIZE * 8),
|
||||
.SIZE(`LINES_PER_BANK),
|
||||
.BYTEENW(CACHE_LINE_SIZE),
|
||||
.RWCHECK(1)
|
||||
VX_sp_ram #(
|
||||
.DATAW (CACHE_LINE_SIZE * 8),
|
||||
.SIZE (`LINES_PER_BANK),
|
||||
.BYTEENW (CACHE_LINE_SIZE),
|
||||
.RWCHECK (1)
|
||||
) data_store (
|
||||
.clk(clk),
|
||||
.raddr(line_raddr),
|
||||
.waddr(line_waddr),
|
||||
.wren(writeen),
|
||||
.addr(line_addr),
|
||||
.wren(writeen),
|
||||
.byteen(byte_enable),
|
||||
.rden(1'b1),
|
||||
.din(wdata),
|
||||
|
@ -72,13 +66,13 @@ module VX_data_access #(
|
|||
always @(posedge clk) begin
|
||||
if (writeen) begin
|
||||
if (is_fill) begin
|
||||
$display("%t: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(waddr, BANK_ID), line_waddr, wdata);
|
||||
$display("%t: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, wdata);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(waddr, BANK_ID), debug_wid_w, debug_pc_w, byte_enable, line_waddr, wdata);
|
||||
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, byte_enable, line_addr, wdata);
|
||||
end
|
||||
end
|
||||
if (readen) begin
|
||||
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr, BANK_ID), debug_wid_r, debug_pc_r, line_raddr, rdata);
|
||||
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), debug_wid, debug_pc, line_addr, rdata);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
68
hw/rtl/cache/VX_shared_mem.v
vendored
68
hw/rtl/cache/VX_shared_mem.v
vendored
|
@ -15,9 +15,6 @@ module VX_shared_mem #(
|
|||
// Core Request Queue Size
|
||||
parameter CREQ_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 4,
|
||||
|
||||
// core request tag size
|
||||
parameter CORE_TAG_WIDTH = 1,
|
||||
|
||||
|
@ -113,10 +110,10 @@ module VX_shared_mem #(
|
|||
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
|
||||
|
||||
wire creq_push, creq_pop, creq_empty, creq_full;
|
||||
wire crsq_full;
|
||||
wire crsq_in_ready;
|
||||
|
||||
assign creq_push = (| core_req_valid) && !creq_full;
|
||||
assign creq_pop = ~creq_empty && ~crsq_full;
|
||||
assign creq_pop = ~creq_empty && crsq_in_ready;
|
||||
|
||||
assign per_bank_core_req_ready_unqual = ~creq_full;
|
||||
|
||||
|
@ -167,7 +164,7 @@ module VX_shared_mem #(
|
|||
) data (
|
||||
.clk (clk),
|
||||
.addr (per_bank_core_req_addr[i]),
|
||||
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i] && ~crsq_full),
|
||||
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i] && crsq_in_ready),
|
||||
.byteen (per_bank_core_req_byteen[i]),
|
||||
.rden (1'b1),
|
||||
.din (per_bank_core_req_data[i]),
|
||||
|
@ -175,58 +172,53 @@ module VX_shared_mem #(
|
|||
);
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
|
||||
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
|
||||
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
|
||||
reg [NUM_REQS-1:0] core_rsp_valids_in;
|
||||
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
|
||||
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
|
||||
|
||||
always @(*) begin
|
||||
core_rsp_valid_unqual = 0;
|
||||
core_rsp_data_unqual = 'x;
|
||||
core_rsp_tag_unqual = 'x;
|
||||
core_rsp_valids_in = 0;
|
||||
core_rsp_data_in = 'x;
|
||||
core_rsp_tag_in = 'x;
|
||||
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||
if (per_bank_core_req_valid[i]) begin
|
||||
core_rsp_valid_unqual[per_bank_core_req_tid[i]] = 1;
|
||||
core_rsp_data_unqual[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
|
||||
core_rsp_tag_unqual = per_bank_core_req_tag[i];
|
||||
core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
|
||||
core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
|
||||
core_rsp_tag_in = per_bank_core_req_tag[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_pc_st0, debug_wid_st0} = core_rsp_tag_unqual[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
|
||||
assign {debug_pc_st0, debug_wid_st0} = core_rsp_tag_in[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
|
||||
end else begin
|
||||
assign {debug_pc_st0, debug_wid_st0} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
wire [NUM_REQS-1:0] core_rsp_valid_tmask;
|
||||
wire crsq_push, crsq_pop, crsq_empty;
|
||||
wire [NUM_REQS-1:0] core_rsp_valids_out;
|
||||
wire core_rsp_valid_out;
|
||||
|
||||
wire core_rsp_rw = | (per_bank_core_req_valid & per_bank_core_req_rw);
|
||||
|
||||
assign crsq_push = ~creq_empty && ~core_rsp_rw && ~crsq_full;
|
||||
assign crsq_pop = ~crsq_empty && core_rsp_ready;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
|
||||
.SIZE (CRSQ_SIZE),
|
||||
wire crsq_in_valid = ~creq_empty && ~core_rsp_rw;
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
|
||||
.BUFFERED (1)
|
||||
) core_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (crsq_push),
|
||||
.pop (crsq_pop),
|
||||
.data_in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
|
||||
.data_out ({core_rsp_valid_tmask, core_rsp_data, core_rsp_tag}),
|
||||
.empty (crsq_empty),
|
||||
.full (crsq_full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
) core_rsp_req (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (crsq_in_valid),
|
||||
.data_in ({core_rsp_valids_in, core_rsp_data_in, core_rsp_tag_in}),
|
||||
.ready_in (crsq_in_ready),
|
||||
.valid_out (core_rsp_valid_out),
|
||||
.data_out ({core_rsp_valids_out, core_rsp_data, core_rsp_tag}),
|
||||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
assign core_rsp_valid = core_rsp_valid_tmask & {NUM_REQS{~crsq_empty}};
|
||||
assign core_rsp_valid = core_rsp_valids_out & {NUM_REQS{core_rsp_valid_out}};
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
always @(posedge clk) begin
|
||||
|
@ -280,4 +272,4 @@ module VX_shared_mem #(
|
|||
assign perf_cache_if.crsp_stalls = perf_crsp_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
endmodule
|
|
@ -212,7 +212,7 @@
|
|||
"miss_st0": 1,
|
||||
"force_miss_st0": 1,
|
||||
"mshr_push": 1,
|
||||
"?crsq_alm_full": 1,
|
||||
"?crsq_in_stall": 1,
|
||||
"?dreq_alm_full": 1,
|
||||
"?mshr_alm_full": 1
|
||||
}
|
||||
|
|
|
@ -41,25 +41,25 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
|
|||
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
|
||||
|
||||
set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
|
||||
set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
#set_global_assignment -name ALLOW_ANY_RAM_SIZE_FOR_RECOGNITION ON
|
||||
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
#set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
||||
set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
set_global_assignment -name SEED 1
|
||||
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
||||
#set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
|
||||
#set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
#set_global_assignment -name SEED 1
|
||||
|
||||
switch $opts(family) {
|
||||
"Arria 10" {
|
||||
|
|
|
@ -1,79 +0,0 @@
|
|||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
|
||||
|
||||
#FAMILY = "Stratix 10"
|
||||
#DEVICE = 1SX280HN2F43E2VG
|
||||
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
|
||||
|
||||
PROJECT = vortex_afu
|
||||
TOP_LEVEL_ENTITY = vortex_afu
|
||||
SRC_FILE = vortex_afu.sv
|
||||
|
||||
RTL_DIR=../../../rtl
|
||||
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
|
||||
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE)
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1
|
||||
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --parallel --do_report_timing
|
||||
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
smart: smart.log
|
||||
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
Loading…
Add table
Add a link
Reference in a new issue