diff --git a/ci/blackbox.sh b/ci/blackbox.sh index f21b04d81..47525ce79 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -116,7 +116,7 @@ case $APP in APP_PATH=$VORTEX_HOME/benchmarks/opencl/sgemm ;; vecadd) - APP_PATH=$VORTEX_HOME/benchmarks/opencl/vacadd + APP_PATH=$VORTEX_HOME/benchmarks/opencl/vecadd ;; basic) APP_PATH=$VORTEX_HOME/driver/tests/basic diff --git a/driver/common/vx_utils.cpp b/driver/common/vx_utils.cpp index 27496a19c..61371d24f 100644 --- a/driver/common/vx_utils.cpp +++ b/driver/common/vx_utils.cpp @@ -292,7 +292,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) { int avg_dram_lat = (int)(double(dram_lat_per_core) / double(dram_reads_per_core)); int dram_utilization = (int)((1.0 - (double(dram_reads_per_core + dram_writes_per_core) / double(dram_reads_per_core + dram_writes_per_core + dram_stalls_per_core))) * 100); if (num_cores > 1) fprintf(stream, "PERF: core%d: dram requests=%ld (reads=%ld, writes=%ld)\n", core_id, (dram_reads_per_core + dram_writes_per_core), dram_reads_per_core, dram_writes_per_core); - if (num_cores > 1) fprintf(stream, "PERF: core%d: dram stalls=%d (utilization=%d%%)\n", core_id, dram_stalls_per_core, dram_utilization); + if (num_cores > 1) fprintf(stream, "PERF: core%d: dram stalls=%ld (utilization=%d%%)\n", core_id, dram_stalls_per_core, dram_utilization); if (num_cores > 1) fprintf(stream, "PERF: core%d: average dram latency=%d cycles\n", core_id, avg_dram_lat); dram_reads += dram_reads_per_core; dram_writes += dram_writes_per_core; diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 4e55060be..f06fa7020 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -204,10 +204,7 @@ module VX_cluster #( .dram_rsp_valid (dram_rsp_valid), .dram_rsp_tag (dram_rsp_tag), .dram_rsp_data (dram_rsp_data), - .dram_rsp_ready (dram_rsp_ready), - - // Miss status - `UNUSED_PIN (miss_vec) + .dram_rsp_ready (dram_rsp_ready) ); end else begin diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 
ba0cba28a..4d893d9b6 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -234,8 +234,8 @@ /////////////////////////////////////////////////////////////////////////////// -`ifdef DBG_CACHE_REQ_INFO // pc, rd, wid -`define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS) +`ifdef DBG_CACHE_REQ_INFO // wid PC +`define DBG_CACHE_REQ_MDATAW (`NW_BITS + 32) `else `define DBG_CACHE_REQ_MDATAW 0 `endif diff --git a/hw/rtl/VX_icache_stage.v b/hw/rtl/VX_icache_stage.v index 12127ccfa..d0b4ac09e 100644 --- a/hw/rtl/VX_icache_stage.v +++ b/hw/rtl/VX_icache_stage.v @@ -51,7 +51,7 @@ module VX_icache_stage #( assign ifetch_req_if.ready = icache_req_if.ready; `ifdef DBG_CACHE_REQ_INFO - assign icache_req_if.tag = {ifetch_req_if.PC, `NR_BITS'(0), ifetch_req_if.wid, req_tag}; + assign icache_req_if.tag = {ifetch_req_if.PC, ifetch_req_if.wid, req_tag}; `else assign icache_req_if.tag = req_tag; `endif diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 784552205..f415a9994 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -123,7 +123,7 @@ module VX_issue #( `SCOPE_ASSIGN (writeback_pc, writeback_if.PC); `SCOPE_ASSIGN (writeback_rd, writeback_if.rd); `SCOPE_ASSIGN (writeback_data, writeback_if.data); - `SCOPE_ASSIGN (writeback_eop, writeback_if.eof); + `SCOPE_ASSIGN (writeback_eop, writeback_if.eop); `ifdef PERF_ENABLE reg [63:0] perf_ibf_stalls; diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 4cd4017db..4cd044251 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -172,7 +172,7 @@ module VX_lsu_unit #( assign dcache_req_if.data = req_data; `ifdef DBG_CACHE_REQ_INFO - assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_rd, req_wid, req_tag}}}; + assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_wid, req_tag}}}; `else assign dcache_req_if.tag = {`NUM_THREADS{req_tag}}; `endif diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index a22731586..5b11981c1 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -136,10 
+136,7 @@ module VX_mem_unit # ( .dram_rsp_valid (icache_dram_rsp_if.valid), .dram_rsp_data (icache_dram_rsp_if.data), .dram_rsp_tag (icache_dram_rsp_if.tag), - .dram_rsp_ready (icache_dram_rsp_if.ready), - - // Miss status - `UNUSED_PIN (miss_vec) + .dram_rsp_ready (icache_dram_rsp_if.ready) ); VX_cache #( @@ -197,10 +194,7 @@ module VX_mem_unit # ( .dram_rsp_valid (dcache_dram_rsp_if.valid), .dram_rsp_data (dcache_dram_rsp_if.data), .dram_rsp_tag (dcache_dram_rsp_if.tag), - .dram_rsp_ready (dcache_dram_rsp_if.ready), - - // Miss status - `UNUSED_PIN (miss_vec) + .dram_rsp_ready (dcache_dram_rsp_if.ready) ); if (`SM_ENABLE) begin @@ -260,10 +254,7 @@ module VX_mem_unit # ( .dram_rsp_valid (0), .dram_rsp_data (0), .dram_rsp_tag (0), - `UNUSED_PIN (dram_rsp_ready), - - // Miss status - `UNUSED_PIN (miss_vec) + `UNUSED_PIN (dram_rsp_ready) ); end diff --git a/hw/rtl/VX_print_instr.vh b/hw/rtl/VX_print_instr.vh index 68fd87563..00c373d08 100644 --- a/hw/rtl/VX_print_instr.vh +++ b/hw/rtl/VX_print_instr.vh @@ -136,7 +136,7 @@ task print_ex_op; default: $write("?"); endcase end - default:; + default: $write("?"); endcase end endtask diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 41f030d0a..7da97c5b0 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -206,10 +206,7 @@ module Vortex ( .dram_rsp_valid (dram_rsp_valid), .dram_rsp_data (dram_rsp_data), .dram_rsp_tag (dram_rsp_tag), - .dram_rsp_ready (dram_rsp_ready), - - // Miss status - `UNUSED_PIN (miss_vec) + .dram_rsp_ready (dram_rsp_ready) ); end else begin diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index cd977be5a..4660ef3d8 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -47,6 +47,13 @@ module VX_bank #( input wire clk, input wire reset, +`ifdef PERF_ENABLE + output wire perf_read_misses, + output wire perf_write_misses, + output wire perf_mshr_stalls, + output wire perf_pipe_stalls, +`endif + // Core Request input wire core_req_valid, input wire [`REQS_BITS-1:0] 
core_req_tid, @@ -76,39 +83,21 @@ module VX_bank #( input wire dram_rsp_valid, input wire [`LINE_ADDR_WIDTH-1:0] dram_rsp_addr, input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data, - output wire dram_rsp_ready, - -`ifdef PERF_ENABLE - output wire perf_read_misses, - output wire perf_write_misses, - output wire perf_mshr_stalls, - output wire perf_pipe_stalls, -`endif - - // Misses - output wire misses + output wire dram_rsp_ready ); `ifdef DBG_CACHE_REQ_INFO /* verilator lint_off UNUSED */ wire [31:0] debug_pc_st0; - wire [`NR_BITS-1:0] debug_rd_st0; wire [`NW_BITS-1:0] debug_wid_st0; - wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0; wire [31:0] debug_pc_st1; - wire [`NR_BITS-1:0] debug_rd_st1; wire [`NW_BITS-1:0] debug_wid_st1; - wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1; + + wire [31:0] debug_pc_st12; + wire [`NW_BITS-1:0] debug_wid_st12; wire [31:0] debug_pc_st2; - wire [`NR_BITS-1:0] debug_rd_st2; wire [`NW_BITS-1:0] debug_wid_st2; - wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2; - - wire [31:0] debug_pc_st3; - wire [`NR_BITS-1:0] debug_rd_st3; - wire [`NW_BITS-1:0] debug_wid_st3; - wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st3; /* verilator lint_on UNUSED */ `endif @@ -121,7 +110,6 @@ module VX_bank #( wire drsq_push = dram_rsp_valid && dram_rsp_ready; if (DRAM_ENABLE) begin - wire drsq_full; assign dram_rsp_ready = !drsq_full; @@ -183,10 +171,11 @@ module VX_bank #( `UNUSED_PIN (size) ); + wire mshr_pop; reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size; wire [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size_n; - reg mshr_going_full; - wire mshr_pop; + reg mshr_going_full; + wire mshr_valid_st0; wire [`REQS_BITS-1:0] mshr_tid_st0; wire [`LINE_ADDR_WIDTH-1:0] mshr_addr_st0; @@ -213,7 +202,10 @@ module VX_bank #( wire is_mshr_st1; wire valid_st1; wire [`LINE_ADDR_WIDTH-1:0] addr_st1; - wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1; + wire [`WORD_WIDTH-1:0] readword_st1; + wire [`BANK_LINE_WIDTH-1:0] readdata_st1; + 
wire [BANK_LINE_SIZE-1:0] dirtyb_st1; wire [`WORD_WIDTH-1:0] writeword_st1; wire [`BANK_LINE_WIDTH-1:0] writedata_st1; wire [`TAG_SELECT_BITS-1:0] readtag_st1; @@ -224,7 +216,21 @@ module VX_bank #( wire [`REQ_TAG_WIDTH-1:0] tag_st1; wire mem_rw_st1; wire [WORD_SIZE-1:0] byteen_st1; - wire [`REQS_BITS-1:0] req_tid_st1; + wire [`REQS_BITS-1:0] req_tid_st1; + wire core_req_hit_st1; + wire incoming_fill_st1; + wire do_writeback_st1; + wire mshr_push_st1; + wire crsq_push_st1; + wire dreq_push_st1; + + wire valid_st12; + wire writeen_st12; + wire [`LINE_ADDR_WIDTH-1:0] addr_st12; + wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st12; + wire [WORD_SIZE-1:0] byteen_st12; + wire [`WORD_WIDTH-1:0] writeword_st12; + wire [`REQ_TAG_WIDTH-1:0] tag_st12; wire valid_st2; wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st2; @@ -232,7 +238,6 @@ module VX_bank #( wire [`WORD_WIDTH-1:0] writeword_st2; wire [`BANK_LINE_WIDTH-1:0] readdata_st2; wire [`BANK_LINE_WIDTH-1:0] writedata_st2; - wire dirty_st2; wire [BANK_LINE_SIZE-1:0] dirtyb_st2; wire [`TAG_SELECT_BITS-1:0] readtag_st2; wire is_fill_st2; @@ -240,23 +245,17 @@ module VX_bank #( wire miss_st2; wire force_miss_st2; wire[`LINE_ADDR_WIDTH-1:0] addr_st2; - wire writeen_st2; - wire core_req_hit_st2; - wire incoming_fill_st2; + wire writeen_st2; wire [`REQ_TAG_WIDTH-1:0] tag_st2; wire mem_rw_st2; wire [WORD_SIZE-1:0] byteen_st2; - wire [`REQS_BITS-1:0] req_tid_st2; - - wire valid_st3; - wire is_mshr_st3; - wire miss_st3; - wire force_miss_st3; - wire [`LINE_ADDR_WIDTH-1:0] addr_st3; - wire [`REQ_TAG_WIDTH-1:0] tag_st3; - wire mem_rw_st3; - wire [WORD_SIZE-1:0] byteen_st3; - wire [`REQS_BITS-1:0] req_tid_st3; + wire [`REQS_BITS-1:0] req_tid_st2; + wire core_req_hit_st2; + wire incoming_fill_st2; + wire do_writeback_st2; + wire mshr_push_st2; + wire crsq_push_st2; + wire dreq_push_st2; wire mshr_push_stall; wire crsq_push_stall; @@ -264,7 +263,6 @@ module VX_bank #( wire pipeline_stall; wire is_mshr_miss_st2 = valid_st2 && is_mshr_st2 && 
(miss_st2 || force_miss_st2); - wire is_mshr_miss_st3 = valid_st3 && is_mshr_st3 && (miss_st3 || force_miss_st3); wire creq_commit = valid_st2 && (core_req_hit_st2 || (WRITE_THROUGH && mem_rw_st2)) @@ -276,7 +274,7 @@ module VX_bank #( wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !mshr_going_full; assign mshr_pop = mshr_pop_unqual && !pipeline_stall - && !(is_mshr_miss_st2 || is_mshr_miss_st3); // stop if previous request was a miss + && !is_mshr_miss_st2; // stop if previous request was a miss assign drsq_pop = drsq_pop_unqual && !pipeline_stall; assign creq_pop = creq_pop_unqual && !pipeline_stall; @@ -336,9 +334,9 @@ module VX_bank #( `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0} = tag_st0; + assign {debug_pc_st0, debug_wid_st0} = tag_st0[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; end else begin - assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0} = 0; + assign {debug_pc_st0, debug_wid_st0} = 0; end `endif @@ -346,14 +344,14 @@ if (DRAM_ENABLE) begin wire mshr_pending_hazard_st1; - // we have a miss in msrq or in stage 3 for the current address + // we have a miss in mshr or in stage 3 for the current address wire mshr_pending_hazard_st0 = mshr_pending_hazard_unqual_st0 - || (valid_st3 && (miss_st3 || force_miss_st3) && (addr_st3 == addr_st0)); + || (valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st0)); VX_pipe_register #( .DATAW (1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .RESETW (1) - ) pipe_reg0 ( + ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (!pipeline_stall), @@ -363,18 +361,17 @@ if (DRAM_ENABLE) begin `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1} = tag_st1; + 
assign {debug_pc_st1, debug_wid_st1} = tag_st1[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; end else begin - assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1} = 0; + assign {debug_pc_st1, debug_wid_st1} = 0; end `endif // force miss to ensure commit order when a new request has pending previous requests to same block - // also force a miss for msrq requests when previous requests got a miss + // also force a miss for mshr requests when previous requests got a miss wire st2_pending_hazard_st1 = valid_st2 && (miss_st2 || force_miss_st2) && (addr_st2 == addr_st1); - wire st3_pending_hazard_st1 = valid_st3 && (miss_st3 || force_miss_st3) && (addr_st3 == addr_st1); assign force_miss_st1 = (valid_st1 && !is_mshr_st1 && !is_fill_st1 - && (mshr_pending_hazard_st1 || st2_pending_hazard_st1 || st3_pending_hazard_st1)) + && (mshr_pending_hazard_st1 || st2_pending_hazard_st1)) || (valid_st1 && is_mshr_st1 && is_mshr_miss_st2); VX_tag_access #( @@ -392,9 +389,7 @@ if (DRAM_ENABLE) begin `ifdef DBG_CACHE_REQ_INFO .debug_pc (debug_pc_st1), - .debug_rd (debug_rd_st1), .debug_wid (debug_wid_st1), - .debug_tagid (debug_tagid_st1), `endif .stall (pipeline_stall), @@ -413,76 +408,118 @@ if (DRAM_ENABLE) begin .writeen_out (writeen_st1) ); - assign misses = miss_st1; + assign valid_st12 = valid_st2; + assign writeen_st12 = writeen_st2; + assign addr_st12 = addr_st2; + assign wsel_st12 = wsel_st2; + assign byteen_st12 = byteen_st2; + assign writeword_st12 = writeword_st2; + assign tag_st12 = tag_st2; - wire core_req_hit_st1 = !is_fill_st1 && !miss_st1 && !force_miss_st1; + assign core_req_hit_st1 = !is_fill_st1 && !miss_st1 && !force_miss_st1; - wire incoming_fill_st1 = !drsq_empty && (addr_st1 == drsq_addr_st0); + assign incoming_fill_st1 = !drsq_empty && (addr_st1 == drsq_addr_st0); - VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + 
`REQ_TAG_WIDTH), - .RESETW (1) - ) pipe_reg1 ( - .clk (clk), - .reset (reset), - .enable (!pipeline_stall), - .data_in ({valid_st1, incoming_fill_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, dirty_st1, is_fill_st1, addr_st1, wsel_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}), - .data_out ({valid_st2, incoming_fill_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, dirty_st2, is_fill_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_rw_st2, byteen_st2, req_tid_st2, tag_st2}) - ); + wire do_fill_req_st1 = miss_st1 + && !(WRITE_THROUGH && mem_rw_st1) + && (!force_miss_st1 + || (is_mshr_st1 && addr_st1 != addr_st2)) + && !incoming_fill_st1; + + assign do_writeback_st1 = (WRITE_THROUGH && mem_rw_st1) + || (!WRITE_THROUGH && dirty_st1 && is_fill_st1); + + assign dreq_push_st1 = do_fill_req_st1 || do_writeback_st1; + + assign mshr_push_st1 = (miss_st1 || force_miss_st1) + && !(WRITE_THROUGH && mem_rw_st1); + + assign crsq_push_st1 = core_req_hit_st1 && !mem_rw_st1; end else begin `UNUSED_VAR (mshr_pending_hazard_unqual_st0) `UNUSED_VAR (drsq_push) - `UNUSED_VAR (addr_st0) + `UNUSED_VAR (dirty_st1) + `UNUSED_VAR (writeen_st2) + +`ifdef DBG_CACHE_REQ_INFO + assign debug_pc_st1 = debug_pc_st0; + assign debug_wid_st1 = debug_wid_st0; +`endif - assign is_fill_st1 = is_fill_st0; - assign is_mshr_st1 = is_mshr_st0; - assign valid_st1 = valid_st0; - assign wsel_st1 = wsel_st0; - assign writeword_st1= writeword_st0; - assign writedata_st1= writedata_st0; - assign addr_st1 = creq_addr_st0[`LINE_SELECT_ADDR_RNG]; - assign dirty_st1 = 0; - assign readtag_st1 = 0; - assign miss_st1 = 0; - assign writeen_st1 = mem_rw_st1; - assign force_miss_st1 = 0; - assign tag_st1 = tag_st0; - assign mem_rw_st1 = mem_rw_st0; - assign byteen_st1 = byteen_st0; - assign req_tid_st1 = req_tid_st0; - - assign is_fill_st2 = is_fill_st1; - assign is_mshr_st2 = is_mshr_st1; - assign 
valid_st2 = valid_st1; - assign wsel_st2 = wsel_st1; - assign writeword_st2= writeword_st1; - assign writedata_st2= writedata_st1; - assign addr_st2 = addr_st1; - assign dirty_st2 = dirty_st1; - assign readtag_st2 = readtag_st1; - assign miss_st2 = miss_st1; - assign writeen_st2 = writeen_st1; - assign force_miss_st2 = force_miss_st1; - assign tag_st2 = tag_st1; - assign mem_rw_st2 = mem_rw_st1; - assign byteen_st2 = byteen_st1; - assign req_tid_st2 = req_tid_st1; + assign is_fill_st1 = is_fill_st0; + assign is_mshr_st1 = is_mshr_st0; + assign valid_st1 = valid_st0; + assign wsel_st1 = wsel_st0; + assign writeword_st1 = writeword_st0; + assign writedata_st1 = writedata_st0; + assign addr_st1 = creq_addr_st0[`LINE_SELECT_ADDR_RNG]; + assign tag_st1 = tag_st0; + assign mem_rw_st1 = mem_rw_st0; + assign byteen_st1 = byteen_st0; + assign req_tid_st1 = req_tid_st0; + assign dirty_st1 = 0; + assign readtag_st1 = 0; + assign miss_st1 = 0; + assign writeen_st1 = mem_rw_st0; + assign force_miss_st1 = 0; - assign core_req_hit_st2 = 1; - assign incoming_fill_st2 = 0; + assign valid_st12 = valid_st0; + assign writeen_st12 = mem_rw_st0; + assign addr_st12 = addr_st0; + assign wsel_st12 = wsel_st0; + assign byteen_st12 = byteen_st0; + assign writeword_st12 = writeword_st0; + assign tag_st12 = tag_st0; + + assign incoming_fill_st1= 0; + assign core_req_hit_st1 = 1; + assign do_writeback_st1 = 0; + assign mshr_push_st1 = 0; + assign crsq_push_st1 = !mem_rw_st0; + assign dreq_push_st1 = 0; +end - assign misses = 0; -end + VX_pipe_register #( + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `BANK_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), + .RESETW (1) + ) pipe_reg2 ( + .clk (clk), + .reset (reset), + .enable (!pipeline_stall), + .data_in ({valid_st1, mshr_push_st1, crsq_push_st1, dreq_push_st1, do_writeback_st1, incoming_fill_st1, core_req_hit_st1, is_mshr_st1, 
writeen_st1, force_miss_st1, is_fill_st1, addr_st1, wsel_st1, readword_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}), + .data_out ({valid_st2, mshr_push_st2, crsq_push_st2, dreq_push_st2, do_writeback_st2, incoming_fill_st2, core_req_hit_st2, is_mshr_st2, writeen_st2, force_miss_st2, is_fill_st2, addr_st2, wsel_st2, readword_st2, writeword_st2, readtag_st2, miss_st2, writedata_st2, mem_rw_st2, byteen_st2, req_tid_st2, tag_st2}) + ); + + if (WRITE_THROUGH) begin + + assign dirtyb_st2 = dirtyb_st1; + assign readdata_st2 = readdata_st1; + + end else begin + + VX_pipe_register #( + .DATAW (BANK_LINE_SIZE + `BANK_LINE_WIDTH), + .RESETW (0) + ) pipe_reg2b ( + .clk (clk), + .reset (reset), + .enable (!pipeline_stall), + .data_in ({dirtyb_st1, readdata_st1}), + .data_out ({dirtyb_st2, readdata_st2}) + ); + + end `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2} = tag_st2; - end else begin - assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2} = 0; + assign {debug_pc_st12, debug_wid_st12} = tag_st12[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + end else begin + assign {debug_pc_st12, debug_wid_st12} = 0; end `endif + `UNUSED_VAR (tag_st12) VX_data_access #( .BANK_ID (BANK_ID), @@ -491,6 +528,7 @@ end .CACHE_SIZE (CACHE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE), .NUM_BANKS (NUM_BANKS), + .DRAM_ENABLE (DRAM_ENABLE), .WORD_SIZE (WORD_SIZE), .WRITE_ENABLE (WRITE_ENABLE), .WRITE_THROUGH (WRITE_THROUGH) @@ -499,99 +537,59 @@ end .reset (reset), `ifdef DBG_CACHE_REQ_INFO - .debug_pc (debug_pc_st2), - .debug_rd (debug_rd_st2), - .debug_wid (debug_wid_st2), - .debug_tagid (debug_tagid_st2), + .rdebug_pc (debug_pc_st1), + .rdebug_wid (debug_wid_st1), + .wdebug_pc (debug_pc_st12), + .wdebug_wid (debug_wid_st12), `endif .stall (pipeline_stall), - // Inputs - .valid_in (valid_st2), - .addr_in (addr_st2), - 
.writeen_in (writeen_st2), - .is_fill_in (is_fill_st2), - .wordsel_in (wsel_st2), - .byteen_in (byteen_st2), - .writeword_in (writeword_st2), - .writedata_in (writedata_st2), + // reading + .readen_in (~writeen_st1 && valid_st1), + .raddr_in (addr_st1), + .rwsel_in (wsel_st1), + .rbyteen_in (byteen_st1), + .readword_out (readword_st1), + .readdata_out (readdata_st1), + .dirtyb_out (dirtyb_st1), - // Outputs - .readword_out (readword_st2), - .readdata_out (readdata_st2), - .dirtyb_out (dirtyb_st2) - ); - - wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st3; - wire [`WORD_WIDTH-1:0] writeword_st3; - wire [`WORD_WIDTH-1:0] readword_st3; - wire [`BANK_LINE_WIDTH-1:0] readdata_st3; - wire [BANK_LINE_SIZE-1:0] dirtyb_st3; - wire [`TAG_SELECT_BITS-1:0] readtag_st3; - wire do_writeback_st3; - wire incoming_fill_st3; - wire mshr_push_st3; - wire crsq_push_st3; - wire dreq_push_st3; - - wire incoming_fill_qual_st2 = (!drsq_empty && (addr_st2 == drsq_addr_st0)) || incoming_fill_st2; - - wire do_fill_req_st2 = miss_st2 - && !(WRITE_THROUGH && mem_rw_st2) - && (!force_miss_st2 - || (is_mshr_st2 && addr_st2 != addr_st3)) - && !incoming_fill_qual_st2; - - wire do_writeback_st2 = (WRITE_THROUGH && mem_rw_st2) - || (!WRITE_THROUGH && dirty_st2 && is_fill_st2); - - wire dreq_push_st2 = do_fill_req_st2 || do_writeback_st2; - - wire mshr_push_st2 = (miss_st2 || force_miss_st2) - && !(WRITE_THROUGH && mem_rw_st2); - - wire crsq_push_st2 = core_req_hit_st2 && !mem_rw_st2; - - VX_pipe_register #( - .DATAW (1 + 1+ 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `TAG_SELECT_BITS + BANK_LINE_SIZE + 1 + WORD_SIZE + `WORD_WIDTH + `BANK_LINE_WIDTH + `REQS_BITS + `REQ_TAG_WIDTH), - .RESETW (1) - ) pipe_reg2 ( - .clk (clk), - .reset (reset), - .enable (!pipeline_stall), - .data_in ({valid_st2, mshr_push_st2, crsq_push_st2, dreq_push_st2, do_writeback_st2, incoming_fill_qual_st2, force_miss_st2, is_mshr_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, 
dirtyb_st2, mem_rw_st2, byteen_st2, readword_st2, readdata_st2, req_tid_st2, tag_st2}), - .data_out ({valid_st3, mshr_push_st3, crsq_push_st3, dreq_push_st3, do_writeback_st3, incoming_fill_st3, force_miss_st3, is_mshr_st3, addr_st3, wsel_st3, writeword_st3, readtag_st3, miss_st3, dirtyb_st3, mem_rw_st3, byteen_st3, readword_st3, readdata_st3, req_tid_st3, tag_st3}) - ); + // writing + .writeen_in (writeen_st12 && valid_st12), + .waddr_in (addr_st12), + .wfill_in (is_fill_st2), + .wwsel_in (wsel_st12), + .wbyteen_in (byteen_st12), + .writeword_in (writeword_st12), + .writedata_in (writedata_st2) + ); `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin - assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3} = tag_st3; - end else begin - assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3} = 0; + assign {debug_pc_st2, debug_wid_st2} = tag_st2[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; + end else begin + assign {debug_pc_st2, debug_wid_st2} = 0; end `endif - // Enqueue to miss reserv if it's a valid miss - - wire mshr_push_unqual = valid_st3 && mshr_push_st3; + wire mshr_push_unqual = valid_st2 && mshr_push_st2; assign mshr_push_stall = 0; wire mshr_push = mshr_push_unqual && !crsq_push_stall && !dreq_push_stall; - wire incoming_fill_qual_st3 = (!drsq_empty && (addr_st3 == drsq_addr_st0)) || incoming_fill_st3; + wire incoming_fill_qual_st2 = (!drsq_empty && (addr_st2 == drsq_addr_st0)) || incoming_fill_st2; if (DRAM_ENABLE) begin - wire mshr_dequeue_st3 = valid_st3 && is_mshr_st3 && !mshr_push_unqual && !pipeline_stall; + wire mshr_dequeue_st2 = valid_st2 && is_mshr_st2 && !mshr_push_unqual && !pipeline_stall; - // mark msrq entry that match DRAM fill as 'ready' + // mark mshr entry that match DRAM fill as 'ready' wire update_ready_st0 = drsq_pop; // push missed requests as 'ready' if it was a forced miss but actually had a hit - // or the fill request is comming for the missed block - wire 
mshr_init_ready_state_st3 = valid_st3 && (!miss_st3 || incoming_fill_qual_st3); + // or the fill request is comming for this block + wire mshr_init_ready_state_st2 = valid_st2 && (!miss_st2 || incoming_fill_qual_st2); VX_miss_resrv #( .BANK_ID (BANK_ID), @@ -608,44 +606,42 @@ end .reset (reset), `ifdef DBG_CACHE_REQ_INFO - .debug_pc_st0 (debug_pc_st0), - .debug_rd_st0 (debug_rd_st0), - .debug_wid_st0 (debug_wid_st0), - .debug_tagid_st0 (debug_tagid_st0), - .debug_pc_st3 (debug_pc_st3), - .debug_rd_st3 (debug_rd_st3), - .debug_wid_st3 (debug_wid_st3), - .debug_tagid_st3 (debug_tagid_st3), + .deq_debug_pc (debug_pc_st0), + .deq_debug_wid (debug_wid_st0), + .enq_debug_pc (debug_pc_st2), + .enq_debug_wid (debug_wid_st2), `endif // enqueue - .enqueue_st3 (mshr_push), - .enqueue_addr_st3 (addr_st3), - .enqueue_data_st3 ({writeword_st3, req_tid_st3, tag_st3, mem_rw_st3, byteen_st3, wsel_st3}), - .enqueue_is_mshr_st3(is_mshr_st3), - .enqueue_ready_st3 (mshr_init_ready_state_st3), + .enqueue (mshr_push), + .enqueue_addr (addr_st2), + .enqueue_data ({writeword_st2, req_tid_st2, tag_st2, mem_rw_st2, byteen_st2, wsel_st2}), + .enqueue_is_mshr (is_mshr_st2), + .enqueue_ready (mshr_init_ready_state_st2), `UNUSED_PIN (enqueue_full), - // fill - .update_ready_st0 (update_ready_st0), - .addr_st0 (addr_st0), - .pending_hazard_st0 (mshr_pending_hazard_unqual_st0), + // lookup + .lookup_ready (update_ready_st0), + .lookup_addr (addr_st0), + .lookup_match (mshr_pending_hazard_unqual_st0), + // schedule + .schedule (mshr_pop), + .schedule_valid (mshr_valid_st0), + .schedule_addr (mshr_addr_st0), + .schedule_data ({mshr_writeword_st0, mshr_tid_st0, mshr_tag_st0, mshr_rw_st0, mshr_byteen_st0, mshr_wsel_st0}), + // dequeue - .schedule_st0 (mshr_pop), - .dequeue_valid_st0 (mshr_valid_st0), - .dequeue_addr_st0 (mshr_addr_st0), - .dequeue_data_st0 ({mshr_writeword_st0, mshr_tid_st0, mshr_tag_st0, mshr_rw_st0, mshr_byteen_st0, mshr_wsel_st0}), - .dequeue_st3 (mshr_dequeue_st3) + .dequeue 
(mshr_dequeue_st2) ); end else begin - `UNUSED_VAR (valid_st3) + `UNUSED_VAR (valid_st2) `UNUSED_VAR (mshr_push) - `UNUSED_VAR (wsel_st3) - `UNUSED_VAR (writeword_st3) - `UNUSED_VAR (mem_rw_st3) - `UNUSED_VAR (byteen_st3) - `UNUSED_VAR (incoming_fill_st3) + `UNUSED_VAR (wsel_st2) + `UNUSED_VAR (writeword_st2) + `UNUSED_VAR (mem_rw_st2) + `UNUSED_VAR (byteen_st2) + `UNUSED_VAR (incoming_fill_st2) assign mshr_pending_hazard_unqual_st0 = 0; assign mshr_valid_st0 = 0; assign mshr_addr_st0 = 0; @@ -661,7 +657,7 @@ end wire crsq_empty, crsq_full; - wire crsq_push_unqual = valid_st3 && crsq_push_st3; + wire crsq_push_unqual = valid_st2 && crsq_push_st2; assign crsq_push_stall = crsq_push_unqual && crsq_full; wire crsq_push = crsq_push_unqual @@ -671,9 +667,9 @@ end wire crsq_pop = core_rsp_valid && core_rsp_ready; - wire [`REQS_BITS-1:0] crsq_tid_st3 = req_tid_st3; - wire [CORE_TAG_WIDTH-1:0] crsq_tag_st3 = CORE_TAG_WIDTH'(tag_st3); - wire [`WORD_WIDTH-1:0] crsq_data_st3 = readword_st3; + wire [`REQS_BITS-1:0] crsq_tid_st2 = req_tid_st2; + wire [CORE_TAG_WIDTH-1:0] crsq_tag_st2 = CORE_TAG_WIDTH'(tag_st2); + wire [`WORD_WIDTH-1:0] crsq_data_st2 = readword_st2; VX_fifo_queue #( .DATAW (`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), @@ -685,7 +681,7 @@ end .reset (reset), .push (crsq_push), .pop (crsq_pop), - .data_in ({crsq_tid_st3, crsq_tag_st3, crsq_data_st3}), + .data_in ({crsq_tid_st2, crsq_tag_st2, crsq_data_st2}), .data_out({core_rsp_tid, core_rsp_tag, core_rsp_data}), .empty (crsq_empty), .full (crsq_full), @@ -698,23 +694,23 @@ end wire dreq_empty, dreq_full; - wire dreq_push_unqual = valid_st3 && dreq_push_st3; - assign dreq_push_stall = dreq_push_unqual && dreq_full; + wire dreq_push_unqual = valid_st2 && dreq_push_st2; + assign dreq_push_stall = dreq_push_unqual && dreq_full; wire dreq_push = dreq_push_unqual - && (do_writeback_st3 || !incoming_fill_qual_st3) + && (do_writeback_st2 || !incoming_fill_qual_st2) && !dreq_full && !mshr_push_stall && !crsq_push_stall; 
wire dreq_pop = dram_req_valid && dram_req_ready; - wire writeback = WRITE_ENABLE && do_writeback_st3; + wire writeback = WRITE_ENABLE && do_writeback_st2; - wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = (WRITE_THROUGH || !writeback) ? addr_st3 : - {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]}; + wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = (WRITE_THROUGH || !writeback) ? addr_st2 : + {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]}; - wire [BANK_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}}; + wire [BANK_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st2 : {BANK_LINE_SIZE{1'b1}}; if (DRAM_ENABLE) begin VX_fifo_queue #( @@ -727,7 +723,7 @@ end .reset (reset), .push (dreq_push), .pop (dreq_pop), - .data_in ({writeback, dreq_byteen, dreq_addr, readdata_st3}), + .data_in ({writeback, dreq_byteen, dreq_addr, readdata_st2}), .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), .empty (dreq_empty), .full (dreq_full), @@ -738,9 +734,9 @@ end `UNUSED_VAR (dreq_pop) `UNUSED_VAR (dreq_addr) `UNUSED_VAR (dreq_byteen) - `UNUSED_VAR (readtag_st3) - `UNUSED_VAR (dirtyb_st3) - `UNUSED_VAR (readdata_st3) + `UNUSED_VAR (readtag_st2) + `UNUSED_VAR (dirtyb_st2) + `UNUSED_VAR (readdata_st2) `UNUSED_VAR (writeback) `UNUSED_VAR (dram_req_ready) assign dreq_empty = 1; @@ -761,7 +757,6 @@ end `SCOPE_ASSIGN (valid_st0, valid_st0); `SCOPE_ASSIGN (valid_st1, valid_st1); `SCOPE_ASSIGN (valid_st2, valid_st2); - `SCOPE_ASSIGN (valid_st3, valid_st3); `SCOPE_ASSIGN (is_fill_st0, is_fill_st0); `SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0); `SCOPE_ASSIGN (miss_st1, miss_st1); @@ -772,7 +767,6 @@ end `SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID)); `SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); `SCOPE_ASSIGN (addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID)); - `SCOPE_ASSIGN (addr_st3, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID)); `ifdef PERF_ENABLE assign perf_read_misses = !pipeline_stall && miss_st2 && !is_mshr_st2 && !mem_rw_st2; @@ 
-782,14 +776,14 @@ end `endif `ifdef DBG_PRINT_CACHE_BANK - wire incoming_fill_dfp_st3 = drsq_push && (addr_st3 == dram_rsp_addr); + wire incoming_fill_dfp_st2 = drsq_push && (addr_st2 == dram_rsp_addr); always @(posedge clk) begin - if (valid_st3 && miss_st3 && (incoming_fill_st3 || incoming_fill_dfp_st3)) begin - $display("%t: incoming fill - addr=%0h, st3=%b, dfp=%b", $time, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), incoming_fill_st3, incoming_fill_dfp_st3); - assert(!is_mshr_st3); + if (valid_st2 && miss_st2 && (incoming_fill_st2 || incoming_fill_dfp_st2)) begin + $display("%t: incoming fill - addr=%0h, st3=%b, dfp=%b", $time, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), incoming_fill_st2, incoming_fill_dfp_st2); + assert(!is_mshr_st2); end if (pipeline_stall) begin - $display("%t: cache%0d:%0d pipeline-stall: msrq=%b, cwbq=%b, dwbq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, crsq_push_stall, dreq_push_stall); + $display("%t: cache%0d:%0d pipeline-stall: mshr=%b, cwbq=%b, dwbq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, crsq_push_stall, dreq_push_stall); end if (drsq_pop) begin $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), drsq_filldata_st0); @@ -801,13 +795,13 @@ end $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), creq_tag_st0, creq_tid_st0, creq_byteen_st0, debug_wid_st0, debug_pc_st0); end if (crsq_push) begin - $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), crsq_tag_st3, crsq_tid_st3, crsq_data_st3, debug_wid_st3, debug_pc_st3); + $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID), crsq_tag_st2, crsq_tid_st2, crsq_data_st2, debug_wid_st2, debug_pc_st2); end 
if (dreq_push) begin - if (do_writeback_st3) - $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3); + if (do_writeback_st2) + $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), readdata_st2, dreq_byteen, debug_wid_st2, debug_pc_st2); else - $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st3, debug_pc_st3); + $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st2, debug_pc_st2); end end `endif diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 2e40e1cc9..29bad5328 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -82,9 +82,7 @@ module VX_cache #( input wire dram_rsp_valid, input wire [`BANK_LINE_WIDTH-1:0] dram_rsp_data, input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag, - output wire dram_rsp_ready, - - output wire [NUM_BANKS-1:0] miss_vec + output wire dram_rsp_ready ); `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) @@ -113,9 +111,6 @@ module VX_cache #( wire [NUM_BANKS-1:0] per_bank_dram_rsp_ready; - wire [NUM_BANKS-1:0] per_bank_miss; - assign miss_vec = per_bank_miss; - `ifdef PERF_ENABLE wire [NUM_BANKS-1:0] perf_read_miss_per_bank; wire [NUM_BANKS-1:0] perf_write_miss_per_bank; @@ -189,8 +184,6 @@ module VX_cache #( wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr; wire curr_bank_dram_rsp_ready; - wire curr_bank_miss; - // Core Req assign curr_bank_core_req_valid = per_bank_core_req_valid[i]; assign curr_bank_core_req_tid = per_bank_core_req_tid[i]; @@ -230,9 +223,6 @@ module VX_cache #( end assign curr_bank_dram_rsp_data = dram_rsp_data; assign per_bank_dram_rsp_ready[i] = 
curr_bank_dram_rsp_ready; - - //Misses - assign per_bank_miss[i] = curr_bank_miss; VX_bank #( .BANK_ID (i), @@ -257,6 +247,13 @@ module VX_cache #( .clk (clk), .reset (reset), + + `ifdef PERF_ENABLE + .perf_read_misses (perf_read_miss_per_bank[i]), + .perf_write_misses (perf_write_miss_per_bank[i]), + .perf_mshr_stalls (perf_mshr_stall_per_bank[i]), + .perf_pipe_stalls (perf_pipe_stall_per_bank[i]), + `endif // Core request .core_req_valid (curr_bank_core_req_valid), @@ -287,17 +284,7 @@ module VX_cache #( .dram_rsp_valid (curr_bank_dram_rsp_valid), .dram_rsp_data (curr_bank_dram_rsp_data), .dram_rsp_addr (curr_bank_dram_rsp_addr), - .dram_rsp_ready (curr_bank_dram_rsp_ready), - - `ifdef PERF_ENABLE - .perf_read_misses (perf_read_miss_per_bank[i]), - .perf_write_misses (perf_write_miss_per_bank[i]), - .perf_mshr_stalls (perf_mshr_stall_per_bank[i]), - .perf_pipe_stalls (perf_pipe_stall_per_bank[i]), - `endif - - //Misses - .misses (curr_bank_miss) + .dram_rsp_ready (curr_bank_dram_rsp_ready) ); end diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index 3128f7908..7bc70fb95 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -12,6 +12,9 @@ module VX_data_access #( parameter NUM_BANKS = 1, // Size of a word in bytes parameter WORD_SIZE = 1, + + // Enable dram update + parameter DRAM_ENABLE = 1, // Enable cache writeable parameter WRITE_ENABLE = 1, @@ -27,41 +30,49 @@ module VX_data_access #( `ifdef DBG_CACHE_REQ_INFO `IGNORE_WARNINGS_BEGIN - input wire[31:0] debug_pc, - input wire[`NR_BITS-1:0] debug_rd, - input wire[`NW_BITS-1:0] debug_wid, - input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid, + input wire[31:0] rdebug_pc, + input wire[`NW_BITS-1:0] rdebug_wid, + input wire[31:0] wdebug_pc, + input wire[`NW_BITS-1:0] wdebug_wid, `IGNORE_WARNINGS_END `endif input wire stall, - // Inputs - input wire valid_in, + // reading + input wire readen_in, `IGNORE_WARNINGS_BEGIN - input wire[`LINE_ADDR_WIDTH-1:0] addr_in, 
-`IGNORE_WARNINGS_END - input wire writeen_in, - input wire is_fill_in, - input wire [`WORD_WIDTH-1:0] writeword_in, - input wire [`BANK_LINE_WIDTH-1:0] writedata_in, - input wire [WORD_SIZE-1:0] byteen_in, - input wire [`UP(`WORD_SELECT_WIDTH)-1:0] wordsel_in, - - // Outputs + input wire[`LINE_ADDR_WIDTH-1:0] raddr_in, +`IGNORE_WARNINGS_END + input wire [`UP(`WORD_SELECT_WIDTH)-1:0] rwsel_in, + input wire [WORD_SIZE-1:0] rbyteen_in, output wire[`WORD_WIDTH-1:0] readword_out, output wire [`BANK_LINE_WIDTH-1:0] readdata_out, - output wire [BANK_LINE_SIZE-1:0] dirtyb_out + output wire [BANK_LINE_SIZE-1:0] dirtyb_out, + + // writing + input wire writeen_in, +`IGNORE_WARNINGS_BEGIN + input wire[`LINE_ADDR_WIDTH-1:0] waddr_in, +`IGNORE_WARNINGS_END + input wire [`UP(`WORD_SELECT_WIDTH)-1:0] wwsel_in, + input wire [WORD_SIZE-1:0] wbyteen_in, + input wire wfill_in, + input wire [`WORD_WIDTH-1:0] writeword_in, + input wire [`BANK_LINE_WIDTH-1:0] writedata_in ); - wire [BANK_LINE_SIZE-1:0] read_dirtyb_out; - wire [`BANK_LINE_WIDTH-1:0] read_data; + wire [BANK_LINE_SIZE-1:0] read_dirtyb, dirtyb_qual; + wire [`BANK_LINE_WIDTH-1:0] read_data, readdata_qual; - wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] byte_enable; + wire [BANK_LINE_SIZE-1:0] byte_enable; wire [`BANK_LINE_WIDTH-1:0] write_data; wire write_enable; - wire [`LINE_SELECT_BITS-1:0] addrline = addr_in[`LINE_SELECT_BITS-1:0]; + wire [`LINE_SELECT_BITS-1:0] raddr = raddr_in[`LINE_SELECT_BITS-1:0]; + wire [`LINE_SELECT_BITS-1:0] waddr = waddr_in[`LINE_SELECT_BITS-1:0]; + + `UNUSED_VAR (readen_in) VX_data_store #( .CACHE_SIZE (CACHE_SIZE), @@ -73,66 +84,75 @@ module VX_data_access #( .clk (clk), .reset (reset), - .read_addr (addrline), - .read_dirtyb (read_dirtyb_out), + .read_addr (raddr), .read_data (read_data), + .read_dirtyb (read_dirtyb), .write_enable(write_enable), - .write_fill (is_fill_in), - .byte_enable (byte_enable), - .write_addr (addrline), + .write_fill (wfill_in), + .write_addr (waddr), + .byte_enable 
(byte_enable), .write_data (write_data) ); - if (`WORD_SELECT_WIDTH != 0) begin - wire [`WORD_WIDTH-1:0] readword = read_data[wordsel_in * `WORD_WIDTH +: `WORD_WIDTH]; - for (genvar i = 0; i < WORD_SIZE; i++) begin - assign readword_out[i * 8 +: 8] = readword[i * 8 +: 8] & {8{byteen_in[i]}}; - end - end else begin - for (genvar i = 0; i < WORD_SIZE; i++) begin - assign readword_out[i * 8 +: 8] = read_data[i * 8 +: 8] & {8{byteen_in[i]}}; - end - end - - wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] byte_enable_w; - wire [`BANK_LINE_WIDTH-1:0] write_data_w; + wire [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] wbyteen_qual; + wire [`BANK_LINE_WIDTH-1:0] writeword_qual; if (`WORD_SELECT_WIDTH != 0) begin for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin - assign byte_enable_w[i] = (wordsel_in == `WORD_SELECT_WIDTH'(i)) ? byteen_in : {WORD_SIZE{1'b0}}; - assign write_data_w[i * `WORD_WIDTH +: `WORD_WIDTH] = writeword_in; + assign wbyteen_qual[i] = (wwsel_in == `WORD_SELECT_WIDTH'(i)) ? wbyteen_in : {WORD_SIZE{1'b0}}; + assign writeword_qual[i * `WORD_WIDTH +: `WORD_WIDTH] = writeword_in; end end else begin - assign byte_enable_w = byteen_in; - assign write_data_w = writeword_in; + `UNUSED_VAR (wwsel_in) + assign wbyteen_qual = wbyteen_in; + assign writeword_qual = writeword_in; end - assign byte_enable = is_fill_in ? {BANK_LINE_SIZE{1'b1}} : byte_enable_w; - assign write_data = is_fill_in ? writedata_in : write_data_w; + assign byte_enable = wfill_in ? {BANK_LINE_SIZE{1'b1}} : wbyteen_qual; + assign write_data = wfill_in ? writedata_in : writeword_qual; - assign write_enable = valid_in && writeen_in && !stall; + assign write_enable = writeen_in && !stall; + + wire rw_hazard = DRAM_ENABLE && (raddr == waddr) && writeen_in; + for (genvar i = 0; i < BANK_LINE_SIZE; i++) begin + assign dirtyb_qual[i] = rw_hazard ? byte_enable[i] : read_dirtyb[i]; + assign readdata_qual[i * 8 +: 8] = (rw_hazard && byte_enable[i]) ? 
write_data[i * 8 +: 8] : read_data[i * 8 +: 8]; + end if (WRITE_THROUGH) begin - `UNUSED_VAR (read_dirtyb_out) - assign dirtyb_out = byte_enable_w; - assign readdata_out = write_data_w; + `UNUSED_VAR (dirtyb_qual) + assign dirtyb_out = wbyteen_qual; + assign readdata_out = writeword_qual; end else begin - assign dirtyb_out = read_dirtyb_out; - assign readdata_out = read_data; + assign dirtyb_out = dirtyb_qual; + assign readdata_out = readdata_qual; + end + + if (`WORD_SELECT_WIDTH != 0) begin + wire [`WORD_WIDTH-1:0] readword = readdata_qual[rwsel_in * `WORD_WIDTH +: `WORD_WIDTH]; + for (genvar i = 0; i < WORD_SIZE; i++) begin + assign readword_out[i * 8 +: 8] = readword[i * 8 +: 8] & {8{rbyteen_in[i]}}; + end + end else begin + `UNUSED_VAR (rwsel_in) + for (genvar i = 0; i < WORD_SIZE; i++) begin + assign readword_out[i * 8 +: 8] = readdata_qual[i * 8 +: 8] & {8{rbyteen_in[i]}}; + end end `ifdef DBG_PRINT_CACHE_DATA always @(posedge clk) begin - if (valid_in && !stall) begin - if (write_enable) begin - if (is_fill_in) begin - $display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), dirtyb_out, addrline, write_data); + if (!stall) begin + if (writeen_in) begin + if (wfill_in) begin + $display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(waddr_in, BANK_ID), dirtyb_out, waddr, write_data); end else begin - $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, byte_enable, dirtyb_out, addrline, wordsel_in, writeword_in); + $display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(waddr_in, BANK_ID), wdebug_wid, wdebug_pc, byte_enable, dirtyb_out,
waddr, wwsel_in, writeword_in); end - end else begin - $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, read_data); + end + if (readen_in) begin + $display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), rdebug_wid, rdebug_pc, dirtyb_out, raddr, rwsel_in, read_data); end end end diff --git a/hw/rtl/cache/VX_data_store.v b/hw/rtl/cache/VX_data_store.v index 6593de29b..c65e8dbd4 100644 --- a/hw/rtl/cache/VX_data_store.v +++ b/hw/rtl/cache/VX_data_store.v @@ -18,7 +18,7 @@ module VX_data_store #( input wire write_enable, input wire write_fill, - input wire[`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] byte_enable, + input wire[BANK_LINE_SIZE-1:0] byte_enable, input wire[`LINE_SELECT_BITS-1:0] write_addr, input wire[`BANK_LINE_WIDTH-1:0] write_data, diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 855a48078..1c07d14db 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -24,36 +24,34 @@ module VX_miss_resrv #( `ifdef DBG_CACHE_REQ_INFO `IGNORE_WARNINGS_BEGIN - input wire[31:0] debug_pc_st0, - input wire[`NR_BITS-1:0] debug_rd_st0, - input wire[`NW_BITS-1:0] debug_wid_st0, - input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0, - input wire[31:0] debug_pc_st3, - input wire[`NR_BITS-1:0] debug_rd_st3, - input wire[`NW_BITS-1:0] debug_wid_st3, - input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st3, + input wire[31:0] deq_debug_pc, + input wire[`NW_BITS-1:0] deq_debug_wid, + input wire[31:0] enq_debug_pc, + input wire[`NW_BITS-1:0] enq_debug_wid, `IGNORE_WARNINGS_END `endif // enqueue - input wire enqueue_st3, - input wire [`LINE_ADDR_WIDTH-1:0] enqueue_addr_st3, - input wire [`MSHR_DATA_WIDTH-1:0] enqueue_data_st3, - input wire 
enqueue_is_mshr_st3, - input wire enqueue_ready_st3, + input wire enqueue, + input wire [`LINE_ADDR_WIDTH-1:0] enqueue_addr, + input wire [`MSHR_DATA_WIDTH-1:0] enqueue_data, + input wire enqueue_is_mshr, + input wire enqueue_ready, output wire enqueue_full, - // fill - input wire update_ready_st0, - input wire [`LINE_ADDR_WIDTH-1:0] addr_st0, - output wire pending_hazard_st0, + // lookup + input wire lookup_ready, + input wire [`LINE_ADDR_WIDTH-1:0] lookup_addr, + output wire lookup_match, + + // schedule + input wire schedule, + output wire schedule_valid, + output wire [`LINE_ADDR_WIDTH-1:0] schedule_addr, + output wire [`MSHR_DATA_WIDTH-1:0] schedule_data, // dequeue - input wire schedule_st0, - output wire dequeue_valid_st0, - output wire [`LINE_ADDR_WIDTH-1:0] dequeue_addr_st0, - output wire [`MSHR_DATA_WIDTH-1:0] dequeue_data_st0, - input wire dequeue_st3 + input wire dequeue ); `USE_FAST_BRAM reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; @@ -67,17 +65,17 @@ module VX_miss_resrv #( wire [MSHR_SIZE-1:0] valid_address_match; for (genvar i = 0; i < MSHR_SIZE; i++) begin - assign valid_address_match[i] = valid_table[i] && (addr_table[i] == addr_st0); + assign valid_address_match[i] = valid_table[i] && (addr_table[i] == lookup_addr); end - assign pending_hazard_st0 = (| valid_address_match); + assign lookup_match = (| valid_address_match); wire dequeue_ready = ready_table[schedule_ptr]; - assign dequeue_valid_st0 = dequeue_ready; - assign dequeue_addr_st0 = addr_table[schedule_ptr]; + assign schedule_valid = dequeue_ready; + assign schedule_addr = addr_table[schedule_ptr]; - wire mshr_push = enqueue_st3 && !enqueue_is_mshr_st3; + wire mshr_push = enqueue && !enqueue_is_mshr; wire [`LOG2UP(MSHR_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1); @@ -91,33 +89,33 @@ module VX_miss_resrv #( tail_ptr <= 0; size <= 0; end else begin - if (update_ready_st0) begin + if (lookup_ready) begin ready_table <= ready_table | valid_address_match; end - if 
(enqueue_st3) begin + if (enqueue) begin assert(!enqueue_full); - if (enqueue_is_mshr_st3) begin + if (enqueue_is_mshr) begin // returning missed msrq entry, restore schedule valid_table[restore_ptr] <= 1; - ready_table[restore_ptr] <= enqueue_ready_st3; + ready_table[restore_ptr] <= enqueue_ready; restore_ptr <= restore_ptr + $bits(restore_ptr)'(1); schedule_ptr <= head_ptr; end else begin valid_table[tail_ptr] <= 1; - ready_table[tail_ptr] <= enqueue_ready_st3; + ready_table[tail_ptr] <= enqueue_ready; tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); size <= size + $bits(size)'(1); end - end else if (dequeue_st3) begin + end else if (dequeue) begin head_ptr <= head_ptr_n; restore_ptr <= head_ptr_n; valid_table[head_ptr] <= 0; size <= size - $bits(size)'(1); end - if (schedule_st0) begin - assert(dequeue_valid_st0); + if (schedule) begin + assert(schedule_valid); valid_table[schedule_ptr] <= 0; ready_table[schedule_ptr] <= 0; schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1); @@ -126,8 +124,8 @@ module VX_miss_resrv #( end always @(posedge clk) begin - if (enqueue_st3 && !enqueue_is_mshr_st3) begin - addr_table[tail_ptr] <= enqueue_addr_st3; + if (enqueue && !enqueue_is_mshr) begin + addr_table[tail_ptr] <= enqueue_addr; end end @@ -142,23 +140,23 @@ module VX_miss_resrv #( .wren(mshr_push), .byteen(1'b1), .rden(1'b1), - .din(enqueue_data_st3), - .dout(dequeue_data_st0) + .din(enqueue_data), + .dout(schedule_data) ); `ifdef DBG_PRINT_CACHE_MSHR always @(posedge clk) begin - if (update_ready_st0 || schedule_st0 || enqueue_st3 || dequeue_st3) begin - if (schedule_st0) - $display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(dequeue_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0); - if (enqueue_st3) begin - if (enqueue_is_mshr_st3) - $display("%t: cache%0d:%0d msrq-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), 
enqueue_ready_st3); + if (lookup_ready || schedule || enqueue || dequeue) begin + if (schedule) + $display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(schedule_addr, BANK_ID), deq_debug_wid, deq_debug_pc); + if (enqueue) begin + if (enqueue_is_mshr) + $display("%t: cache%0d:%0d msrq-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_ready); else - $display("%t: cache%0d:%0d msrq-enq: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3, debug_wid_st3, debug_pc_st3); + $display("%t: cache%0d:%0d msrq-enq: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_ready, enq_debug_wid, enq_debug_pc); end - if (dequeue_st3) - $display("%t: cache%0d:%0d msrq-deq addr%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, debug_wid_st3, debug_pc_st3); + if (dequeue) + $display("%t: cache%0d:%0d msrq-deq addr%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, enq_debug_wid, enq_debug_pc); $write("%t: cache%0d:%0d msrq-table", $time, CACHE_ID, BANK_ID); for (integer j = 0; j < MSHR_SIZE; j++) begin if (valid_table[j]) begin diff --git a/hw/rtl/cache/VX_tag_access.v b/hw/rtl/cache/VX_tag_access.v index 33b1f0b01..5004562db 100644 --- a/hw/rtl/cache/VX_tag_access.v +++ b/hw/rtl/cache/VX_tag_access.v @@ -25,9 +25,7 @@ module VX_tag_access #( `ifdef DBG_CACHE_REQ_INFO `IGNORE_WARNINGS_BEGIN input wire[31:0] debug_pc, - input wire[`NR_BITS-1:0] debug_rd, input wire[`NW_BITS-1:0] debug_wid, - input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid, `IGNORE_WARNINGS_END `endif diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index 92e11ec89..97ea030bd 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -193,11 +193,9 @@ "?valid_st0": 1, 
"?valid_st1": 1, "?valid_st2": 1, - "?valid_st3": 1, "addr_st0": 32, "addr_st1": 32, "addr_st2": 32, - "addr_st3": 32, "is_fill_st0": 1, "is_mshr_st0": 1, "miss_st1": 1,