Added schedule_ptr to mrvq for speculative pops

This commit is contained in:
felsabbagh3 2020-05-23 21:36:57 -07:00
parent c54fa50715
commit 0cd9bd689e
2 changed files with 113 additions and 53 deletions

127
hw/rtl/cache/VX_bank.v vendored
View file

@ -245,7 +245,7 @@ module VX_bank #(
wire[`LINE_ADDR_WIDTH-1:0] addr_st2;
wire is_fill_st2;
wire msrq_push_stall;
wire mrvq_push_stall;
wire cwbq_push_stall;
wire dwbq_push_stall;
wire dram_fill_req_stall;
@ -268,10 +268,17 @@ module VX_bank #(
end
end
assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe;
assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe;
assign reqq_pop = !mrvq_stop && !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !is_fill_in_pipe;
assign snrq_pop = !mrvq_stop && !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && !snrq_empty && !stall_bank_pipe;
wire mrvq_pop_unqual = mrvq_valid_st0;
wire dfpq_pop_unqual = !mrvq_pop_unqual && !dfpq_empty;
wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1[0] && !is_fill_in_pipe;
wire snrq_pop_unqual = !mrvq_stop && !reqq_pop_unqual && !reqq_pop_unqual && !mrvq_pop_unqual && !dfpq_pop_unqual && !snrq_empty;
assign mrvq_pop = mrvq_pop_unqual && !stall_bank_pipe && !recover_mrvq_state_st2;
assign dfpq_pop = dfpq_pop_unqual && !stall_bank_pipe;
assign reqq_pop = reqq_pop_unqual && !stall_bank_pipe;
assign snrq_pop = snrq_pop_unqual && !stall_bank_pipe;
wire qual_is_fill_st0;
wire qual_valid_st0;
@ -294,41 +301,41 @@ module VX_bank #(
wire is_snp_st1 [STAGE_1_CYCLES-1:0];
wire from_mrvq_st1 [STAGE_1_CYCLES-1:0];
assign qual_is_fill_st0 = dfpq_pop;
assign qual_is_fill_st0 = dfpq_pop_unqual;
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop;
assign qual_addr_st0 = dfpq_pop ? dfpq_addr_st0 :
mrvq_pop ? mrvq_addr_st0 :
reqq_pop ? reqq_req_addr_st0[31:`LINE_SELECT_ADDR_START] :
snrq_pop ? snrq_addr_st0 :
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
mrvq_pop_unqual ? mrvq_addr_st0 :
reqq_pop_unqual ? reqq_req_addr_st0[31:`LINE_SELECT_ADDR_START] :
snrq_pop_unqual ? snrq_addr_st0 :
0;
assign qual_wsel_st0 = reqq_pop ? reqq_req_addr_st0[`BASE_ADDR_BITS-1:0] :
mrvq_pop ? mrvq_wsel_st0 :
assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`BASE_ADDR_BITS-1:0] :
mrvq_pop_unqual ? mrvq_wsel_st0 :
0;
assign qual_writedata_st0 = dfpq_pop ? dfpq_filldata_st0 : 57;
assign qual_writedata_st0 = dfpq_pop_unqual ? dfpq_filldata_st0 : 57;
assign qual_inst_meta_st0 = mrvq_pop ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_mem_read_st0, mrvq_mem_write_st0, mrvq_tid_st0} :
reqq_pop ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_mem_read_st0, reqq_req_mem_write_st0, reqq_req_tid_st0} :
snrq_pop ? {`REQ_TAG_WIDTH'(snrq_tag_st0), `BYTE_EN_BITS'(0), `BYTE_EN_BITS'(0), `REQS_BITS'(0)} :
assign qual_inst_meta_st0 = mrvq_pop_unqual ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_mem_read_st0, mrvq_mem_write_st0, mrvq_tid_st0} :
reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_mem_read_st0, reqq_req_mem_write_st0, reqq_req_tid_st0} :
snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), `BYTE_EN_BITS'(0), `BYTE_EN_BITS'(0), `REQS_BITS'(0)} :
0;
assign qual_going_to_write_st0 = dfpq_pop ? 1 :
(mrvq_pop && (mrvq_mem_write_st0 != `BYTE_EN_NO)) ? 1 :
(reqq_pop && (reqq_req_mem_write_st0 != `BYTE_EN_NO)) ? 1 :
assign qual_going_to_write_st0 = dfpq_pop_unqual ? 1 :
(mrvq_pop_unqual && (mrvq_mem_write_st0 != `BYTE_EN_NO)) ? 1 :
(reqq_pop_unqual && (reqq_req_mem_write_st0 != `BYTE_EN_NO)) ? 1 :
0;
assign qual_is_snp_st0 = mrvq_pop ? mrvq_is_snp_st0 :
snrq_pop ? 1 :
assign qual_is_snp_st0 = mrvq_pop_unqual ? mrvq_is_snp_st0 :
snrq_pop_unqual ? 1 :
0;
assign qual_writeword_st0 = mrvq_pop ? mrvq_writeword_st0 :
reqq_pop ? reqq_req_writeword_st0 :
assign qual_writeword_st0 = mrvq_pop_unqual ? mrvq_writeword_st0 :
reqq_pop_unqual ? reqq_req_writeword_st0 :
0;
assign qual_from_mrvq_st0 = mrvq_pop;
assign qual_from_mrvq_st0 = mrvq_pop_unqual;
`DEBUG_BEGIN
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
@ -377,14 +384,18 @@ module VX_bank #(
wire snp_to_mrvq_st1e;
wire mrvq_init_ready_state_st1e;
wire miss_add_because_miss;
wire valid_st1e;
wire from_mrvq_st1e;
assign is_snp_st1e = is_snp_st1[STAGE_1_CYCLES-1];
assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1];
assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1];
assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1];
assign {tag_st1e, mem_read_st1e, mem_write_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
assign st2_pending_hazard_st1e = (miss_add_because_miss) && ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2);
assign force_request_miss_st1e = (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e) && valid_st1[STAGE_1_CYCLES-1] && !from_mrvq_st1[STAGE_1_CYCLES-1];
assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e)) || (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2);
VX_tag_data_access #(
.CACHE_SIZE (CACHE_SIZE),
@ -406,7 +417,7 @@ module VX_bank #(
.readaddr_st10(addr_st1[0][`LINE_SELECT_BITS-1:0]),
// Actual Read/Write
.valid_req_st1e(valid_st1[STAGE_1_CYCLES-1]),
.valid_req_st1e(valid_st1e),
.writefill_st1e(is_fill_st1[STAGE_1_CYCLES-1]),
.writeaddr_st1e(addr_st1[STAGE_1_CYCLES-1]),
.writewsel_st1e(wsel_st1[STAGE_1_CYCLES-1]),
@ -435,7 +446,8 @@ module VX_bank #(
end
`DEBUG_END
wire qual_valid_st1e_2 = valid_st1[STAGE_1_CYCLES-1] && !is_fill_st1[STAGE_1_CYCLES-1];
wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1];
wire from_mrvq_st1e_st2 = from_mrvq_st1e && !is_snp_st1e;
wire valid_st2;
wire [`BASE_ADDR_BITS-1:0] wsel_st2;
@ -449,20 +461,22 @@ module VX_bank #(
wire fill_saw_dirty_st2;
wire is_snp_st2;
wire snp_to_mrvq_st2;
wire from_mrvq_st2;
wire mrvq_init_ready_state_st2;
wire mrvq_init_ready_state_unqual_st2;
wire mrvq_init_ready_state_hazard_st0_st1;
wire mrvq_init_ready_state_hazard_st1e_st1;
wire recover_mrvq_state_st2;
VX_generic_register #(
.N(1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `REQ_INST_META_WIDTH)
.N(1+ 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `REQ_INST_META_WIDTH)
) st_1e_2 (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
.flush(1'b0),
.in ({mrvq_init_ready_state_st1e, snp_to_mrvq_st1e, is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
.out ({mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 })
.in ({from_mrvq_st1e_st2, mrvq_init_ready_state_st1e , snp_to_mrvq_st1e, is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
.out ({from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 })
);
@ -477,13 +491,17 @@ module VX_bank #(
wire miss_add_because_pending = snp_to_mrvq_st2;
wire miss_add_unqual = (miss_add_because_miss || miss_add_because_pending);
assign msrq_push_stall = miss_add_unqual && mrvq_full;
assign mrvq_push_stall = miss_add_unqual && mrvq_full;
wire miss_add = miss_add_unqual
&& !mrvq_full
&& !(cwbq_push_stall
|| dwbq_push_stall
|| dram_fill_req_stall);
|| dram_fill_req_stall);
assign recover_mrvq_state_st2 = miss_add && from_mrvq_st2;
wire [`LINE_ADDR_WIDTH-1:0] miss_add_addr = addr_st2;
wire [`BASE_ADDR_BITS-1:0] miss_add_wsel = wsel_st2;
@ -491,9 +509,11 @@ module VX_bank #(
assign {miss_add_tag, miss_add_mem_read, miss_add_mem_write, miss_add_tid} = inst_meta_st2;
wire miss_add_is_snp = is_snp_st2;
wire miss_add_from_mrvq = valid_st2 && from_mrvq_st2 && !stall_bank_pipe;
assign mrvq_init_ready_state_hazard_st0_st1 = miss_add && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0 );
assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1[STAGE_1_CYCLES-1]);
assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0 );
assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1[STAGE_1_CYCLES-1]);
assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 || mrvq_init_ready_state_hazard_st0_st1 || mrvq_init_ready_state_hazard_st1e_st1;
@ -513,6 +533,7 @@ module VX_bank #(
// Enqueue
.miss_add (miss_add),
.from_mrvq (miss_add_from_mrvq),
.miss_add_addr (miss_add_addr),
.miss_add_wsel (miss_add_wsel),
.miss_add_data (miss_add_data),
@ -557,7 +578,7 @@ module VX_bank #(
&& !cwbq_full
&& (miss_add_mem_write == `BYTE_EN_NO)
&& !(dwbq_push_stall
|| msrq_push_stall
|| mrvq_push_stall
|| dram_fill_req_stall);
wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2;
@ -586,9 +607,35 @@ module VX_bank #(
// Enqueue DRAM fill request
assign dram_fill_req_valid = miss_add && !mrvq_init_ready_state_st2;
// `IGNORE_WARNINGS_BEGIN
// wire invalidate_fill;
// `IGNORE_WARNINGS_END
// wire possible_fill = valid_st2 && miss_st2 && dram_fill_req_ready && ~is_snp_st2;
// wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2;
// VX_fill_invalidator #(
// .BANK_LINE_SIZE (BANK_LINE_SIZE),
// .NUM_BANKS (NUM_BANKS),
// .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE)
// ) fill_invalidator (
// .clk (clk),
// .reset (reset),
// .possible_fill (possible_fill),
// .success_fill (is_fill_st2),
// .fill_addr (fill_invalidator_addr),
// .invalidate_fill (invalidate_fill)
// );
wire dram_fill_req_unqual = miss_add_unqual && (!mrvq_init_ready_state_st2 || from_mrvq_st2);
assign dram_fill_req_valid = dram_fill_req_unqual
&& dram_fill_req_ready
&& !( dwbq_push_stall
|| mrvq_push_stall
|| cwbq_push_stall);
assign dram_fill_req_addr = addr_st2;
assign dram_fill_req_stall = (valid_st2 && miss_st2 && ~dram_fill_req_ready);
assign dram_fill_req_stall = dram_fill_req_unqual && ~dram_fill_req_ready;
// Enqueue DRAM writeback request
@ -609,7 +656,7 @@ module VX_bank #(
assign dwbq_push = dwbq_push_unqual
&& !dwbq_full
&& !(cwbq_push_stall
|| msrq_push_stall
|| mrvq_push_stall
|| dram_fill_req_stall);
wire [`BANK_LINE_WIDTH-1:0] dwbq_req_data = readdata_st2;
@ -658,7 +705,7 @@ module VX_bank #(
// bank pipeline stall
assign stall_bank_pipe = cwbq_push_stall
|| dwbq_push_stall
|| msrq_push_stall
|| mrvq_push_stall
|| dram_fill_req_stall;
`ifdef DBG_PRINT_CACHE_BANK

View file

@ -23,6 +23,7 @@ module VX_cache_miss_resrv #(
// Miss enqueue
input wire miss_add,
input wire from_mrvq,
input wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr,
input wire[`BASE_ADDR_BITS-1:0] miss_add_wsel,
input wire[`WORD_WIDTH-1:0] miss_add_data,
@ -57,6 +58,7 @@ module VX_cache_miss_resrv #(
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
reg [MRVQ_SIZE-1:0] valid_table;
reg [MRVQ_SIZE-1:0] ready_table;
reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr;
reg [`LOG2UP(MRVQ_SIZE)-1:0] head_ptr;
reg [`LOG2UP(MRVQ_SIZE)-1:0] tail_ptr;
@ -86,16 +88,21 @@ module VX_cache_miss_resrv #(
assign pending_hazard = |(valid_address_match);
wire dequeue_possible = valid_table[head_ptr] && ready_table[head_ptr];
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = head_ptr;
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
assign miss_resrv_valid_st0 = (MRVQ_SIZE != 2) && dequeue_possible;
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_tag_st0, miss_resrv_mem_read_st0, miss_resrv_mem_write_st0, miss_resrv_wsel_st0, miss_resrv_is_snp_st0} = metadata_table[dequeue_index];
wire mrvq_push = miss_add && enqueue_possible && (MRVQ_SIZE != 2);
wire mrvq_push = miss_add && enqueue_possible && !from_mrvq && (MRVQ_SIZE != 2);
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
wire recover_state = miss_add && from_mrvq;
wire increment_head = !miss_add && from_mrvq;
wire update_ready = (|make_ready);
wire qual_mrvq_init = mrvq_push && mrvq_init_ready_state;
@ -105,18 +112,25 @@ module VX_cache_miss_resrv #(
always @(posedge clk) begin
if (reset) begin
valid_table <= 0;
ready_table <= 0;
size <= 0;
head_ptr <= 0;
tail_ptr <= 0;
valid_table <= 0;
ready_table <= 0;
size <= 0;
schedule_ptr <= 0;
head_ptr <= 0;
tail_ptr <= 0;
end else begin
if (mrvq_push) begin
valid_table[enqueue_index] <= 1;
ready_table[enqueue_index] <= mrvq_init_ready_state;
addr_table[enqueue_index] <= miss_add_addr;
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_mem_read, miss_add_mem_write, miss_add_wsel, miss_add_is_snp};
tail_ptr <= tail_ptr + 1;
end else if (increment_head) begin
valid_table[head_ptr] <= 0;
head_ptr <= head_ptr + 1;
end else if (recover_state) begin
schedule_ptr <= schedule_ptr - 1;
end
// update entry as 'ready' during DRAM fill response
@ -125,16 +139,15 @@ module VX_cache_miss_resrv #(
end
if (mrvq_pop) begin
valid_table[dequeue_index] <= 0;
ready_table[dequeue_index] <= 0;
head_ptr <= head_ptr + 1;
schedule_ptr <= schedule_ptr + 1;
end
if (!(mrvq_push && mrvq_pop)) begin
if (!(mrvq_push && increment_head)) begin
if (mrvq_push) begin
size <= size + 1;
end
if (mrvq_pop) begin
if (increment_head) begin
size <= size - 1;
end
end
@ -149,7 +162,7 @@ module VX_cache_miss_resrv #(
for (j = 0; j < MRVQ_SIZE; j++) begin
if (valid_table[j]) begin
$write(" ");
if (head_ptr == $bits(head_ptr)'(j)) $write("*");
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
if (~ready_table[j]) $write("!");
$write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
end