mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
Fixed Flushing and Prefetching
This commit is contained in:
parent
a7a1906bea
commit
65fa9285bf
7 changed files with 107 additions and 13 deletions
|
@ -106,6 +106,15 @@ module VX_bank
|
|||
);
|
||||
|
||||
|
||||
reg snoop_state = 0;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
snoop_state <= 0;
|
||||
end else begin
|
||||
snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID));
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
wire snrq_pop;
|
||||
|
@ -504,7 +513,7 @@ module VX_bank
|
|||
wire invalidate_fill;
|
||||
|
||||
// Enqueue to the miss reserve queue (mrvq) if it's a valid miss
|
||||
assign miss_add = valid_st2 && !is_snp_st2 && miss_st2 && !mrvq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
assign miss_add = valid_st2 && !is_snp_st2 && miss_st2 && !mrvq_full && !(should_flush && dwbq_push) && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
assign miss_add_pc = pc_st2;
|
||||
assign miss_add_addr = addr_st2;
|
||||
assign miss_add_data = writeword_st2;
|
||||
|
@ -535,12 +544,23 @@ module VX_bank
|
|||
.full (cwbq_full)
|
||||
);
|
||||
|
||||
wire should_flush = snoop_state && valid_st2 && (miss_add_mem_write != `NO_MEM_WRITE) && !is_snp_st2 && !is_fill_st2;
|
||||
// Enqueue to DWB Queue
|
||||
wire dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && !dwbq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
wire[31:0] dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK;
|
||||
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data = readdata_st2;
|
||||
wire dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush) && !dwbq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
wire[31:0] dwbq_req_addr;
|
||||
wire dwbq_empty;
|
||||
|
||||
|
||||
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data;
|
||||
if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
|
||||
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
|
||||
assign dwbq_req_addr = (should_flush && dwbq_push) ? (addr_st2) : ({readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK);
|
||||
end else begin
|
||||
assign dwbq_req_data = readdata_st2;
|
||||
assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK;
|
||||
end
|
||||
|
||||
|
||||
|
||||
wire possible_fill = valid_st2 && miss_st2 && !dram_fill_req_queue_full && !is_snp_st2;
|
||||
wire[31:0] fill_invalidator_addr = addr_st2 & `BASE_ADDR_MASK;
|
||||
VX_fill_invalidator #(
|
||||
|
|
|
@ -105,15 +105,34 @@ module VX_cache_wb_sel_merge
|
|||
core_wb_pc = 0;
|
||||
core_wb_address = 0;
|
||||
for (this_bank = 0; this_bank < NUMBER_BANKS; this_bank = this_bank + 1) begin
|
||||
if (((FUNC_ID == `LLFUNC_ID) && found_bank && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) || ((FUNC_ID != `LLFUNC_ID) && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index])) && found_bank && (per_bank_wb_valid[this_bank]) && (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index]) && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index]))) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
|
||||
core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
|
||||
core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
|
||||
per_bank_wb_pop_unqual[this_bank] = 1;
|
||||
if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
|
||||
|
||||
if (found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
|
||||
core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
|
||||
core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
|
||||
per_bank_wb_pop_unqual[this_bank] = 1;
|
||||
end else begin
|
||||
per_bank_wb_pop_unqual[this_bank] = 0;
|
||||
end
|
||||
|
||||
end else begin
|
||||
per_bank_wb_pop_unqual[this_bank] = 0;
|
||||
|
||||
|
||||
if (((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index])) && found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && (per_bank_wb_valid[this_bank]) && (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index]) && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index])) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
|
||||
core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
|
||||
core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
|
||||
per_bank_wb_pop_unqual[this_bank] = 1;
|
||||
end else begin
|
||||
per_bank_wb_pop_unqual[this_bank] = 0;
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
|
|
@ -290,6 +290,6 @@ module VX_tag_data_access
|
|||
assign readtag_st1e = use_read_tag_st1e;
|
||||
assign fill_sent = miss_st1e;
|
||||
assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
|
||||
assign invalidate_line = is_snp_st1e && miss_st1e;
|
||||
assign invalidate_line = snoop_hit;
|
||||
|
||||
endmodule
|
|
@ -253,6 +253,15 @@
|
|||
`define DFFSQ_SIZE 32
|
||||
`endif
|
||||
|
||||
// Prefetcher parameters (DPRFQ size and stride)
|
||||
`ifndef DPRFQ_SIZE
|
||||
`define DPRFQ_SIZE 32
|
||||
`endif
|
||||
|
||||
`ifndef DPRFQ_STRIDE
|
||||
`define DPRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`ifndef DFILL_INVALIDAOR_SIZE
|
||||
`define DFILL_INVALIDAOR_SIZE 32
|
||||
|
@ -361,6 +370,15 @@
|
|||
`define IFFSQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Prefetcher parameters (IPRFQ size and stride)
|
||||
`ifndef IPRFQ_SIZE
|
||||
`define IPRFQ_SIZE 32
|
||||
`endif
|
||||
|
||||
`ifndef IPRFQ_STRIDE
|
||||
`define IPRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`ifndef IFILL_INVALIDAOR_SIZE
|
||||
`define IFILL_INVALIDAOR_SIZE 32
|
||||
|
@ -467,6 +485,15 @@
|
|||
`define SFFSQ_SIZE 16
|
||||
`endif
|
||||
|
||||
// Prefetcher parameters (SPRFQ size and stride)
|
||||
`ifndef SPRFQ_SIZE
|
||||
`define SPRFQ_SIZE 4
|
||||
`endif
|
||||
|
||||
`ifndef SPRFQ_STRIDE
|
||||
`define SPRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`ifndef SFILL_INVALIDAOR_SIZE
|
||||
`define SFILL_INVALIDAOR_SIZE 32
|
||||
|
@ -572,6 +599,15 @@
|
|||
`define LLFFSQ_SIZE 32
|
||||
`endif
|
||||
|
||||
// Prefetcher parameters (LLPRFQ size and stride)
|
||||
`ifndef LLPRFQ_SIZE
|
||||
`define LLPRFQ_SIZE 32
|
||||
`endif
|
||||
|
||||
`ifndef LLPRFQ_STRIDE
|
||||
`define LLPRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`ifndef LLFILL_INVALIDAOR_SIZE
|
||||
`define LLFILL_INVALIDAOR_SIZE 32
|
||||
|
@ -677,6 +713,15 @@
|
|||
`define L3FFSQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Prefetcher parameters (L3PRFQ size and stride)
|
||||
`ifndef L3PRFQ_SIZE
|
||||
`define L3PRFQ_SIZE 32
|
||||
`endif
|
||||
|
||||
`ifndef L3PRFQ_STRIDE
|
||||
`define L3PRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`ifndef L3FILL_INVALIDAOR_SIZE
|
||||
`define L3FILL_INVALIDAOR_SIZE 32
|
||||
|
|
|
@ -95,6 +95,8 @@ module VX_dmem_controller (
|
|||
.DFQQ_SIZE (`SDFQQ_SIZE),
|
||||
.LLVQ_SIZE (`SLLVQ_SIZE),
|
||||
.FFSQ_SIZE (`SFFSQ_SIZE),
|
||||
.PRFQ_SIZE (`SPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`SPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`SSIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
|
@ -177,6 +179,8 @@ module VX_dmem_controller (
|
|||
.DFQQ_SIZE (`DDFQQ_SIZE),
|
||||
.LLVQ_SIZE (`DLLVQ_SIZE),
|
||||
.FFSQ_SIZE (`DFFSQ_SIZE),
|
||||
.PRFQ_SIZE (`DPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`DPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`DSIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
|
@ -263,6 +267,8 @@ module VX_dmem_controller (
|
|||
.DFQQ_SIZE (`IDFQQ_SIZE),
|
||||
.LLVQ_SIZE (`ILLVQ_SIZE),
|
||||
.FFSQ_SIZE (`IFFSQ_SIZE),
|
||||
.PRFQ_SIZE (`IPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`IPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`ISIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
|
|
|
@ -230,6 +230,8 @@ module Vortex_Cluster
|
|||
.DFQQ_SIZE (`LLDFQQ_SIZE),
|
||||
.LLVQ_SIZE (`LLLLVQ_SIZE),
|
||||
.FFSQ_SIZE (`LLFFSQ_SIZE),
|
||||
.PRFQ_SIZE (`LLPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`LLPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`LLFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`LLSIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
|
|
|
@ -234,6 +234,8 @@ module Vortex_SOC (
|
|||
.DFQQ_SIZE (`L3DFQQ_SIZE),
|
||||
.LLVQ_SIZE (`L3LLVQ_SIZE),
|
||||
.FFSQ_SIZE (`L3FFSQ_SIZE),
|
||||
.PRFQ_SIZE (`L3PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`L3PRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`L3SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue