Merge branch 'fpga_synthesis' of github.gatech.edu:casl/Vortex into fpga_synthesis

This commit is contained in:
codetector 2020-04-03 14:36:52 -04:00
commit 9ee12d4a01
No known key found for this signature in database
GPG key ID: 7D42AB4D2C7B40A4
8 changed files with 203 additions and 44 deletions

View file

@ -281,7 +281,6 @@ begin
avs_writedata <= 0;
avs_read <= 0;
avs_write <= 0;
avs_read_ctr <= 0;
avs_write_ctr <= 0;
end
@ -323,9 +322,7 @@ begin
STATE_RUN: begin
if (vx_dram_req_read
&& !avs_waitrequest
&& !avs_raq_full
&& !avs_rdq_full)
&& !vx_dram_req_delay)
begin
avs_address <= (vx_dram_req_addr >> 6);
avs_read <= 1;
@ -333,7 +330,7 @@ begin
end
if (vx_dram_req_write
&& !avs_waitrequest)
&& !vx_dram_req_delay)
begin
avs_writedata <= {>>{vx_dram_req_data}};
avs_address <= (vx_dram_req_addr >> 6);
@ -345,19 +342,22 @@ begin
if (avs_readdatavalid)
begin
$display("%t: AVS Rd Rsp: value=%h", $time, avs_readdata[63:0]);
$display("%t: AVS Rd Rsp", $time);
end
end
end
// Vortex DRAM requests stalling
assign vx_dram_req_delay = !(avs_read || avs_write);
assign vx_dram_req_delay = !((STATE_RUN == state)
&& !avs_waitrequest
&& !avs_raq_full
&& !avs_rdq_full);
// Vortex DRAM fill response
always_comb
begin
vx_dram_fill_rsp = (STATE_RUN == state) && !avs_rdq_empty && vx_dram_fill_accept;
vx_dram_fill_rsp_addr = avs_raq_dout;
vx_dram_fill_rsp_addr = (avs_raq_dout << 6);
{>>{vx_dram_fill_rsp_data}} = avs_rdq_dout;
end
@ -369,7 +369,7 @@ always_comb
begin
avs_raq_pop = vx_dram_fill_rsp || cci_write_req;
avs_raq_din = avs_address;
avs_raq_push = avs_write;
avs_raq_push = avs_read;
end
VX_generic_queue_ll #(
@ -463,10 +463,10 @@ logic cci_write_pending;
always_comb
begin
cci_write_req = (STATE_READ == state)
&& !avs_rdq_empty
&& !cp2af_sRxPort.c1TxAlmFull
&& !cci_write_pending
&& cci_write_ctr < csr_data_size;
&& !avs_rdq_empty
&& !cp2af_sRxPort.c1TxAlmFull
&& !cci_write_pending
&& cci_write_ctr < csr_data_size;
wr_hdr = t_ccip_c1_ReqMemHdr'(0);
wr_hdr.address = csr_io_addr + cci_write_ctr;

64
driver/hw/wave.do Normal file
View file

@ -0,0 +1,64 @@
onerror {resume}
quietly WaveActivateNextPane {} 0
add wave -noupdate -label clk /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/clk
add wave -noupdate -label reset /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/SoftReset
add wave -noupdate -label state /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/state
add wave -noupdate -label cci_write_pending /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cci_write_pending
add wave -noupdate -label cci_write_ctr -radix decimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cci_write_ctr
add wave -noupdate -label csr_data_size -radix decimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/csr_data_size
add wave -noupdate -label avs_read_ctr -radix decimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_read_ctr
add wave -noupdate -label avs_waitrequest /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_waitrequest
add wave -noupdate -label avs_address -radix hexadecimal -radixshowbase 0 /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_address
add wave -noupdate -label avs_readdata -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_readdata
add wave -noupdate -label avs_writedata -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_writedata
add wave -noupdate -label avs_write /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_write
add wave -noupdate -label avs_read /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_read
add wave -noupdate -label avs_readdatavalid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_readdatavalid
add wave -noupdate -label sRx.c0.rspValid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cp2af_sRxPort.c0.rspValid
add wave -noupdate -label sRx.c1.rspValid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cp2af_sRxPort.c1.rspValid
add wave -noupdate -label sTx.c0.valid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/af2cp_sTxPort.c0.valid
add wave -noupdate -label sTx.c1.valid /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/af2cp_sTxPort.c1.valid
add wave -noupdate -label cci_write_req /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/cci_write_req
add wave -noupdate -label avs_raq_push /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_push
add wave -noupdate -label avs_rdq_push /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_push
add wave -noupdate -label avs_raq_pop /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_pop
add wave -noupdate -label avs_rdq_pop /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_pop
add wave -noupdate -label avs_raq_full /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_full
add wave -noupdate -label avs_rdq_full /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_full
add wave -noupdate -label avs_raq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_empty
add wave -noupdate -label avs_rdq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_empty
add wave -noupdate -label vx_dram_req_write /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_write
add wave -noupdate -label vx_dram_req_delay /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_delay
add wave -noupdate -label vx_dram_req_read /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_read
add wave -noupdate -label vx_reset /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/reset
add wave -noupdate -label out_dram_fill_rsp /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_dram_fill_rsp
add wave -noupdate -label out_dram_fill_accept /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_dram_fill_accept
add wave -noupdate -label llc_snp_req /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/llc_snp_req
add wave -noupdate -label llc_snp_req_delay /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/llc_snp_req_delay
add wave -noupdate -label out_break /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_ebreak
add wave -noupdate -label warp_pc -radix hexadecimal -radixshowbase 0 {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_pc}
add wave -noupdate -label scheduled_warp {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/scheduled_warp}
add wave -noupdate -label thread_mask {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/thread_mask}
add wave -noupdate -label warp_num {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_num}
add wave -noupdate -label warp_active {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_active}
add wave -noupdate -label warp_stalled {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_stalled}
add wave -noupdate -label warp_lock {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_lock}
add wave -noupdate -label use_active {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/use_active}
TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 2} {66234495 ps} 0}
quietly wave cursor active 1
configure wave -namecolwidth 195
configure wave -valuecolwidth 100
configure wave -justifyvalue left
configure wave -signalnamewidth 0
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
update
WaveRestoreZoom {66041656 ps} {66406344 ps}

View file

@ -18,7 +18,7 @@ run-fpga: $(PROJECT)
LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
run-ase: $(PROJECT)
ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -t 1
run-rtlsim: $(PROJECT)
LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)

View file

@ -114,8 +114,6 @@ module VX_bank
wire snrq_valid_st0;
wire[31:0] snrq_addr_st0;
reg snrq_hazard_st0;
assign snrq_valid_st0 = !snrq_empty;
VX_generic_queue_ll #(.DATAW(32), .SIZE(SNRQ_SIZE)) snr_queue(
.clk (clk),
@ -133,7 +131,6 @@ module VX_bank
wire dfpq_full;
wire[31:0] dfpq_addr_st0;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dfpq_filldata_st0;
reg dfpq_hazard_st0;
assign dram_fill_accept = !dfpq_full;
@ -161,7 +158,6 @@ module VX_bank
wire [`NW_M1:0] reqq_req_warp_num_st0;
wire [2:0] reqq_req_mem_read_st0;
wire [2:0] reqq_req_mem_write_st0;
reg reqq_hazard_st0;
wire [31:0] reqq_req_pc_st0;
assign reqq_push = !delay_req && (|bank_valids);
@ -229,7 +225,6 @@ module VX_bank
wire [`NW_M1:0] mrvq_warp_num_st0;
wire [2:0] mrvq_mem_read_st0;
wire [2:0] mrvq_mem_write_st0;
reg mrvq_hazard_st0;
wire miss_add;
wire[31:0] miss_add_addr;
@ -321,28 +316,13 @@ module VX_bank
// assign is_fill_in_pipe = (|is_fill_st1) || is_fill_st2;
assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe && !mrvq_hazard_st0;
assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe && !dfpq_hazard_st0;
assign reqq_pop = !mrvq_stop && !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !(reqq_hazard_st0 || (mrvq_valid_st0 && mrvq_hazard_st0)) && !is_fill_in_pipe;
assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && snrq_valid_st0 && !stall_bank_pipe && !snrq_hazard_st0;
assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe;
assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe;
assign reqq_pop = !mrvq_stop && !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !is_fill_in_pipe;
assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && snrq_valid_st0 && !stall_bank_pipe;
integer st1_cycle;
always @(*) begin
dfpq_hazard_st0 = 0;
mrvq_hazard_st0 = 0;
reqq_hazard_st0 = 0;
snrq_hazard_st0 = 0;
// for (st1_cycle = 0; st1_cycle < STAGE_1_CYCLES; st1_cycle = st1_cycle + 1) begin
// if (valid_st1[st1_cycle] && going_to_write_st1[st1_cycle]) begin
// if (dfpq_addr_st0 [31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) dfpq_hazard_st0 = 1;
// if (mrvq_addr_st0 [31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) mrvq_hazard_st0 = 1;
// if (reqq_req_addr_st0[31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) reqq_hazard_st0 = 1;
// if (snrq_addr_st0 [31:`LINE_SELECT_ADDR_START] == addr_st1[st1_cycle][31:`LINE_SELECT_ADDR_START]) snrq_hazard_st0 = 1;
// end
// end
end
wire qual_is_fill_st0;
wire qual_valid_st0;
wire [31:0] qual_addr_st0;

View file

@ -43,6 +43,10 @@ module VX_cache
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
// Prefetcher
parameter PRFQ_SIZE = 64,
parameter PRFQ_STRIDE = 0,
// Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
@ -147,7 +151,8 @@ module VX_cache
assign snp_req_delay = (|per_bank_snrq_full);
assign dram_fill_accept = (NUMBER_BANKS == 1) ? per_bank_dram_fill_accept[0] : per_bank_dram_fill_accept[dram_fill_rsp_addr[`BANK_SELECT_ADDR_RNG]];
// assign dram_fill_accept = (NUMBER_BANKS == 1) ? per_bank_dram_fill_accept[0] : per_bank_dram_fill_accept[dram_fill_rsp_addr[`BANK_SELECT_ADDR_RNG]];
assign dram_fill_accept = (|per_bank_dram_fill_accept);
VX_cache_dram_req_arb #(
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
@ -165,6 +170,8 @@ module VX_cache
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.PRFQ_SIZE (PRFQ_SIZE),
.PRFQ_STRIDE (PRFQ_STRIDE),
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
)
VX_cache_dram_req_arb

View file

@ -39,6 +39,10 @@ module VX_cache_dram_req_arb
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
// Prefetcher
parameter PRFQ_SIZE = 64,
parameter PRFQ_STRIDE = 2,
// Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
@ -74,6 +78,33 @@ module VX_cache_dram_req_arb
);
wire pref_pop;
wire pref_valid;
wire[31:0] pref_addr;
assign pref_pop = !dwb_valid && !dfqq_req && !dram_req_delay && pref_valid;
VX_prefetcher #(
.PRFQ_SIZE (PRFQ_SIZE),
.PRFQ_STRIDE (PRFQ_STRIDE),
.BANK_LINE_SIZE_BYTES(BANK_LINE_SIZE_BYTES),
.WORD_SIZE_BYTES (WORD_SIZE_BYTES)
)
prfqq
(
.clk (clk),
.reset (reset),
.dram_req (dram_req && dram_req_read),
.dram_req_addr(dram_req_addr),
.pref_pop (pref_pop),
.pref_valid (pref_valid),
.pref_addr (pref_addr)
);
wire dfqq_req;
wire[31:0] dfqq_req_addr;
wire dfqq_empty;
@ -107,10 +138,10 @@ module VX_cache_dram_req_arb
assign per_bank_dram_wb_queue_pop = dram_req_delay ? 0 : use_wb_valid & ((1 << dwb_bank));
assign dram_req = dwb_valid || dfqq_req;
assign dram_req = dwb_valid || dfqq_req || pref_pop;
assign dram_req_write = dwb_valid;
assign dram_req_read = dfqq_req && !dwb_valid;
assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : dfqq_req_addr) & `BASE_ADDR_MASK;
assign dram_req_read = (dfqq_req && !dwb_valid) || pref_pop;
assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : (dfqq_req ? dfqq_req_addr : pref_addr)) & `BASE_ADDR_MASK;
assign dram_req_size = BANK_LINE_SIZE_BYTES;
assign {dram_req_data} = dwb_valid ? {per_bank_dram_wb_req_data[dwb_bank] }: 0;
// assign dram_req_because_of_wb = dwb_valid ? per_bank_dram_because_of_snp[dwb_bank] : 0;

View file

@ -0,0 +1,77 @@
`include "VX_cache_config.v"
module VX_prefetcher
#(
parameter PRFQ_SIZE = 64,
parameter PRFQ_STRIDE = 2,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE_BYTES = 16,
// Size of a word in bytes
parameter WORD_SIZE_BYTES = 4
)
(
input wire clk,
input wire reset,
input wire dram_req,
input wire[31:0] dram_req_addr,
input wire pref_pop,
output wire pref_valid,
output wire[31:0] pref_addr
);
reg[`vx_clog2(PRFQ_STRIDE):0] use_valid;
reg[31:0] use_addr;
wire current_valid;
wire[31:0] current_addr;
wire current_full;
wire current_empty;
assign current_valid = ~current_empty;
wire update_use = ((use_valid == 0) || ((use_valid-1) == 0)) && current_valid;
VX_generic_queue_ll #(.DATAW(32), .SIZE(PRFQ_SIZE)) pfq_queue(
.clk (clk),
.reset (reset),
.push (dram_req && !current_full && !pref_pop),
.in_data (dram_req_addr & `BASE_ADDR_MASK),
.pop (update_use),
.out_data(current_addr),
.empty (current_empty),
.full (current_full)
);
assign pref_valid = use_valid != 0;
assign pref_addr = use_addr;
always @(posedge clk) begin
if (reset) begin
use_valid <= 0;
use_addr <= 0;
end else begin
if (update_use) begin
use_valid <= PRFQ_STRIDE;
use_addr <= current_addr + BANK_LINE_SIZE_BYTES;
end else if (pref_valid && pref_pop) begin
use_valid <= use_valid - 1;
use_addr <= use_addr + BANK_LINE_SIZE_BYTES;
end
end
end
endmodule

View file

@ -234,8 +234,8 @@ int _write (int file, char *buf, int nbytes)
static int heap_start = (int) 0x30000000;
static int head_end = (int) 0x40000000;
static int heap_start = (int) 0x90000000;
static int head_end = (int) 0xa0000000;
void * _sbrk (int nbytes)
{