mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
minor update
This commit is contained in:
parent
0d0706411d
commit
d68b32cd60
18 changed files with 168 additions and 150 deletions
|
@ -89,6 +89,14 @@ case $DRIVER in
|
|||
DRIVER_PATH=driver/opae
|
||||
DRIVER_EXTRA=vlsim
|
||||
;;
|
||||
asesim)
|
||||
DRIVER_PATH=driver/opae
|
||||
DRIVER_EXTRA=asesim
|
||||
;;
|
||||
fpga)
|
||||
DRIVER_PATH=driver/opae
|
||||
DRIVER_EXTRA=fpga
|
||||
;;
|
||||
*)
|
||||
echo "invalid driver: $DRIVER"
|
||||
exit -1
|
||||
|
|
|
@ -36,6 +36,10 @@ ASE_DIR = ase
|
|||
|
||||
VLSIM_DIR = vlsim
|
||||
|
||||
RTL_DIR=../../hw/rtl
|
||||
|
||||
SCRIPT_DIR=../../hw/scripts
|
||||
|
||||
PROJECT = libvortex.so
|
||||
|
||||
PROJECT_ASE = $(ASE_DIR)/libvortex.so
|
||||
|
@ -50,7 +54,8 @@ SRCS = vortex.cpp ../common/vx_utils.cpp
|
|||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += vx_scope.cpp
|
||||
SET_SCOPE = SCOPE=1
|
||||
SCOPE_ENABLE = SCOPE=1
|
||||
SCOPE_H = scope-defs.h
|
||||
endif
|
||||
|
||||
all: vlsim
|
||||
|
@ -59,7 +64,16 @@ all: vlsim
|
|||
json: ../../hw/opae/vortex_afu.json
|
||||
afu_json_mgr json-info --afu-json=$^ --c-hdr=$@
|
||||
|
||||
fpga: $(SRCS)
|
||||
scope-defs.h: $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
|
||||
# generate scope data
|
||||
scope: scope-defs.h
|
||||
|
||||
vlsim-hw: $(SCOPE_H)
|
||||
$(SCOPE_ENABLE) $(MAKE) -C vlsim
|
||||
|
||||
fpga: $(SRCS) $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
||||
|
||||
asesim: $(SRCS) $(ASE_DIR)
|
||||
|
@ -68,9 +82,6 @@ asesim: $(SRCS) $(ASE_DIR)
|
|||
vlsim: $(SRCS) vlsim-hw
|
||||
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -L./vlsim $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
|
||||
|
||||
vlsim-hw:
|
||||
$(SET_SCOPE) $(MAKE) -C vlsim
|
||||
|
||||
vortex.o: vortex.cpp
|
||||
$(CXX) $(CXXFLAGS) -c vortex.cpp -o $@
|
||||
|
||||
|
|
|
@ -40,8 +40,6 @@ TOP = vortex_afu_shim
|
|||
|
||||
RTL_DIR=../../../hw/rtl
|
||||
|
||||
SCRIPT_DIR=../../../hw/scripts
|
||||
|
||||
SRCS = fpga.cpp opae_sim.cpp
|
||||
SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp
|
||||
|
||||
|
@ -70,7 +68,6 @@ endif
|
|||
ifdef SCOPE
|
||||
VL_FLAGS += -DSCOPE
|
||||
CFLAGS += -DSCOPE
|
||||
SCOPE_VH = $(RTL_DIR)/scope-defs.vh
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
|
@ -85,14 +82,8 @@ RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
|||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
# generate scope data
|
||||
scope: $(RTL_DIR)/scope-defs.vh
|
||||
|
||||
$(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json
|
||||
$(SCRIPT_DIR)/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc ../scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
|
||||
|
||||
$(PROJECT): $(SRCS) $(SCOPE_VH)
|
||||
$(PROJECT): $(SRCS)
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
|
||||
|
|
|
@ -57,7 +57,7 @@ static std::thread g_timeout_thread;
|
|||
static std::mutex g_timeout_mutex;
|
||||
|
||||
static void timeout_callback(fpga_handle fpga) {
|
||||
std::this_thread::sleep_for(std::chrono::seconds{60});
|
||||
std::this_thread::sleep_for(std::chrono::seconds{HANG_TIMEOUT});
|
||||
vx_scope_stop(fpga, HANG_TIMEOUT);
|
||||
fpgaClose(fpga);
|
||||
exit(0);
|
||||
|
@ -109,7 +109,7 @@ int vx_scope_start(fpga_handle hfpga, uint64_t delay) {
|
|||
// set start delay
|
||||
uint64_t cmd_delay = ((delay << 3) | CMD_SET_DELAY);
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay));
|
||||
std::cout << "scope start delay: " << delay << std::endl;
|
||||
std::cout << "scope start delay: " << std::dec << delay << "s" << std::endl;
|
||||
}
|
||||
|
||||
#ifdef HANG_TIMEOUT
|
||||
|
@ -133,9 +133,11 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
// stop recording
|
||||
uint64_t cmd_stop = ((delay << 3) | CMD_SET_STOP);
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop));
|
||||
std::cout << "scope stop delay: " << delay << std::endl;
|
||||
std::cout << "scope stop delay: " << std::dec << delay << "s" << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "scope trace dump begin..." << std::endl;
|
||||
|
||||
std::ofstream ofs("vx_scope.vcd");
|
||||
|
||||
ofs << "$version Generated by Vortex Scope $end" << std::endl;
|
||||
|
@ -146,6 +148,8 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
dump_taps(ofs, -1);
|
||||
ofs << "$upscope $end" << std::endl;
|
||||
ofs << "enddefinitions $end" << std::endl;
|
||||
|
||||
std::cout << "OK" << std::flush << std::endl;
|
||||
|
||||
uint64_t frame_width, max_frames, data_valid, offset, delta;
|
||||
uint64_t timestamp = 0;
|
||||
|
@ -163,7 +167,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
} while (true);
|
||||
|
||||
std::cout << "scope trace dump begin..." << std::endl;
|
||||
std::cout << "OK" << std::flush << std::endl;
|
||||
|
||||
// get frame width
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH));
|
||||
|
@ -235,7 +239,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
signal_id = num_taps;
|
||||
if (0 == (frame_no % FRAME_FLUSH_SIZE)) {
|
||||
ofs << std::flush;
|
||||
std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl;
|
||||
std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::flush << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_platform.vh"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_avs_wrapper #(
|
||||
parameter AVS_DATAW = 1,
|
||||
|
|
|
@ -3,21 +3,21 @@
|
|||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
+define+FPU_FAST
|
||||
#+define+SCOPE
|
||||
+define+SCOPE
|
||||
|
||||
#+define+DBG_PRINT_CORE_ICACHE
|
||||
#+define+DBG_PRINT_CORE_DCACHE
|
||||
#+define+DBG_PRINT_CACHE_BANK
|
||||
#+define+DBG_PRINT_CACHE_SNP
|
||||
#+define+DBG_PRINT_CACHE_MSRQ
|
||||
#+define+DBG_PRINT_CACHE_TAG
|
||||
#+define+DBG_PRINT_CACHE_DATA
|
||||
#+define+DBG_PRINT_DRAM
|
||||
#+define+DBG_PRINT_PIPELINE
|
||||
#+define+DBG_PRINT_OPAE
|
||||
#+define+DBG_PRINT_AVS
|
||||
#+define+DBG_PRINT_SCOPE
|
||||
#+define+DBG_CACHE_REQ_INFO
|
||||
+define+DBG_PRINT_CORE_ICACHE
|
||||
+define+DBG_PRINT_CORE_DCACHE
|
||||
+define+DBG_PRINT_CACHE_BANK
|
||||
+define+DBG_PRINT_CACHE_SNP
|
||||
+define+DBG_PRINT_CACHE_MSRQ
|
||||
+define+DBG_PRINT_CACHE_TAG
|
||||
+define+DBG_PRINT_CACHE_DATA
|
||||
+define+DBG_PRINT_DRAM
|
||||
+define+DBG_PRINT_PIPELINE
|
||||
+define+DBG_PRINT_OPAE
|
||||
+define+DBG_PRINT_AVS
|
||||
+define+DBG_PRINT_SCOPE
|
||||
+define+DBG_CACHE_REQ_INFO
|
||||
|
||||
vortex_afu.json
|
||||
QI:vortex_afu.qsf
|
||||
|
|
|
@ -336,7 +336,7 @@ module VX_cluster #(
|
|||
.NUM_REQS (`NUM_CORES),
|
||||
.CREQ_SIZE (`L2CREQ_SIZE),
|
||||
.MSHR_SIZE (`L2MSHR_SIZE),
|
||||
.DRFQ_SIZE (`L2DRFQ_SIZE),
|
||||
.DRPQ_SIZE (`L2DRPQ_SIZE),
|
||||
.SNRQ_SIZE (`L2SNRQ_SIZE),
|
||||
.CWBQ_SIZE (`L2CWBQ_SIZE),
|
||||
.DREQ_SIZE (`L2DREQ_SIZE),
|
||||
|
|
|
@ -218,8 +218,8 @@
|
|||
`endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
`ifndef DDRFQ_SIZE
|
||||
`define DDRFQ_SIZE 4
|
||||
`ifndef DDRPQ_SIZE
|
||||
`define DDRPQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Snoop Response Queue Size
|
||||
|
@ -260,8 +260,8 @@
|
|||
`endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
`ifndef IDRFQ_SIZE
|
||||
`define IDRFQ_SIZE 4
|
||||
`ifndef IDRPQ_SIZE
|
||||
`define IDRPQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
@ -319,8 +319,8 @@
|
|||
`endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
`ifndef L2DRFQ_SIZE
|
||||
`define L2DRFQ_SIZE 4
|
||||
`ifndef L2DRPQ_SIZE
|
||||
`define L2DRPQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Snoop Request Queue Size
|
||||
|
@ -366,8 +366,8 @@
|
|||
`endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
`ifndef L3DRFQ_SIZE
|
||||
`define L3DRFQ_SIZE 4
|
||||
`ifndef L3DRPQ_SIZE
|
||||
`define L3DRPQ_SIZE 4
|
||||
`endif
|
||||
|
||||
// Snoop Request Queue Size
|
||||
|
|
|
@ -32,16 +32,16 @@ module VX_mem_arb #(
|
|||
input wire req_ready_out,
|
||||
|
||||
// input response
|
||||
output wire [NUM_REQS-1:0] rsp_valid_out,
|
||||
output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
|
||||
output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_out,
|
||||
input wire [NUM_REQS-1:0] rsp_ready_out,
|
||||
|
||||
// output response
|
||||
input wire rsp_valid_in,
|
||||
input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in,
|
||||
input wire [DATA_WIDTH-1:0] rsp_data_in,
|
||||
output wire rsp_ready_in
|
||||
output wire rsp_ready_in,
|
||||
|
||||
// output responses
|
||||
output wire [NUM_REQS-1:0] rsp_valid_out,
|
||||
output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
|
||||
output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_out,
|
||||
input wire [NUM_REQS-1:0] rsp_ready_out
|
||||
);
|
||||
localparam DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
|
||||
|
||||
|
|
|
@ -91,7 +91,7 @@ module VX_mem_unit # (
|
|||
.NUM_REQS (`DNUM_REQUESTS),
|
||||
.CREQ_SIZE (`DCREQ_SIZE),
|
||||
.MSHR_SIZE (`DMSHR_SIZE),
|
||||
.DRFQ_SIZE (`DDRFQ_SIZE),
|
||||
.DRPQ_SIZE (`DDRPQ_SIZE),
|
||||
.SNRQ_SIZE (`DSNRQ_SIZE),
|
||||
.CWBQ_SIZE (`DCWBQ_SIZE),
|
||||
.DREQ_SIZE (`DDREQ_SIZE),
|
||||
|
@ -164,7 +164,7 @@ module VX_mem_unit # (
|
|||
.NUM_REQS (`INUM_REQUESTS),
|
||||
.CREQ_SIZE (`ICREQ_SIZE),
|
||||
.MSHR_SIZE (`IMSHR_SIZE),
|
||||
.DRFQ_SIZE (`IDRFQ_SIZE),
|
||||
.DRPQ_SIZE (`IDRPQ_SIZE),
|
||||
.SNRQ_SIZE (1),
|
||||
.CWBQ_SIZE (`ICWBQ_SIZE),
|
||||
.DREQ_SIZE (`IDREQ_SIZE),
|
||||
|
@ -236,7 +236,7 @@ module VX_mem_unit # (
|
|||
.NUM_REQS (`SNUM_REQUESTS),
|
||||
.CREQ_SIZE (`SCREQ_SIZE),
|
||||
.MSHR_SIZE (8),
|
||||
.DRFQ_SIZE (1),
|
||||
.DRPQ_SIZE (1),
|
||||
.SNRQ_SIZE (1),
|
||||
.CWBQ_SIZE (`SCWBQ_SIZE),
|
||||
.DREQ_SIZE (1),
|
||||
|
|
|
@ -339,7 +339,7 @@ module Vortex (
|
|||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.CREQ_SIZE (`L3CREQ_SIZE),
|
||||
.MSHR_SIZE (`L3MSHR_SIZE),
|
||||
.DRFQ_SIZE (`L3DRFQ_SIZE),
|
||||
.DRPQ_SIZE (`L3DRPQ_SIZE),
|
||||
.SNRQ_SIZE (`L3SNRQ_SIZE),
|
||||
.CWBQ_SIZE (`L3CWBQ_SIZE),
|
||||
.DREQ_SIZE (`L3DREQ_SIZE),
|
||||
|
|
140
hw/rtl/cache/VX_bank.v
vendored
140
hw/rtl/cache/VX_bank.v
vendored
|
@ -20,7 +20,7 @@ module VX_bank #(
|
|||
// Miss Reserve Queue Knob
|
||||
parameter MSHR_SIZE = 1,
|
||||
// DRAM Response Queue Size
|
||||
parameter DRFQ_SIZE = 1,
|
||||
parameter DRPQ_SIZE = 1,
|
||||
// Snoop Req Queue Size
|
||||
parameter SNRQ_SIZE = 1,
|
||||
|
||||
|
@ -148,7 +148,7 @@ module VX_bank #(
|
|||
|
||||
wire snrq_full;
|
||||
assign snp_req_ready = !snrq_full;
|
||||
wire snp_req_fire = snp_req_valid && snp_req_ready;
|
||||
wire snrq_push = snp_req_valid && snp_req_ready;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
|
||||
|
@ -157,7 +157,7 @@ module VX_bank #(
|
|||
) snp_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (snp_req_fire),
|
||||
.push (snrq_push),
|
||||
.pop (snrq_pop),
|
||||
.data_in ({snp_req_addr, snp_req_inv, snp_req_tag}),
|
||||
.data_out({snrq_addr_st0, snrq_inv_st0, snrq_tag_st0}),
|
||||
|
@ -178,41 +178,41 @@ module VX_bank #(
|
|||
assign snp_req_ready = 0;
|
||||
end
|
||||
|
||||
wire dfpq_pop;
|
||||
wire dfpq_empty;
|
||||
wire drpq_pop;
|
||||
wire drpq_empty;
|
||||
|
||||
wire [`LINE_ADDR_WIDTH-1:0] dfpq_addr_st0;
|
||||
wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] drpq_addr_st0;
|
||||
wire [`BANK_LINE_WIDTH-1:0] drpq_filldata_st0;
|
||||
|
||||
wire dram_rsp_fire = dram_rsp_valid && dram_rsp_ready;
|
||||
wire drpq_push = dram_rsp_valid && dram_rsp_ready;
|
||||
|
||||
if (DRAM_ENABLE) begin
|
||||
|
||||
wire dfpq_full;
|
||||
assign dram_rsp_ready = !dfpq_full;
|
||||
wire drpq_full;
|
||||
assign dram_rsp_ready = !drpq_full;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW(`LINE_ADDR_WIDTH + $bits(dram_rsp_data)),
|
||||
.SIZE(DRFQ_SIZE),
|
||||
.SIZE(DRPQ_SIZE),
|
||||
.BUFFERED(1)
|
||||
) dfp_queue (
|
||||
) dram_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (dram_rsp_fire),
|
||||
.pop (dfpq_pop),
|
||||
.push (drpq_push),
|
||||
.pop (drpq_pop),
|
||||
.data_in ({dram_rsp_addr, dram_rsp_data}),
|
||||
.data_out({dfpq_addr_st0, dfpq_filldata_st0}),
|
||||
.empty (dfpq_empty),
|
||||
.full (dfpq_full),
|
||||
.data_out({drpq_addr_st0, drpq_filldata_st0}),
|
||||
.empty (drpq_empty),
|
||||
.full (drpq_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
end else begin
|
||||
`UNUSED_VAR (dram_rsp_valid)
|
||||
`UNUSED_VAR (dram_rsp_addr)
|
||||
`UNUSED_VAR (dram_rsp_data)
|
||||
assign dfpq_empty = 1;
|
||||
assign dfpq_addr_st0 = 0;
|
||||
assign dfpq_filldata_st0 = 0;
|
||||
assign drpq_empty = 1;
|
||||
assign drpq_addr_st0 = 0;
|
||||
assign drpq_filldata_st0 = 0;
|
||||
assign dram_rsp_ready = 0;
|
||||
end
|
||||
|
||||
|
@ -228,21 +228,21 @@ module VX_bank #(
|
|||
wire [`WORD_WIDTH-1:0] creq_writeword_st0;
|
||||
wire [CORE_TAG_WIDTH-1:0] creq_tag_st0;
|
||||
|
||||
wire core_req_fire = (| core_req_valid) && core_req_ready;
|
||||
wire creq_push = (| core_req_valid) && core_req_ready;
|
||||
assign core_req_ready = !creq_full;
|
||||
|
||||
VX_bank_core_req_arb #(
|
||||
VX_bank_core_req_queue #(
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.CREQ_SIZE (CREQ_SIZE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
|
||||
) core_req_arb (
|
||||
) core_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Enqueue
|
||||
.push (core_req_fire),
|
||||
.push (creq_push),
|
||||
.tag_in (core_req_tag),
|
||||
.valids_in (core_req_valid),
|
||||
.rw_in (core_req_rw),
|
||||
|
@ -343,7 +343,7 @@ module VX_bank #(
|
|||
|
||||
wire mshr_push_stall;
|
||||
wire cwbq_push_stall;
|
||||
wire dwbq_push_stall;
|
||||
wire dreq_push_stall;
|
||||
wire snpq_push_stall;
|
||||
wire pipeline_stall;
|
||||
|
||||
|
@ -356,13 +356,13 @@ module VX_bank #(
|
|||
|
||||
// determine which queue to pop next in priority order
|
||||
wire mshr_pop_unqual = mshr_valid_st0;
|
||||
wire dfpq_pop_unqual = !mshr_pop_unqual && !dfpq_empty;
|
||||
wire creq_pop_unqual = !mshr_pop_unqual && !dfpq_pop_unqual && !creq_empty && !mshr_going_full;
|
||||
wire snrq_pop_unqual = !mshr_pop_unqual && !dfpq_pop_unqual && !creq_pop_unqual && !snrq_empty && !mshr_going_full;
|
||||
wire drpq_pop_unqual = !mshr_pop_unqual && !drpq_empty;
|
||||
wire creq_pop_unqual = !mshr_pop_unqual && !drpq_pop_unqual && !creq_empty && !mshr_going_full;
|
||||
wire snrq_pop_unqual = !mshr_pop_unqual && !drpq_pop_unqual && !creq_pop_unqual && !snrq_empty && !mshr_going_full;
|
||||
|
||||
assign mshr_pop = mshr_pop_unqual && !pipeline_stall
|
||||
&& !(is_mshr_miss_st2 || is_mshr_miss_st3); // stop if previous request was a miss
|
||||
assign dfpq_pop = dfpq_pop_unqual && !pipeline_stall;
|
||||
assign drpq_pop = drpq_pop_unqual && !pipeline_stall;
|
||||
assign creq_pop = creq_pop_unqual && !pipeline_stall;
|
||||
assign snrq_pop = snrq_pop_unqual && !pipeline_stall;
|
||||
|
||||
|
@ -377,12 +377,12 @@ module VX_bank #(
|
|||
end
|
||||
|
||||
assign is_mshr_st0 = mshr_pop_unqual;
|
||||
assign is_fill_st0 = dfpq_pop_unqual;
|
||||
assign is_fill_st0 = drpq_pop_unqual;
|
||||
|
||||
assign valid_st0 = dfpq_pop || mshr_pop || creq_pop || snrq_pop;
|
||||
assign valid_st0 = drpq_pop || mshr_pop || creq_pop || snrq_pop;
|
||||
|
||||
assign addr_st0 = mshr_pop_unqual ? mshr_addr_st0 :
|
||||
dfpq_pop_unqual ? dfpq_addr_st0 :
|
||||
drpq_pop_unqual ? drpq_addr_st0 :
|
||||
creq_pop_unqual ? creq_addr_st0[`LINE_SELECT_ADDR_RNG] :
|
||||
snrq_pop_unqual ? snrq_addr_st0 :
|
||||
0;
|
||||
|
@ -396,7 +396,7 @@ module VX_bank #(
|
|||
assign wsel_st0 = 0;
|
||||
end
|
||||
|
||||
assign writedata_st0 = dfpq_filldata_st0;
|
||||
assign writedata_st0 = drpq_filldata_st0;
|
||||
|
||||
assign inst_meta_st0 = mshr_pop_unqual ? {`REQ_TAG_WIDTH'(mshr_tag_st0), mshr_rw_st0, mshr_byteen_st0, mshr_tid_st0} :
|
||||
creq_pop_unqual ? {`REQ_TAG_WIDTH'(creq_tag_st0), creq_rw_st0, creq_byteen_st0, creq_tid_st0} :
|
||||
|
@ -519,7 +519,7 @@ if (DRAM_ENABLE) begin
|
|||
end else begin
|
||||
|
||||
`UNUSED_VAR (mshr_pending_hazard_unqual_st0)
|
||||
`UNUSED_VAR (dram_rsp_fire)
|
||||
`UNUSED_VAR (drpq_push)
|
||||
`UNUSED_VAR (addr_st0)
|
||||
|
||||
assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1;
|
||||
|
@ -628,8 +628,8 @@ end
|
|||
wire incoming_fill_st3;
|
||||
|
||||
// check if a matching fill request is coming
|
||||
wire incoming_fill_dfp_st2 = dram_rsp_fire && (addr_st2 == dram_rsp_addr);
|
||||
wire incoming_fill_st0_st2 = !dfpq_empty && (addr_st2 == dfpq_addr_st0);
|
||||
wire incoming_fill_dfp_st2 = drpq_push && (addr_st2 == dram_rsp_addr);
|
||||
wire incoming_fill_st0_st2 = !drpq_empty && (addr_st2 == drpq_addr_st0);
|
||||
wire incoming_fill_st1_st2 = is_fill_st1 && (addr_st2 == addr_st1);
|
||||
wire incoming_fill_st2 = incoming_fill_dfp_st2
|
||||
|| incoming_fill_st0_st2
|
||||
|
@ -678,7 +678,7 @@ end
|
|||
|
||||
wire mshr_push = mshr_push_unqual
|
||||
&& !cwbq_push_stall
|
||||
&& !dwbq_push_stall
|
||||
&& !dreq_push_stall
|
||||
&& !snpq_push_stall;
|
||||
|
||||
wire mshr_full;
|
||||
|
@ -693,7 +693,7 @@ end
|
|||
wire mshr_dequeue_st3 = valid_st3 && is_mshr_st3 && !mshr_push_unqual && !pipeline_stall;
|
||||
|
||||
// mark msrq entries that match the DRAM fill as 'ready'
|
||||
wire update_ready_st0 = dfpq_pop;
|
||||
wire update_ready_st0 = drpq_pop;
|
||||
|
||||
// push missed requests as 'ready' if it was a forced miss but actually had a hit
|
||||
// or the fill request is coming for the missed block
|
||||
|
@ -792,7 +792,7 @@ end
|
|||
wire cwbq_push = cwbq_push_unqual
|
||||
&& !cwbq_full
|
||||
&& !mshr_push_stall
|
||||
&& !dwbq_push_stall
|
||||
&& !dreq_push_stall
|
||||
&& !snpq_push_stall;
|
||||
|
||||
wire cwbq_pop = core_rsp_valid && core_rsp_ready;
|
||||
|
@ -821,62 +821,62 @@ end
|
|||
|
||||
// Enqueue DRAM request
|
||||
|
||||
wire dwbq_empty, dwbq_full;
|
||||
wire dreq_empty, dreq_full;
|
||||
|
||||
wire dwbq_push_unqual = valid_st3 && send_dwb_req_st3;
|
||||
wire dreq_push_unqual = valid_st3 && send_dwb_req_st3;
|
||||
|
||||
assign dwbq_push_stall = dwbq_push_unqual && dwbq_full;
|
||||
assign dreq_push_stall = dreq_push_unqual && dreq_full;
|
||||
|
||||
wire dwbq_push = dwbq_push_unqual
|
||||
&& !dwbq_full
|
||||
wire dreq_push = dreq_push_unqual
|
||||
&& !dreq_full
|
||||
&& !mshr_push_stall
|
||||
&& !cwbq_push_stall
|
||||
&& !snpq_push_stall;
|
||||
|
||||
wire dwbq_pop = dram_req_valid && dram_req_ready;
|
||||
wire dreq_pop = dram_req_valid && dram_req_ready;
|
||||
|
||||
wire writeback = WRITE_ENABLE && do_writeback_st3;
|
||||
|
||||
wire [`LINE_ADDR_WIDTH-1:0] dwbq_addr = writeback ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} :
|
||||
wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = writeback ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} :
|
||||
addr_st3;
|
||||
|
||||
wire [BANK_LINE_SIZE-1:0] dwbq_byteen = writeback ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}};
|
||||
wire [BANK_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}};
|
||||
|
||||
if (DRAM_ENABLE) begin
|
||||
VX_generic_queue #(
|
||||
.DATAW(1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH),
|
||||
.SIZE(DREQ_SIZE),
|
||||
.BUFFERED(1)
|
||||
) dwb_queue (
|
||||
) dram_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (dwbq_push),
|
||||
.pop (dwbq_pop),
|
||||
.data_in ({writeback, dwbq_byteen, dwbq_addr, readdata_st3}),
|
||||
.push (dreq_push),
|
||||
.pop (dreq_pop),
|
||||
.data_in ({writeback, dreq_byteen, dreq_addr, readdata_st3}),
|
||||
.data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
|
||||
.empty (dwbq_empty),
|
||||
.full (dwbq_full),
|
||||
.empty (dreq_empty),
|
||||
.full (dreq_full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
end else begin
|
||||
`UNUSED_VAR (dwbq_push)
|
||||
`UNUSED_VAR (dwbq_pop)
|
||||
`UNUSED_VAR (dwbq_addr)
|
||||
`UNUSED_VAR (dwbq_byteen)
|
||||
`UNUSED_VAR (dreq_push)
|
||||
`UNUSED_VAR (dreq_pop)
|
||||
`UNUSED_VAR (dreq_addr)
|
||||
`UNUSED_VAR (dreq_byteen)
|
||||
`UNUSED_VAR (readtag_st3)
|
||||
`UNUSED_VAR (dirtyb_st3)
|
||||
`UNUSED_VAR (readdata_st3)
|
||||
`UNUSED_VAR (writeback)
|
||||
`UNUSED_VAR (dram_req_ready)
|
||||
assign dwbq_empty = 1;
|
||||
assign dwbq_full = 0;
|
||||
assign dreq_empty = 1;
|
||||
assign dreq_full = 0;
|
||||
assign dram_req_rw = 0;
|
||||
assign dram_req_byteen = 0;
|
||||
assign dram_req_addr = 0;
|
||||
assign dram_req_data = 0;
|
||||
end
|
||||
|
||||
assign dram_req_valid = !dwbq_empty;
|
||||
assign dram_req_valid = !dreq_empty;
|
||||
|
||||
// Enqueue snoop response
|
||||
|
||||
|
@ -890,7 +890,7 @@ end
|
|||
&& !snpq_full
|
||||
&& !mshr_push_stall
|
||||
&& !cwbq_push_stall
|
||||
&& !dwbq_push_stall;
|
||||
&& !dreq_push_stall;
|
||||
|
||||
wire snpq_pop = snp_rsp_valid && snp_rsp_ready;
|
||||
|
||||
|
@ -923,12 +923,12 @@ end
|
|||
end
|
||||
|
||||
assign snp_rsp_valid = !snpq_empty
|
||||
&& dwbq_empty; // ensure all writebacks are sent
|
||||
&& dreq_empty; // ensure all writebacks are sent
|
||||
|
||||
// bank pipeline stall
|
||||
assign pipeline_stall = mshr_push_stall
|
||||
|| cwbq_push_stall
|
||||
|| dwbq_push_stall
|
||||
|| dreq_push_stall
|
||||
|| snpq_push_stall;
|
||||
|
||||
`SCOPE_ASSIGN (valid_st0, valid_st0);
|
||||
|
@ -949,17 +949,17 @@ end
|
|||
`SCOPE_ASSIGN (addr_st3, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID));
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
wire incoming_fill_dfp_st3 = dram_rsp_fire && (addr_st3 == dram_rsp_addr);
|
||||
wire incoming_fill_dfp_st3 = drpq_push && (addr_st3 == dram_rsp_addr);
|
||||
always @(posedge clk) begin
|
||||
if (valid_st3 && miss_st3 && (incoming_fill_st3 || incoming_fill_dfp_st3)) begin
|
||||
$display("%t: incoming fill - addr=%0h, st3=%b, dfp=%b", $time, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), incoming_fill_st3, incoming_fill_dfp_st3);
|
||||
assert(!is_mshr_st3);
|
||||
end
|
||||
if (pipeline_stall) begin
|
||||
$display("%t: cache%0d:%0d pipeline-stall: msrq=%b, cwbq=%b, dwbq=%b, snpq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, cwbq_push_stall, dwbq_push_stall, snpq_push_stall);
|
||||
$display("%t: cache%0d:%0d pipeline-stall: msrq=%b, cwbq=%b, dwbq=%b, snpq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, cwbq_push_stall, dreq_push_stall, snpq_push_stall);
|
||||
end
|
||||
if (dfpq_pop) begin
|
||||
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0);
|
||||
if (drpq_pop) begin
|
||||
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), drpq_filldata_st0);
|
||||
end
|
||||
if (creq_pop) begin
|
||||
if (creq_rw_st0)
|
||||
|
@ -973,11 +973,11 @@ end
|
|||
if (cwbq_push) begin
|
||||
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), cwbq_tag_st3, cwbq_tid_st3, cwbq_data_st3, debug_wid_st3, debug_pc_st3);
|
||||
end
|
||||
if (dwbq_push) begin
|
||||
if (dreq_push) begin
|
||||
if (do_writeback_st3)
|
||||
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3);
|
||||
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3);
|
||||
else
|
||||
$display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_addr, BANK_ID), debug_wid_st3, debug_pc_st3);
|
||||
$display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st3, debug_pc_st3);
|
||||
end
|
||||
if (snpq_push) begin
|
||||
$display("%t: cache%0d:%0d snp-rsp: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), snpq_tag_st3);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_bank_core_req_arb #(
|
||||
module VX_bank_core_req_queue #(
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 1,
|
||||
// Number of Word requests per cycle
|
4
hw/rtl/cache/VX_cache.v
vendored
4
hw/rtl/cache/VX_cache.v
vendored
|
@ -19,7 +19,7 @@ module VX_cache #(
|
|||
// Miss Reserve Queue Knob
|
||||
parameter MSHR_SIZE = 8,
|
||||
// DRAM Response Queue Size
|
||||
parameter DRFQ_SIZE = 4,
|
||||
parameter DRPQ_SIZE = 4,
|
||||
// Snoop Req Queue Size
|
||||
parameter SNRQ_SIZE = 4,
|
||||
|
||||
|
@ -265,7 +265,7 @@ module VX_cache #(
|
|||
.NUM_REQS (NUM_REQS),
|
||||
.CREQ_SIZE (CREQ_SIZE),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.DRFQ_SIZE (DRFQ_SIZE),
|
||||
.DRPQ_SIZE (DRPQ_SIZE),
|
||||
.SNRQ_SIZE (SNRQ_SIZE),
|
||||
.CWBQ_SIZE (CWBQ_SIZE),
|
||||
.DREQ_SIZE (DREQ_SIZE),
|
||||
|
|
2
hw/rtl/cache/VX_cache_config.vh
vendored
2
hw/rtl/cache/VX_cache_config.vh
vendored
|
@ -80,6 +80,6 @@
|
|||
|
||||
`define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))}
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
||||
`endif
|
||||
|
|
3
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
3
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
|
@ -90,8 +90,7 @@ module VX_cache_core_rsp_merge #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(NUM_REQS + (NUM_REQS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)),
|
||||
.R(NUM_REQS),
|
||||
.PASSTHRU(NUM_BANKS < 4)
|
||||
.R(NUM_REQS)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
11
hw/rtl/cache/VX_snp_forwarder.v
vendored
11
hw/rtl/cache/VX_snp_forwarder.v
vendored
|
@ -195,6 +195,14 @@ module VX_snp_forwarder #(
|
|||
.ready_out (fwdin_ready)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_SNP
|
||||
always @(posedge clk) begin
|
||||
if (fwdin_valid && fwdin_ready) begin
|
||||
$display("%t: cache%0d snp-fwd-in: tag=%0h", $time, CACHE_ID, fwdin_tag);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
|
@ -222,9 +230,6 @@ module VX_snp_forwarder #(
|
|||
if (snp_fwdout_valid[0] && snp_fwdout_ready[0]) begin
|
||||
$display("%t: cache%0d snp-fwd-out: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `TO_FULL_ADDR(snp_fwdout_addr[0]), snp_fwdout_inv[0], snp_fwdout_tag[0]);
|
||||
end
|
||||
if (fwdin_valid && fwdin_ready) begin
|
||||
$display("%t: cache%0d snp-fwd-in: tag=%0h", $time, CACHE_ID, fwdin_tag);
|
||||
end
|
||||
if (snp_rsp_valid && snp_rsp_ready) begin
|
||||
$display("%t: cache%0d snp-fwd-rsp: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_inv, snp_rsp_tag);
|
||||
end
|
||||
|
|
|
@ -40,22 +40,22 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
|
|||
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||
set_global_assignment -name VERILOG_MACRO FPU_FAST
|
||||
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
|
||||
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
|
||||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
set_global_assignment -name POWER_USE_TA_VALUE 65
|
||||
set_global_assignment -name SEED 1
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
#set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
|
||||
#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
#set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
|
||||
#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
#set_global_assignment -name POWER_USE_TA_VALUE 65
|
||||
#set_global_assignment -name SEED 1
|
||||
#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
#set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
|
||||
set idx 0
|
||||
foreach arg $q_args_orig {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue