fix scheduler rename_table X values - reverted valid bits

This commit is contained in:
Blaise Tine 2020-05-23 00:22:56 -04:00
parent 1512138a15
commit 70dadca9fe
4 changed files with 49 additions and 38 deletions

View file

@ -4,13 +4,15 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
# control RTL debug print states
DBG_PRINT = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CORE_DCACHE \
-DDBG_PRINT_CACHE_BANK \
-DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSRQ \
-DDBG_PRINT_DRAM \
-DDBG_PRINT_OPAE
DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CORE_DCACHE \
-DDBG_PRINT_CACHE_BANK \
-DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSRQ \
-DDBG_PRINT_DRAM \
-DDBG_PRINT_OPAE
#DBG_PRINT=$(DBG_PRINT_FLAGS)
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
@ -43,7 +45,7 @@ VL_FLAGS += -DGLOBAL_BLOCK_SIZE=64
# Debugging
ifdef DEBUG
VL_FLAGS += --trace -DVL_DEBUG=1 $(DBG_PRINT)
VL_FLAGS += --trace $(DBG_PRINT)
CFLAGS += -DVCD_OUTPUT
else
CFLAGS += -DNDEBUG

View file

@ -16,11 +16,22 @@ VF += -DGLOBAL_BLOCK_SIZE=64
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
# control RTL debug print states
DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CORE_DCACHE \
-DDBG_PRINT_CACHE_BANK \
-DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSRQ \
-DDBG_PRINT_DRAM \
-DDBG_PRINT_OPAE
#DBG_PRINT=$(DBG_PRINT_FLAGS)
INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/simulate
SRCS += ./simulate/testbench.cpp ./simulate/simulator.cpp
DBG += --trace -DVL_DEBUG=1
DBG += --trace
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
@ -33,16 +44,16 @@ gen-s: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG'
gen-sd: build_config
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
verilator $(VF) -cc Vortex_Socket.v $(DBG_PRINT) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
gen-st: build_config
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
gen-m: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
gen-md: build_config
verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG)
verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) $(DBG_PRINT) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG)
gen-mt: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)

View file

@ -17,14 +17,13 @@ module VX_scheduler (
assign is_empty = count_valid == 0;
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
reg[31:0] valid_table [`NUM_WARPS-1:0];
wire valid_wb = (writeback_if.wb != 0) && (| writeback_if.valid) && (writeback_if.rd != 0);
wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rs1];
wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rs2];
wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rd ];
wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0);
wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0);
wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0);
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
@ -35,7 +34,7 @@ module VX_scheduler (
wire is_csr = bckE_req_if.is_csr;
wire is_exec = !is_mem && !is_gpu && !is_csr;
wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn;
wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn;
wire rs1_rename_qual = ((rs1_rename) && (bckE_req_if.rs1 != 0));
wire rs2_rename_qual = ((rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2));
@ -44,37 +43,31 @@ module VX_scheduler (
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
assign schedule_delay = (| bckE_req_if.valid)
&& ((rename_valid )
&& ((rename_valid)
|| (memory_delay && is_mem)
|| (gpr_stage_delay && (is_mem || is_exec))
|| (exec_delay && is_exec));
integer i, w;
wire[`NUM_THREADS-1:0] old_rename_mask = rename_table[writeback_if.warp_num][writeback_if.rd];
wire[`NUM_THREADS-1:0] invalidate_mask = (~writeback_if.valid);
wire[`NUM_THREADS-1:0] valid_wb_new_mask = old_rename_mask & invalidate_mask;
wire valid_wb_new_valid = valid_wb_new_mask != 0;
wire[`NUM_THREADS-1:0] old_rename_mask = rename_table[writeback_if.warp_num][writeback_if.rd];
wire[`NUM_THREADS-1:0] invalidate_mask = ~writeback_if.valid;
wire[`NUM_THREADS-1:0] valid_wb_new_mask = old_rename_mask & invalidate_mask;
always @(posedge clk) begin
if (reset) begin
for (w = 0; w < `NUM_WARPS; w=w+1) begin
for (i = 0; i < 32; i++) begin
// rename_table[w][i] <= 0;
valid_table[w][i] <= 0;
rename_table[w][i] <= 0;
end
end
end else begin
if (valid_wb) begin
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
valid_table [writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_valid;
end
if (!schedule_delay && wb_inc) begin
rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
valid_table [bckE_req_if.warp_num][bckE_req_if.rd] <= 1'b1;
end
if (valid_wb

View file

@ -10,16 +10,20 @@ double sc_time_stamp() {
Simulator::Simulator() {
// force random values for uninitialized signals
const char* args[] = {"", "+verilator+rand+reset+2", "+verilator+seed+0"};
const char* args[] = {"", "+verilator+rand+reset+1", "+verilator+seed+0"};
Verilated::commandArgs(3, args);
#ifndef NDEBUG
Verilated::debug(1);
#endif
ram_ = nullptr;
vortex_ = new VVortex_Socket();
// initial values
vortex_->dram_req_ready = 0;
vortex_->dram_rsp_valid = 0;
vortex_->io_req_ready = 0;
vortex_->io_rsp_valid = 0;
vortex_->snp_req_valid = 0;
vortex_->snp_rsp_ready = 0;
#ifdef VCD_OUTPUT
Verilated::traceEverOn(true);
trace_ = new VerilatedVcdC;
@ -47,7 +51,7 @@ void Simulator::print_stats(std::ostream& out) {
void Simulator::dbus_driver() {
if (ram_ == nullptr) {
vortex_->dram_req_ready = false;
vortex_->dram_req_ready = 0;
return;
}
@ -126,7 +130,8 @@ void Simulator::io_driver() {
char c = (char)data_write;
std::cout << c;
}
vortex_->io_req_ready = true;
vortex_->io_req_ready = 1;
vortex_->io_rsp_valid = 01;
}
void Simulator::reset() {
@ -180,8 +185,8 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
// submit snoop requests for the needed blocks
vortex_->snp_req_addr = aligned_addr_start;
vortex_->snp_req_valid = true;
vortex_->snp_rsp_ready = true;
vortex_->snp_req_valid = 1;
vortex_->snp_rsp_ready = 1;
for (;;) {
this->step();
if (vortex_->snp_rsp_valid) {
@ -192,7 +197,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
++outstanding_snp_reqs;
vortex_->snp_req_addr += 1;
if (vortex_->snp_req_addr >= aligned_addr_end) {
vortex_->snp_req_valid = false;
vortex_->snp_req_valid = 0;
}
}
if (!vortex_->snp_req_valid