mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
fix scheduler rename_table X values - reverted valid bits
This commit is contained in:
parent
1512138a15
commit
70dadca9fe
4 changed files with 49 additions and 38 deletions
|
@ -4,13 +4,15 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
|||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT = -DDBG_PRINT_CORE_ICACHE \
|
||||
-DDBG_PRINT_CORE_DCACHE \
|
||||
-DDBG_PRINT_CACHE_BANK \
|
||||
-DDBG_PRINT_CACHE_SNP \
|
||||
-DDBG_PRINT_CACHE_MSRQ \
|
||||
-DDBG_PRINT_DRAM \
|
||||
-DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
|
||||
-DDBG_PRINT_CORE_DCACHE \
|
||||
-DDBG_PRINT_CACHE_BANK \
|
||||
-DDBG_PRINT_CACHE_SNP \
|
||||
-DDBG_PRINT_CACHE_MSRQ \
|
||||
-DDBG_PRINT_DRAM \
|
||||
-DDBG_PRINT_OPAE
|
||||
|
||||
#DBG_PRINT=$(DBG_PRINT_FLAGS)
|
||||
|
||||
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4
|
||||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
|
||||
|
@ -43,7 +45,7 @@ VL_FLAGS += -DGLOBAL_BLOCK_SIZE=64
|
|||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace -DVL_DEBUG=1 $(DBG_PRINT)
|
||||
VL_FLAGS += --trace $(DBG_PRINT)
|
||||
CFLAGS += -DVCD_OUTPUT
|
||||
else
|
||||
CFLAGS += -DNDEBUG
|
||||
|
|
19
hw/Makefile
19
hw/Makefile
|
@ -16,11 +16,22 @@ VF += -DGLOBAL_BLOCK_SIZE=64
|
|||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
|
||||
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
|
||||
-DDBG_PRINT_CORE_DCACHE \
|
||||
-DDBG_PRINT_CACHE_BANK \
|
||||
-DDBG_PRINT_CACHE_SNP \
|
||||
-DDBG_PRINT_CACHE_MSRQ \
|
||||
-DDBG_PRINT_DRAM \
|
||||
-DDBG_PRINT_OPAE
|
||||
|
||||
#DBG_PRINT=$(DBG_PRINT_FLAGS)
|
||||
|
||||
INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/simulate
|
||||
|
||||
SRCS += ./simulate/testbench.cpp ./simulate/simulator.cpp
|
||||
|
||||
DBG += --trace -DVL_DEBUG=1
|
||||
DBG += --trace
|
||||
|
||||
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
|
||||
|
@ -33,16 +44,16 @@ gen-s: build_config
|
|||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG'
|
||||
|
||||
gen-sd: build_config
|
||||
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
|
||||
verilator $(VF) -cc Vortex_Socket.v $(DBG_PRINT) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
|
||||
|
||||
gen-st: build_config
|
||||
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
|
||||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
|
||||
|
||||
gen-m: build_config
|
||||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||
|
||||
gen-md: build_config
|
||||
verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG)
|
||||
verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) $(DBG_PRINT) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG)
|
||||
|
||||
gen-mt: build_config
|
||||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||
|
|
|
@ -17,14 +17,13 @@ module VX_scheduler (
|
|||
assign is_empty = count_valid == 0;
|
||||
|
||||
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
||||
reg[31:0] valid_table [`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_wb = (writeback_if.wb != 0) && (| writeback_if.valid) && (writeback_if.rd != 0);
|
||||
wire wb_inc = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
|
||||
|
||||
wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rs1];
|
||||
wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rs2];
|
||||
wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0) && valid_table[bckE_req_if.warp_num][bckE_req_if.rd ];
|
||||
wire rs1_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0);
|
||||
wire rs2_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0);
|
||||
wire rd_rename = (rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0);
|
||||
|
||||
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
|
||||
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
|
||||
|
@ -35,7 +34,7 @@ module VX_scheduler (
|
|||
wire is_csr = bckE_req_if.is_csr;
|
||||
wire is_exec = !is_mem && !is_gpu && !is_csr;
|
||||
|
||||
wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn;
|
||||
wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn;
|
||||
|
||||
wire rs1_rename_qual = ((rs1_rename) && (bckE_req_if.rs1 != 0));
|
||||
wire rs2_rename_qual = ((rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2));
|
||||
|
@ -44,37 +43,31 @@ module VX_scheduler (
|
|||
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
|
||||
|
||||
assign schedule_delay = (| bckE_req_if.valid)
|
||||
&& ((rename_valid )
|
||||
&& ((rename_valid)
|
||||
|| (memory_delay && is_mem)
|
||||
|| (gpr_stage_delay && (is_mem || is_exec))
|
||||
|| (exec_delay && is_exec));
|
||||
|
||||
integer i, w;
|
||||
|
||||
wire[`NUM_THREADS-1:0] old_rename_mask = rename_table[writeback_if.warp_num][writeback_if.rd];
|
||||
wire[`NUM_THREADS-1:0] invalidate_mask = (~writeback_if.valid);
|
||||
|
||||
wire[`NUM_THREADS-1:0] valid_wb_new_mask = old_rename_mask & invalidate_mask;
|
||||
wire valid_wb_new_valid = valid_wb_new_mask != 0;
|
||||
wire[`NUM_THREADS-1:0] old_rename_mask = rename_table[writeback_if.warp_num][writeback_if.rd];
|
||||
wire[`NUM_THREADS-1:0] invalidate_mask = ~writeback_if.valid;
|
||||
wire[`NUM_THREADS-1:0] valid_wb_new_mask = old_rename_mask & invalidate_mask;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (w = 0; w < `NUM_WARPS; w=w+1) begin
|
||||
for (i = 0; i < 32; i++) begin
|
||||
// rename_table[w][i] <= 0;
|
||||
valid_table[w][i] <= 0;
|
||||
rename_table[w][i] <= 0;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
if (valid_wb) begin
|
||||
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
|
||||
valid_table [writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_valid;
|
||||
|
||||
end
|
||||
|
||||
if (!schedule_delay && wb_inc) begin
|
||||
rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
|
||||
valid_table [bckE_req_if.warp_num][bckE_req_if.rd] <= 1'b1;
|
||||
end
|
||||
|
||||
if (valid_wb
|
||||
|
|
|
@ -10,16 +10,20 @@ double sc_time_stamp() {
|
|||
|
||||
Simulator::Simulator() {
|
||||
// force random values for uninitialized signals
|
||||
const char* args[] = {"", "+verilator+rand+reset+2", "+verilator+seed+0"};
|
||||
const char* args[] = {"", "+verilator+rand+reset+1", "+verilator+seed+0"};
|
||||
Verilated::commandArgs(3, args);
|
||||
|
||||
#ifndef NDEBUG
|
||||
Verilated::debug(1);
|
||||
#endif
|
||||
|
||||
ram_ = nullptr;
|
||||
vortex_ = new VVortex_Socket();
|
||||
|
||||
// initial values
|
||||
vortex_->dram_req_ready = 0;
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
vortex_->io_req_ready = 0;
|
||||
vortex_->io_rsp_valid = 0;
|
||||
vortex_->snp_req_valid = 0;
|
||||
vortex_->snp_rsp_ready = 0;
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
Verilated::traceEverOn(true);
|
||||
trace_ = new VerilatedVcdC;
|
||||
|
@ -47,7 +51,7 @@ void Simulator::print_stats(std::ostream& out) {
|
|||
|
||||
void Simulator::dbus_driver() {
|
||||
if (ram_ == nullptr) {
|
||||
vortex_->dram_req_ready = false;
|
||||
vortex_->dram_req_ready = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -126,7 +130,8 @@ void Simulator::io_driver() {
|
|||
char c = (char)data_write;
|
||||
std::cout << c;
|
||||
}
|
||||
vortex_->io_req_ready = true;
|
||||
vortex_->io_req_ready = 1;
|
||||
vortex_->io_rsp_valid = 01;
|
||||
}
|
||||
|
||||
void Simulator::reset() {
|
||||
|
@ -180,8 +185,8 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
|||
|
||||
// submit snoop requests for the needed blocks
|
||||
vortex_->snp_req_addr = aligned_addr_start;
|
||||
vortex_->snp_req_valid = true;
|
||||
vortex_->snp_rsp_ready = true;
|
||||
vortex_->snp_req_valid = 1;
|
||||
vortex_->snp_rsp_ready = 1;
|
||||
for (;;) {
|
||||
this->step();
|
||||
if (vortex_->snp_rsp_valid) {
|
||||
|
@ -192,7 +197,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
|||
++outstanding_snp_reqs;
|
||||
vortex_->snp_req_addr += 1;
|
||||
if (vortex_->snp_req_addr >= aligned_addr_end) {
|
||||
vortex_->snp_req_valid = false;
|
||||
vortex_->snp_req_valid = 0;
|
||||
}
|
||||
}
|
||||
if (!vortex_->snp_req_valid
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue