mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
fixed shared memory addressing critical path, fixed VX_fp_noncomp output bug
This commit is contained in:
parent
3e1fe66d9e
commit
34b650be94
15 changed files with 31 additions and 27 deletions
|
@ -79,7 +79,7 @@ VL_FLAGS += -DNOPAE
|
|||
CFLAGS += -DNOPAE
|
||||
|
||||
# use DPI FPU
|
||||
VL_FLAGS += -DFPU_FAST
|
||||
#VL_FLAGS += -DFPU_FAST
|
||||
|
||||
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
||||
|
||||
|
|
|
@ -4,15 +4,15 @@ CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
|
|||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
@ -65,7 +65,7 @@ else
|
|||
endif
|
||||
|
||||
# use DPI FPU
|
||||
VL_FLAGS += -DFPU_FAST
|
||||
#VL_FLAGS += -DFPU_FAST
|
||||
|
||||
PROJECT = libvortex.so
|
||||
# PROJECT = libvortex.dylib
|
||||
|
|
|
@ -98,7 +98,7 @@ module VX_alu_unit #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33)
|
||||
) alu_reg (
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
|
|
|
@ -94,7 +94,7 @@ module VX_csr_unit #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
|
||||
) csr_reg (
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
|
|
|
@ -131,7 +131,7 @@ module VX_fpu_unit #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
|
||||
) fpu_reg (
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
|
|
|
@ -77,7 +77,7 @@ module VX_gpu_unit #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE)
|
||||
) csr_reg (
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
|
|
|
@ -75,7 +75,7 @@ module VX_lsu_unit #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32)))
|
||||
) req_reg (
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_in),
|
||||
|
@ -181,7 +181,7 @@ module VX_lsu_unit #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
||||
) rsp_reg (
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
|
|
|
@ -41,7 +41,9 @@ module VX_mem_unit # (
|
|||
) core_dcache_rsp_qual_if(), core_smem_rsp_if();
|
||||
|
||||
// select shared memory bus
|
||||
wire is_smem_addr = (({core_dcache_req_if.addr[0], 2'b0} - `SHARED_MEM_BASE_ADDR) <= `SCACHE_SIZE);
|
||||
wire is_smem_addr = ({core_dcache_req_if.addr[0], 2'b0} >= `SHARED_MEM_BASE_ADDR)
|
||||
&& ({core_dcache_req_if.addr[0], 2'b0} < (`SHARED_MEM_BASE_ADDR + `SCACHE_SIZE));
|
||||
|
||||
wire smem_req_select = (| core_dcache_req_if.valid) ? is_smem_addr : 0;
|
||||
wire smem_rsp_select = (| core_smem_rsp_if.valid);
|
||||
|
||||
|
|
|
@ -145,7 +145,7 @@ module VX_mul_unit #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
||||
) mul_reg (
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
|
|
|
@ -239,7 +239,7 @@ module VX_warp_sched #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
||||
) fetch_reg (
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
|
|
|
@ -77,7 +77,7 @@ module VX_writeback #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
|
||||
) wb_reg (
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
|
|
2
hw/rtl/cache/VX_bank.v
vendored
2
hw/rtl/cache/VX_bank.v
vendored
|
@ -869,7 +869,7 @@ module VX_bank #(
|
|||
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0);
|
||||
end
|
||||
if (reqq_pop) begin
|
||||
$display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0h, tid=%0d, rw=%b, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_rw_st0, reqq_byteen_st0, debug_wid_st0, debug_pc_st0);
|
||||
$display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0h, tid=%0d, rw=%b, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_rw_st0, reqq_byteen_st0, reqq_writeword_st0, debug_wid_st0, debug_pc_st0);
|
||||
end
|
||||
if (snrq_pop) begin
|
||||
$display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_invalidate_st0);
|
||||
|
|
2
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
2
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
|
@ -94,7 +94,7 @@ module VX_cache_core_rsp_merge #(
|
|||
VX_generic_register #(
|
||||
.N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)),
|
||||
.PASSTHRU(NUM_BANKS <= 2)
|
||||
) core_wb_reg (
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
|
|
2
hw/rtl/cache/VX_data_access.v
vendored
2
hw/rtl/cache/VX_data_access.v
vendored
|
@ -127,7 +127,7 @@ module VX_data_access #(
|
|||
if (is_fill_in) begin
|
||||
$display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), dirtyb_out, addrline, use_write_data);
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, writeword_in);
|
||||
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, byte_enable, dirtyb_out, addrline, wordsel_in, writeword_in);
|
||||
end
|
||||
end else begin
|
||||
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, qual_read_data);
|
||||
|
|
|
@ -38,6 +38,8 @@ module VX_fp_noncomp #(
|
|||
SIG_NAN = 32'h00000100,
|
||||
QUT_NAN = 32'h00000200;
|
||||
|
||||
reg valid_in_r;
|
||||
reg [TAGW-1:0] tag_in_r;
|
||||
reg [`FPU_BITS-1:0] op_type_r;
|
||||
reg [`FRM_BITS-1:0] frm_r;
|
||||
|
||||
|
@ -87,7 +89,7 @@ module VX_fp_noncomp #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1)
|
||||
) fnc1_reg (
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
|
@ -98,14 +100,14 @@ module VX_fp_noncomp #(
|
|||
end
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32))
|
||||
) fnc2_reg (
|
||||
.N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32))
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({op_type, frm, dataa, datab}),
|
||||
.out ({op_type_r, frm_r, dataa_r, datab_r})
|
||||
.in ({valid_in, tag_in, op_type, frm, dataa, datab}),
|
||||
.out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r})
|
||||
);
|
||||
|
||||
// FCLASS
|
||||
|
@ -155,7 +157,7 @@ module VX_fp_noncomp #(
|
|||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (frm_r)
|
||||
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
|
||||
|
@ -249,13 +251,13 @@ module VX_fp_noncomp #(
|
|||
|
||||
VX_generic_register #(
|
||||
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
|
||||
) nc_reg (
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({valid_in, tag_in, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
||||
.in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
||||
);
|
||||
|
||||
assign ready_in = ~stall;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue