Fixed register file zero-initialization for synthesis inference

This commit is contained in:
Blaise Tine 2020-12-10 00:27:56 -08:00
parent 3e9abb978b
commit 29cd2f5dff
10 changed files with 171 additions and 130 deletions

View file

@ -85,7 +85,7 @@ vlsim-hw: $(SCOPE_H)
fpga: $(SRCS) $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
asesim: $(SRCS) $(ASE_DIR)
asesim: $(SRCS) $(ASE_DIR) $(SCOPE_H)
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
vlsim: $(SRCS) vlsim-hw

View file

@ -1,6 +1,10 @@
#pragma once
#if defined(USE_FPGA)
#define HANG_TIMEOUT 60
#else
#define HANG_TIMEOUT (30*60)
#endif
int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);

View file

@ -61,7 +61,7 @@ make ase
# tests
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n16
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n16
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n16
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd

View file

@ -4,21 +4,21 @@
+define+QUARTUS
+define+FPU_FAST
#+define+SCOPE
#+define+PERF_ENABLE
+define+PERF_ENABLE
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
#+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_CACHE_TAG
#+define+DBG_PRINT_CACHE_DATA
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_PIPELINE
#+define+DBG_PRINT_OPAE
#+define+DBG_PRINT_AVS
#+define+DBG_PRINT_SCOPE
#+define+DBG_CACHE_REQ_INFO
+define+DBG_PRINT_CORE_ICACHE
+define+DBG_PRINT_CORE_DCACHE
+define+DBG_PRINT_CACHE_BANK
+define+DBG_PRINT_CACHE_SNP
+define+DBG_PRINT_CACHE_MSRQ
+define+DBG_PRINT_CACHE_TAG
+define+DBG_PRINT_CACHE_DATA
+define+DBG_PRINT_DRAM
+define+DBG_PRINT_PIPELINE
+define+DBG_PRINT_OPAE
+define+DBG_PRINT_AVS
+define+DBG_PRINT_SCOPE
+define+DBG_CACHE_REQ_INFO
vortex_afu.json
QI:vortex_afu.qsf

View file

@ -102,7 +102,7 @@ module VX_csr_unit #(
endcase
end
wire stall_in = fpu_pending[csr_pipe_req_if.wid];
wire stall_in = !csr_pipe_req_if.is_io && fpu_pending[csr_pipe_req_if.wid];
wire pipe_req_valid_qual = csr_pipe_req_if.valid && !stall_in;

View file

@ -1,83 +0,0 @@
`include "VX_define.vh"
`TRACING_OFF
// Register-file storage with one clocked write port and three combinational
// read ports. A separate memory array is generated for every thread lane.
//
// Ports:
//   clk            - write clock
//   wren           - write enable shared by all lanes
//   tmask          - per-lane write mask; lane i is written only if tmask[i] is set
//   waddr          - write address
//   wdata          - write data, one 32-bit word per lane
//   raddr1..raddr3 - read addresses
//   rdata1..rdata3 - read data, one 32-bit word per lane
module VX_gpr_ram (
input wire clk,
input wire wren,
input wire [`NUM_THREADS-1:0] tmask,
input wire [`NW_BITS+`NR_BITS-1:0] waddr,
input wire [`NUM_THREADS-1:0][31:0] wdata,
input wire [`NW_BITS+`NR_BITS-1:0] raddr1,
input wire [`NW_BITS+`NR_BITS-1:0] raddr2,
input wire [`NW_BITS+`NR_BITS-1:0] raddr3,
output wire [`NUM_THREADS-1:0][31:0] rdata1,
output wire [`NUM_THREADS-1:0][31:0] rdata2,
output wire [`NUM_THREADS-1:0][31:0] rdata3
);
// RAM_DATAW and RAM_BYTEEN are declared but not referenced anywhere in this module.
localparam RAM_DATAW = `NUM_THREADS * 32;
localparam RAM_ADDRW = `NW_BITS + `NR_BITS;
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
localparam RAM_BYTEEN = `NUM_THREADS * 4;
// NOTE(review): UNUSED_VAR is defined outside this file; presumably it silences
// unused-signal lint warnings. raddr3 is fully unused only in the `else branch;
// in the EXT_F_ENABLE branch its low RAM_ADDRW-1 bits are read.
`UNUSED_VAR (raddr3)
`ifdef EXT_F_ENABLE
// Split configuration: each lane has two half-depth arrays, mem_i and mem_f.
// The most significant address bit selects which array an access targets.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
reg [31:0] mem_i [(RAM_DEPTH/2)-1:0];
reg [31:0] mem_f [(RAM_DEPTH/2)-1:0];
// Only mem_i is given an initial value; mem_f has no initializer.
initial mem_i = '{default: 0};
wire waddr_is_fp = waddr[RAM_ADDRW-1];
wire raddr1_is_fp = raddr1[RAM_ADDRW-1];
wire raddr2_is_fp = raddr2[RAM_ADDRW-1];
// Remaining address bits index within the selected array.
wire [RAM_ADDRW-2:0] waddr_qual = waddr[RAM_ADDRW-2:0];
wire [RAM_ADDRW-2:0] raddr1_qual = raddr1[RAM_ADDRW-2:0];
wire [RAM_ADDRW-2:0] raddr2_qual = raddr2[RAM_ADDRW-2:0];
wire [RAM_ADDRW-2:0] raddr3_qual = raddr3[RAM_ADDRW-2:0];
// Write to mem_i when the write address has its top bit clear.
always @(posedge clk) begin
if (wren && tmask[i] && !waddr_is_fp) begin
mem_i[waddr_qual] <= wdata[i];
end
end
// Write to mem_f when the write address has its top bit set.
always @(posedge clk) begin
if (wren && tmask[i] && waddr_is_fp) begin
mem_f[waddr_qual] <= wdata[i];
end
end
// Read ports 1 and 2 choose between the two arrays; port 3 always reads mem_f
// and ignores the top bit of raddr3.
assign rdata1[i] = raddr1_is_fp ? mem_f[raddr1_qual] : mem_i[raddr1_qual];
assign rdata2[i] = raddr2_is_fp ? mem_f[raddr2_qual] : mem_i[raddr2_qual];
assign rdata3[i] = mem_f[raddr3_qual];
end
`else
// Unified configuration: one full-depth, zero-initialized array per lane.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
reg [31:0] mem [RAM_DEPTH-1:0];
initial mem = '{default: 0};
always @(posedge clk) begin
if (wren && tmask[i]) begin
mem[waddr] <= wdata[i];
end
end
assign rdata1[i] = mem[raddr1];
assign rdata2[i] = mem[raddr2];
// There is no third read port in this configuration; rdata3 is tied to zero.
assign rdata3[i] = 0;
end
`endif
endmodule
`TRACING_ON

35
hw/rtl/VX_gpr_ram_f.v Normal file
View file

@ -0,0 +1,35 @@
`include "VX_define.vh"
`TRACING_OFF
// Parameterized memory with one clocked write port and three combinational
// read ports. The array has no initializer in this module.
// NOTE(review): the "_f" suffix suggests this backs the floating-point
// registers; confirm against the instantiating module.
//
// Parameters:
//   DATAW - width of each stored word in bits
//   DEPTH - number of words
//   ADDRW - address width, derived from DEPTH by default
// NOTE(review): with the default DEPTH = 1, $clog2(DEPTH) evaluates to 0 and the
// address ports become [-1:0]; instantiations are expected to override DEPTH.
module VX_gpr_ram_f #(
parameter DATAW = 1,
parameter DEPTH = 1,
parameter ADDRW = $clog2(DEPTH)
) (
input wire clk,
input wire wren,
input wire [ADDRW-1:0] waddr,
input wire [DATAW-1:0] wdata,
input wire [ADDRW-1:0] raddr1,
input wire [ADDRW-1:0] raddr2,
input wire [ADDRW-1:0] raddr3,
output wire [DATAW-1:0] rdata1,
output wire [DATAW-1:0] rdata2,
output wire [DATAW-1:0] rdata3
);
// Storage array: DEPTH words of DATAW bits each.
reg [DATAW-1:0] mem [DEPTH-1:0];
// Write port: wdata is stored at waddr on the rising clock edge when wren is high.
always @(posedge clk) begin
if (wren) begin
mem [waddr] <= wdata;
end
end
// Read ports: continuous assignments, so outputs follow the read addresses
// without a clock.
assign rdata1 = mem [raddr1];
assign rdata2 = mem [raddr2];
assign rdata3 = mem [raddr3];
endmodule
`TRACING_ON

34
hw/rtl/VX_gpr_ram_i.v Normal file
View file

@ -0,0 +1,34 @@
`include "VX_define.vh"
`TRACING_OFF
// Parameterized memory with one clocked write port and two combinational
// read ports. The whole array is initialized to zero by an initial block.
// NOTE(review): the "_i" suffix suggests this backs the integer registers;
// confirm against the instantiating module.
//
// Parameters:
//   DATAW - width of each stored word in bits
//   DEPTH - number of words
//   ADDRW - address width, derived from DEPTH by default
// NOTE(review): with the default DEPTH = 1, $clog2(DEPTH) evaluates to 0 and the
// address ports become [-1:0]; instantiations are expected to override DEPTH.
module VX_gpr_ram_i #(
parameter DATAW = 1,
parameter DEPTH = 1,
parameter ADDRW = $clog2(DEPTH)
) (
input wire clk,
input wire wren,
input wire [ADDRW-1:0] waddr,
input wire [DATAW-1:0] wdata,
input wire [ADDRW-1:0] raddr1,
input wire [ADDRW-1:0] raddr2,
output wire [DATAW-1:0] rdata1,
output wire [DATAW-1:0] rdata2
);
// Storage array: DEPTH words of DATAW bits each.
reg [DATAW-1:0] mem [DEPTH-1:0];
// Zero-fill every entry at time zero using a SystemVerilog assignment pattern.
// NOTE(review): whether this becomes a power-up value in hardware depends on the
// synthesis tool and target memory; confirm for the intended flow.
initial mem = '{default: 0};
// Write port: wdata is stored at waddr on the rising clock edge when wren is high.
always @(posedge clk) begin
if (wren) begin
mem [waddr] <= wdata;
end
end
// Read ports: continuous assignments, so outputs follow the read addresses
// without a clock.
assign rdata1 = mem [raddr1];
assign rdata2 = mem [raddr2];
endmodule
`TRACING_ON

View file

@ -13,41 +13,92 @@ module VX_gpr_stage #(
// outputs
VX_gpr_rsp_if gpr_rsp_if
);
`UNUSED_VAR (reset)
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
wire [`NW_BITS+`NR_BITS-1:0] waddr, raddr1, raddr2, raddr3;
`UNUSED_VAR (reset)
`ifdef EXT_F_ENABLE
assign waddr = {writeback_if.rd[`NR_BITS-1], writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]};
assign raddr1 = {gpr_req_if.rs1[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]};
assign raddr2 = {gpr_req_if.rs2[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]};
assign raddr3 = {gpr_req_if.rs3[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]};
localparam RAM_DEPTH = `NUM_WARPS * (`NUM_REGS / 2);
wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i, rdata1_f, rdata2_f, rdata3_f;
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2, raddr3;
wire waddr_is_fp = writeback_if.rd[`NR_BITS-1];
wire raddr1_is_fp = gpr_req_if.rs1[`NR_BITS-1];
wire raddr2_is_fp = gpr_req_if.rs2[`NR_BITS-1];
wire raddr3_is_fp = gpr_req_if.rs3[`NR_BITS-1];
`UNUSED_VAR (raddr3_is_fp)
assign waddr = {writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]};
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]};
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]};
for (genvar i = 0; i < `NUM_THREADS; i++) begin
VX_gpr_ram_i #(
.DATAW (32),
.DEPTH (RAM_DEPTH)
) gpr_ram_i (
.clk (clk),
.wren (writeback_if.valid && writeback_if.tmask[i] && !waddr_is_fp),
.waddr (waddr),
.wdata (writeback_if.data[i]),
.raddr1 (raddr1),
.raddr2 (raddr2),
.rdata1 (rdata1_i[i]),
.rdata2 (rdata2_i[i])
);
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
VX_gpr_ram_f #(
.DATAW (32),
.DEPTH (RAM_DEPTH)
) gpr_ram_f (
.clk (clk),
.wren (writeback_if.valid && writeback_if.tmask[i] && waddr_is_fp),
.waddr (waddr),
.wdata (writeback_if.data[i]),
.raddr1 (raddr1),
.raddr2 (raddr2),
.raddr3 (raddr3),
.rdata1 (rdata1_f[i]),
.rdata2 (rdata2_f[i]),
.rdata3 (rdata3_f[i])
);
end
assign gpr_rsp_if.rs1_data = raddr1_is_fp ? rdata1_f : rdata1_i;
assign gpr_rsp_if.rs2_data = raddr2_is_fp ? rdata2_f : rdata2_i;
assign gpr_rsp_if.rs3_data = rdata3_f;
`else
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i;
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2;
assign waddr = {writeback_if.wid, writeback_if.rd};
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
`UNUSED_VAR (gpr_req_if.rs3)
for (genvar i = 0; i < `NUM_THREADS; i++) begin
VX_gpr_ram_i #(
.DATAW (32),
.DEPTH (RAM_DEPTH)
) gpr_ram_i (
.clk (clk),
.wren (writeback_if.valid && writeback_if.tmask[i]),
.waddr (waddr),
.wdata (writeback_if.data[i]),
.raddr1 (raddr1),
.raddr2 (raddr2),
.rdata1 (rdata1_i[i]),
.rdata2 (rdata2_i[i])
);
end
assign gpr_rsp_if.rs1_data = rdata1_i;
assign gpr_rsp_if.rs2_data = rdata2_i;
assign gpr_rsp_if.rs3_data = 0;
`endif
VX_gpr_ram gpr_ram (
.clk (clk),
.wren (writeback_if.valid),
.tmask (writeback_if.tmask),
.waddr (waddr),
.wdata (writeback_if.data),
.raddr1 (raddr1),
.raddr2 (raddr2),
.raddr3 (raddr3),
.rdata1 (rdata1),
.rdata2 (rdata2),
.rdata3 (rdata3)
);
assign gpr_rsp_if.rs1_data = rdata1;
assign gpr_rsp_if.rs2_data = rdata2;
assign gpr_rsp_if.rs3_data = rdata3;
assign writeback_if.ready = 1'b1;
endmodule

View file

@ -54,6 +54,6 @@ typedef struct packed {
logic [`NW_BITS-1:0] size_m1;
} gpu_barrier_t;
`define GPU_BARRIER_SIZE (1+`NB_BITS+`NB_BITS)
`define GPU_BARRIER_SIZE (1+`NB_BITS+`NW_BITS)
`endif