mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 05:17:45 -04:00
fixed register file initialization to zero synthesis inference
This commit is contained in:
parent
3e9abb978b
commit
29cd2f5dff
10 changed files with 171 additions and 130 deletions
|
@ -85,7 +85,7 @@ vlsim-hw: $(SCOPE_H)
|
|||
fpga: $(SRCS) $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
||||
|
||||
asesim: $(SRCS) $(ASE_DIR)
|
||||
asesim: $(SRCS) $(ASE_DIR) $(SCOPE_H)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
|
||||
|
||||
vlsim: $(SRCS) vlsim-hw
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
#pragma once
|
||||
|
||||
#if defined(USE_FPGA)
|
||||
#define HANG_TIMEOUT 60
|
||||
#else
|
||||
#define HANG_TIMEOUT (30*60)
|
||||
#endif
|
||||
|
||||
int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ make ase
|
|||
|
||||
# tests
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n16
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n16
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n16
|
||||
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
||||
|
||||
|
|
|
@ -4,21 +4,21 @@
|
|||
+define+QUARTUS
|
||||
+define+FPU_FAST
|
||||
#+define+SCOPE
|
||||
#+define+PERF_ENABLE
|
||||
+define+PERF_ENABLE
|
||||
|
||||
#+define+DBG_PRINT_CORE_ICACHE
|
||||
#+define+DBG_PRINT_CORE_DCACHE
|
||||
#+define+DBG_PRINT_CACHE_BANK
|
||||
#+define+DBG_PRINT_CACHE_SNP
|
||||
#+define+DBG_PRINT_CACHE_MSRQ
|
||||
#+define+DBG_PRINT_CACHE_TAG
|
||||
#+define+DBG_PRINT_CACHE_DATA
|
||||
#+define+DBG_PRINT_DRAM
|
||||
#+define+DBG_PRINT_PIPELINE
|
||||
#+define+DBG_PRINT_OPAE
|
||||
#+define+DBG_PRINT_AVS
|
||||
#+define+DBG_PRINT_SCOPE
|
||||
#+define+DBG_CACHE_REQ_INFO
|
||||
+define+DBG_PRINT_CORE_ICACHE
|
||||
+define+DBG_PRINT_CORE_DCACHE
|
||||
+define+DBG_PRINT_CACHE_BANK
|
||||
+define+DBG_PRINT_CACHE_SNP
|
||||
+define+DBG_PRINT_CACHE_MSRQ
|
||||
+define+DBG_PRINT_CACHE_TAG
|
||||
+define+DBG_PRINT_CACHE_DATA
|
||||
+define+DBG_PRINT_DRAM
|
||||
+define+DBG_PRINT_PIPELINE
|
||||
+define+DBG_PRINT_OPAE
|
||||
+define+DBG_PRINT_AVS
|
||||
+define+DBG_PRINT_SCOPE
|
||||
+define+DBG_CACHE_REQ_INFO
|
||||
|
||||
vortex_afu.json
|
||||
QI:vortex_afu.qsf
|
||||
|
|
|
@ -102,7 +102,7 @@ module VX_csr_unit #(
|
|||
endcase
|
||||
end
|
||||
|
||||
wire stall_in = fpu_pending[csr_pipe_req_if.wid];
|
||||
wire stall_in = !csr_pipe_req_if.is_io && fpu_pending[csr_pipe_req_if.wid];
|
||||
|
||||
wire pipe_req_valid_qual = csr_pipe_req_if.valid && !stall_in;
|
||||
|
||||
|
|
|
@ -1,83 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
|
||||
// VX_gpr_ram: per-thread register storage with one clocked write port and
// three continuously-assigned read ports.
//
// A separate memory (or a pair of memories when EXT_F_ENABLE is defined) is
// generated for each of the `NUM_THREADS lanes.
//
// Ports:
//   clk            - clock; writes are committed on the rising edge
//   wren           - write enable, shared by all lanes
//   tmask          - per-lane write mask; lane i is written only if tmask[i] is set
//   waddr / wdata  - write address (shared) and per-lane write data
//   raddr1..raddr3 - read addresses (shared by all lanes)
//   rdata1..rdata3 - per-lane read data
module VX_gpr_ram (
|
||||
input wire clk,
|
||||
input wire wren,
|
||||
input wire [`NUM_THREADS-1:0] tmask,
|
||||
input wire [`NW_BITS+`NR_BITS-1:0] waddr,
|
||||
input wire [`NUM_THREADS-1:0][31:0] wdata,
|
||||
input wire [`NW_BITS+`NR_BITS-1:0] raddr1,
|
||||
input wire [`NW_BITS+`NR_BITS-1:0] raddr2,
|
||||
input wire [`NW_BITS+`NR_BITS-1:0] raddr3,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rdata1,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rdata2,
|
||||
output wire [`NUM_THREADS-1:0][31:0] rdata3
|
||||
);
|
||||
// NOTE(review): RAM_DATAW and RAM_BYTEEN are not referenced anywhere else in this module.
localparam RAM_DATAW = `NUM_THREADS * 32;
|
||||
localparam RAM_ADDRW = `NW_BITS + `NR_BITS;
|
||||
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
|
||||
localparam RAM_BYTEEN = `NUM_THREADS * 4;
|
||||
|
||||
// NOTE(review): raddr3 is flagged unused unconditionally, yet the EXT_F_ENABLE
// branch below reads it through raddr3_qual.
`UNUSED_VAR (raddr3)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
||||
// Two half-depth banks per lane; the top address bit chooses the bank.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
reg [31:0] mem_i [(RAM_DEPTH/2)-1:0];
|
||||
reg [31:0] mem_f [(RAM_DEPTH/2)-1:0];
|
||||
|
||||
// Only mem_i is zeroed here; mem_f has no initial statement.
initial mem_i = '{default: 0};
|
||||
|
||||
// Top address bit set => access targets mem_f, otherwise mem_i.
wire waddr_is_fp = waddr[RAM_ADDRW-1];
|
||||
wire raddr1_is_fp = raddr1[RAM_ADDRW-1];
|
||||
wire raddr2_is_fp = raddr2[RAM_ADDRW-1];
|
||||
|
||||
// Remaining address bits index within the selected bank.
wire [RAM_ADDRW-2:0] waddr_qual = waddr[RAM_ADDRW-2:0];
|
||||
wire [RAM_ADDRW-2:0] raddr1_qual = raddr1[RAM_ADDRW-2:0];
|
||||
wire [RAM_ADDRW-2:0] raddr2_qual = raddr2[RAM_ADDRW-2:0];
|
||||
wire [RAM_ADDRW-2:0] raddr3_qual = raddr3[RAM_ADDRW-2:0];
|
||||
|
||||
// Write to mem_i when the lane is enabled and the top address bit is clear.
always @(posedge clk) begin
|
||||
if (wren && tmask[i] && !waddr_is_fp) begin
|
||||
mem_i[waddr_qual] <= wdata[i];
|
||||
end
|
||||
end
|
||||
|
||||
// Write to mem_f when the lane is enabled and the top address bit is set.
always @(posedge clk) begin
|
||||
if (wren && tmask[i] && waddr_is_fp) begin
|
||||
mem_f[waddr_qual] <= wdata[i];
|
||||
end
|
||||
end
|
||||
|
||||
// Ports 1 and 2 read from either bank; port 3 always reads mem_f.
assign rdata1[i] = raddr1_is_fp ? mem_f[raddr1_qual] : mem_i[raddr1_qual];
|
||||
assign rdata2[i] = raddr2_is_fp ? mem_f[raddr2_qual] : mem_i[raddr2_qual];
|
||||
assign rdata3[i] = mem_f[raddr3_qual];
|
||||
end
|
||||
|
||||
`else
|
||||
|
||||
// Single full-depth memory per lane, indexed by the complete address.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
reg [31:0] mem [RAM_DEPTH-1:0];
|
||||
|
||||
// Every word of mem starts at zero.
initial mem = '{default: 0};
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren && tmask[i]) begin
|
||||
mem[waddr] <= wdata[i];
|
||||
end
|
||||
end
|
||||
|
||||
assign rdata1[i] = mem[raddr1];
|
||||
assign rdata2[i] = mem[raddr2];
|
||||
// The third read port is tied to zero in this configuration.
assign rdata3[i] = 0;
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
||||
`TRACING_ON
|
35
hw/rtl/VX_gpr_ram_f.v
Normal file
35
hw/rtl/VX_gpr_ram_f.v
Normal file
|
@ -0,0 +1,35 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
|
||||
// VX_gpr_ram_f: memory array with one clocked write port and three
// continuously-assigned read ports.
//
// Parameters:
//   DATAW - bit width of each stored word
//   DEPTH - number of words in the array
//   ADDRW - bit width of every address port; defaults to $clog2(DEPTH)
//
// Ports:
//   clk            - clock; a write is committed on the rising edge
//   wren           - write enable
//   waddr / wdata  - write address and write data
//   raddr1..raddr3 - read addresses
//   rdata1..rdata3 - word currently stored at the matching read address
//
// This module has no initial block, so it does not itself define the
// starting contents of mem.
// NOTE(review): the _f suffix presumably marks the floating-point register
// bank - confirm against the instantiating module.
module VX_gpr_ram_f #(
|
||||
parameter DATAW = 1,
|
||||
parameter DEPTH = 1,
|
||||
parameter ADDRW = $clog2(DEPTH)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire [ADDRW-1:0] raddr1,
|
||||
input wire [ADDRW-1:0] raddr2,
|
||||
input wire [ADDRW-1:0] raddr3,
|
||||
output wire [DATAW-1:0] rdata1,
|
||||
output wire [DATAW-1:0] rdata2,
|
||||
output wire [DATAW-1:0] rdata3
|
||||
);
|
||||
// Storage: DEPTH words of DATAW bits each.
reg [DATAW-1:0] mem [DEPTH-1:0];
|
||||
|
||||
// Write port: store wdata at waddr on the rising clock edge while wren is high.
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
mem [waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
|
||||
// Read ports: each output continuously reflects the word at its address.
assign rdata1 = mem [raddr1];
|
||||
assign rdata2 = mem [raddr2];
|
||||
assign rdata3 = mem [raddr3];
|
||||
|
||||
endmodule
|
||||
|
||||
`TRACING_ON
|
34
hw/rtl/VX_gpr_ram_i.v
Normal file
34
hw/rtl/VX_gpr_ram_i.v
Normal file
|
@ -0,0 +1,34 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
|
||||
// VX_gpr_ram_i: zero-initialized memory array with one clocked write port
// and two continuously-assigned read ports.
//
// Parameters:
//   DATAW - bit width of each stored word
//   DEPTH - number of words in the array
//   ADDRW - bit width of every address port; defaults to $clog2(DEPTH)
//
// Ports:
//   clk             - clock; a write is committed on the rising edge
//   wren            - write enable
//   waddr / wdata   - write address and write data
//   raddr1, raddr2  - read addresses
//   rdata1, rdata2  - word currently stored at the matching read address
// NOTE(review): the _i suffix presumably marks the integer register bank -
// confirm against the instantiating module.
module VX_gpr_ram_i #(
|
||||
parameter DATAW = 1,
|
||||
parameter DEPTH = 1,
|
||||
parameter ADDRW = $clog2(DEPTH)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire [ADDRW-1:0] raddr1,
|
||||
input wire [ADDRW-1:0] raddr2,
|
||||
output wire [DATAW-1:0] rdata1,
|
||||
output wire [DATAW-1:0] rdata2
|
||||
);
|
||||
// Storage: DEPTH words of DATAW bits each.
reg [DATAW-1:0] mem [DEPTH-1:0];
|
||||
|
||||
// Assignment pattern sets every word of mem to zero at the start.
initial mem = '{default: 0};
|
||||
|
||||
// Write port: store wdata at waddr on the rising clock edge while wren is high.
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
mem [waddr] <= wdata;
|
||||
end
|
||||
end
|
||||
|
||||
// Read ports: each output continuously reflects the word at its address.
assign rdata1 = mem [raddr1];
|
||||
assign rdata2 = mem [raddr2];
|
||||
|
||||
endmodule
|
||||
|
||||
`TRACING_ON
|
|
@ -13,41 +13,92 @@ module VX_gpr_stage #(
|
|||
// outputs
|
||||
VX_gpr_rsp_if gpr_rsp_if
|
||||
);
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
|
||||
wire [`NW_BITS+`NR_BITS-1:0] waddr, raddr1, raddr2, raddr3;
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign waddr = {writeback_if.rd[`NR_BITS-1], writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]};
|
||||
assign raddr1 = {gpr_req_if.rs1[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]};
|
||||
assign raddr2 = {gpr_req_if.rs2[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]};
|
||||
assign raddr3 = {gpr_req_if.rs3[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]};
|
||||
localparam RAM_DEPTH = `NUM_WARPS * (`NUM_REGS / 2);
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i, rdata1_f, rdata2_f, rdata3_f;
|
||||
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2, raddr3;
|
||||
|
||||
wire waddr_is_fp = writeback_if.rd[`NR_BITS-1];
|
||||
wire raddr1_is_fp = gpr_req_if.rs1[`NR_BITS-1];
|
||||
wire raddr2_is_fp = gpr_req_if.rs2[`NR_BITS-1];
|
||||
wire raddr3_is_fp = gpr_req_if.rs3[`NR_BITS-1];
|
||||
`UNUSED_VAR (raddr3_is_fp)
|
||||
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]};
|
||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]};
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
VX_gpr_ram_i #(
|
||||
.DATAW (32),
|
||||
.DEPTH (RAM_DEPTH)
|
||||
) gpr_ram_i (
|
||||
.clk (clk),
|
||||
.wren (writeback_if.valid && writeback_if.tmask[i] && !waddr_is_fp),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr1 (raddr1),
|
||||
.raddr2 (raddr2),
|
||||
.rdata1 (rdata1_i[i]),
|
||||
.rdata2 (rdata2_i[i])
|
||||
);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
VX_gpr_ram_f #(
|
||||
.DATAW (32),
|
||||
.DEPTH (RAM_DEPTH)
|
||||
) gpr_ram_f (
|
||||
.clk (clk),
|
||||
.wren (writeback_if.valid && writeback_if.tmask[i] && waddr_is_fp),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr1 (raddr1),
|
||||
.raddr2 (raddr2),
|
||||
.raddr3 (raddr3),
|
||||
.rdata1 (rdata1_f[i]),
|
||||
.rdata2 (rdata2_f[i]),
|
||||
.rdata3 (rdata3_f[i])
|
||||
);
|
||||
end
|
||||
|
||||
assign gpr_rsp_if.rs1_data = raddr1_is_fp ? rdata1_f : rdata1_i;
|
||||
assign gpr_rsp_if.rs2_data = raddr2_is_fp ? rdata2_f : rdata2_i;
|
||||
assign gpr_rsp_if.rs3_data = rdata3_f;
|
||||
`else
|
||||
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i;
|
||||
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2;
|
||||
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
`UNUSED_VAR (gpr_req_if.rs3)
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
VX_gpr_ram_i #(
|
||||
.DATAW (32),
|
||||
.DEPTH (RAM_DEPTH)
|
||||
) gpr_ram_i (
|
||||
.clk (clk),
|
||||
.wren (writeback_if.valid && writeback_if.tmask[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.raddr1 (raddr1),
|
||||
.raddr2 (raddr2),
|
||||
.rdata1 (rdata1_i[i]),
|
||||
.rdata2 (rdata2_i[i])
|
||||
);
|
||||
end
|
||||
|
||||
assign gpr_rsp_if.rs1_data = rdata1_i;
|
||||
assign gpr_rsp_if.rs2_data = rdata2_i;
|
||||
assign gpr_rsp_if.rs3_data = 0;
|
||||
`endif
|
||||
|
||||
VX_gpr_ram gpr_ram (
|
||||
.clk (clk),
|
||||
.wren (writeback_if.valid),
|
||||
.tmask (writeback_if.tmask),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data),
|
||||
.raddr1 (raddr1),
|
||||
.raddr2 (raddr2),
|
||||
.raddr3 (raddr3),
|
||||
.rdata1 (rdata1),
|
||||
.rdata2 (rdata2),
|
||||
.rdata3 (rdata3)
|
||||
);
|
||||
|
||||
assign gpr_rsp_if.rs1_data = rdata1;
|
||||
assign gpr_rsp_if.rs2_data = rdata2;
|
||||
assign gpr_rsp_if.rs3_data = rdata3;
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
|
@ -54,6 +54,6 @@ typedef struct packed {
|
|||
logic [`NW_BITS-1:0] size_m1;
|
||||
} gpu_barrier_t;
|
||||
|
||||
`define GPU_BARRIER_SIZE (1+`NB_BITS+`NB_BITS)
|
||||
`define GPU_BARRIER_SIZE (1+`NB_BITS+`NW_BITS)
|
||||
|
||||
`endif
|
Loading…
Add table
Add a link
Reference in a new issue