floating point support fixes

This commit is contained in:
Blaise Tine 2020-07-27 16:01:56 -04:00
parent ff12393998
commit e0a9089647
23 changed files with 408 additions and 330 deletions

View file

@ -7,18 +7,19 @@ module VX_commit #(
input wire reset,
// inputs
VX_commit_if alu_commit_if,
VX_commit_if lsu_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if fpu_commit_if,
VX_commit_if gpu_commit_if,
VX_commit_if alu_commit_if,
VX_commit_if lsu_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if csr_commit_if,
VX_fpu_to_cmt_if fpu_commit_if,
VX_commit_if gpu_commit_if,
// outputs
VX_commit_is_if commit_is_if,
VX_wb_if writeback_if,
VX_perf_cntrs_if perf_cntrs_if
VX_cmt_to_issue_if cmt_to_issue_if,
VX_wb_if writeback_if,
VX_cmt_to_csr_if cmt_to_csr_if
);
// update CSRs
wire [`NUM_EXS-1:0] commited_mask;
assign commited_mask = {(alu_commit_if.valid && alu_commit_if.ready),
@ -37,38 +38,32 @@ module VX_commit #(
.count (num_commits)
);
wire has_committed = (| commited_mask);
assign cmt_to_csr_if.valid = (| commited_mask);
assign cmt_to_csr_if.num_commits = num_commits;
reg [63:0] total_cycles, total_instrs;
always @(posedge clk) begin
if (reset) begin
total_cycles <= 0;
total_instrs <= 0;
end else begin
total_cycles <= total_cycles + 1;
if (has_committed) begin
total_instrs <= total_instrs + 64'(num_commits);
end
end
end
assign cmt_to_csr_if.upd_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.upd_fflags;
assign cmt_to_csr_if.fpu_warp_num = cmt_to_issue_if.fpu_data.warp_num;
assign cmt_to_csr_if.fflags_NV = fpu_commit_if.fflags_NV;
assign cmt_to_csr_if.fflags_DZ = fpu_commit_if.fflags_DZ;
assign cmt_to_csr_if.fflags_OF = fpu_commit_if.fflags_OF;
assign cmt_to_csr_if.fflags_UF = fpu_commit_if.fflags_UF;
assign cmt_to_csr_if.fflags_NX = fpu_commit_if.fflags_NX;
assign perf_cntrs_if.total_cycles = total_cycles;
assign perf_cntrs_if.total_instrs = total_instrs;
// Notify issue stage
assign commit_is_if.alu_valid = alu_commit_if.valid && alu_commit_if.ready;
assign commit_is_if.lsu_valid = lsu_commit_if.valid && lsu_commit_if.ready;
assign commit_is_if.csr_valid = csr_commit_if.valid && csr_commit_if.ready;
assign commit_is_if.mul_valid = mul_commit_if.valid && mul_commit_if.ready;
assign commit_is_if.fpu_valid = fpu_commit_if.valid && fpu_commit_if.ready;
assign commit_is_if.gpu_valid = gpu_commit_if.valid && gpu_commit_if.ready;
assign cmt_to_issue_if.alu_valid = alu_commit_if.valid && alu_commit_if.ready;
assign cmt_to_issue_if.lsu_valid = lsu_commit_if.valid && lsu_commit_if.ready;
assign cmt_to_issue_if.csr_valid = csr_commit_if.valid && csr_commit_if.ready;
assign cmt_to_issue_if.mul_valid = mul_commit_if.valid && mul_commit_if.ready;
assign cmt_to_issue_if.fpu_valid = fpu_commit_if.valid && fpu_commit_if.ready;
assign cmt_to_issue_if.gpu_valid = gpu_commit_if.valid && gpu_commit_if.ready;
assign commit_is_if.alu_tag = alu_commit_if.issue_tag;
assign commit_is_if.lsu_tag = lsu_commit_if.issue_tag;
assign commit_is_if.csr_tag = csr_commit_if.issue_tag;
assign commit_is_if.mul_tag = mul_commit_if.issue_tag;
assign commit_is_if.fpu_tag = fpu_commit_if.issue_tag;
assign commit_is_if.gpu_tag = gpu_commit_if.issue_tag;
assign cmt_to_issue_if.alu_tag = alu_commit_if.issue_tag;
assign cmt_to_issue_if.lsu_tag = lsu_commit_if.issue_tag;
assign cmt_to_issue_if.csr_tag = csr_commit_if.issue_tag;
assign cmt_to_issue_if.mul_tag = mul_commit_if.issue_tag;
assign cmt_to_issue_if.fpu_tag = fpu_commit_if.issue_tag;
assign cmt_to_issue_if.gpu_tag = gpu_commit_if.issue_tag;
assign gpu_commit_if.ready = 1'b1; // doesn't writeback
@ -83,7 +78,7 @@ module VX_commit #(
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.commit_is_if (commit_is_if),
.cmt_to_issue_if(cmt_to_issue_if),
.writeback_if (writeback_if)
);
@ -91,22 +86,22 @@ module VX_commit #(
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (alu_commit_if.valid && alu_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.alu_data.warp_num, commit_is_if.alu_data.curr_PC, alu_commit_if.issue_tag, commit_is_if.alu_data.thread_mask, commit_is_if.alu_data.wb, commit_is_if.alu_data.rd, alu_commit_if.data);
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.alu_data.warp_num, cmt_to_issue_if.alu_data.curr_PC, alu_commit_if.issue_tag, cmt_to_issue_if.alu_data.thread_mask, cmt_to_issue_if.alu_data.wb, cmt_to_issue_if.alu_data.rd, alu_commit_if.data);
end
if (lsu_commit_if.valid && lsu_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.lsu_data.warp_num, commit_is_if.lsu_data.curr_PC, lsu_commit_if.issue_tag, commit_is_if.lsu_data.thread_mask, commit_is_if.lsu_data.wb, commit_is_if.lsu_data.rd, lsu_commit_if.data);
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.lsu_data.warp_num, cmt_to_issue_if.lsu_data.curr_PC, lsu_commit_if.issue_tag, cmt_to_issue_if.lsu_data.thread_mask, cmt_to_issue_if.lsu_data.wb, cmt_to_issue_if.lsu_data.rd, lsu_commit_if.data);
end
if (csr_commit_if.valid && csr_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.csr_data.warp_num, commit_is_if.csr_data.curr_PC, csr_commit_if.issue_tag, commit_is_if.csr_data.thread_mask, commit_is_if.csr_data.wb, commit_is_if.csr_data.rd, csr_commit_if.data);
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.csr_data.warp_num, cmt_to_issue_if.csr_data.curr_PC, csr_commit_if.issue_tag, cmt_to_issue_if.csr_data.thread_mask, cmt_to_issue_if.csr_data.wb, cmt_to_issue_if.csr_data.rd, csr_commit_if.data);
end
if (mul_commit_if.valid && mul_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.mul_data.warp_num, commit_is_if.mul_data.curr_PC, mul_commit_if.issue_tag, commit_is_if.mul_data.thread_mask, commit_is_if.mul_data.wb, commit_is_if.mul_data.rd, mul_commit_if.data);
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.mul_data.warp_num, cmt_to_issue_if.mul_data.curr_PC, mul_commit_if.issue_tag, cmt_to_issue_if.mul_data.thread_mask, cmt_to_issue_if.mul_data.wb, cmt_to_issue_if.mul_data.rd, mul_commit_if.data);
end
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.fpu_data.warp_num, commit_is_if.fpu_data.curr_PC, fpu_commit_if.issue_tag, commit_is_if.fpu_data.thread_mask, commit_is_if.fpu_data.wb, commit_is_if.fpu_data.rd, fpu_commit_if.data);
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.fpu_data.warp_num, cmt_to_issue_if.fpu_data.curr_PC, fpu_commit_if.issue_tag, cmt_to_issue_if.fpu_data.thread_mask, cmt_to_issue_if.fpu_data.wb, cmt_to_issue_if.fpu_data.rd, fpu_commit_if.data);
end
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, commit_is_if.gpu_data.warp_num, commit_is_if.gpu_data.curr_PC, gpu_commit_if.issue_tag, commit_is_if.gpu_data.thread_mask, commit_is_if.gpu_data.wb, commit_is_if.gpu_data.rd, gpu_commit_if.data);
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.gpu_data.warp_num, cmt_to_issue_if.gpu_data.curr_PC, gpu_commit_if.issue_tag, cmt_to_issue_if.gpu_data.thread_mask, cmt_to_issue_if.gpu_data.wb, cmt_to_issue_if.gpu_data.rd, gpu_commit_if.data);
end
end
`endif

View file

@ -67,9 +67,9 @@
// CSR Addresses ==============================================================
`define CSR_FFLAGS 12'hF01
`define CSR_FRM 12'hF02
`define CSR_FCSR 12'hF03
`define CSR_FFLAGS 12'h001
`define CSR_FRM 12'h002
`define CSR_FCSR 12'h003
`define CSR_VEND_ID 12'hF11
`define CSR_ARCH_ID 12'hF12

View file

@ -6,9 +6,8 @@ module VX_csr_data #(
input wire clk,
input wire reset,
VX_perf_cntrs_if perf_cntrs_if,
VX_fpu_from_csr_if fpu_from_csr_if,
VX_fpu_to_csr_if fpu_to_csr_if,
VX_cmt_to_csr_if cmt_to_csr_if,
VX_csr_to_fpu_if csr_to_fpu_if,
input wire[`NW_BITS-1:0] warp_num,
@ -33,11 +32,11 @@ module VX_csr_data #(
assign wr_addr = $size(wr_addr)'(write_addr);
wire [`FFG_BITS-1:0] fflags_update;
assign fflags_update[4] = fpu_to_csr_if.fflags_NV;
assign fflags_update[3] = fpu_to_csr_if.fflags_DZ;
assign fflags_update[2] = fpu_to_csr_if.fflags_OF;
assign fflags_update[1] = fpu_to_csr_if.fflags_UF;
assign fflags_update[0] = fpu_to_csr_if.fflags_NX;
assign fflags_update[4] = cmt_to_csr_if.fflags_NV;
assign fflags_update[3] = cmt_to_csr_if.fflags_DZ;
assign fflags_update[2] = cmt_to_csr_if.fflags_OF;
assign fflags_update[1] = cmt_to_csr_if.fflags_UF;
assign fflags_update[0] = cmt_to_csr_if.fflags_NX;
integer i;
@ -68,9 +67,23 @@ module VX_csr_data #(
csr_table[wr_addr] <= write_data;
end
endcase
end else if (fpu_to_csr_if.valid) begin
fflags_table[fpu_to_csr_if.warp_num][`FFG_BITS-1:0] <= fflags_update;
fcsr_table[fpu_to_csr_if.warp_num][`FFG_BITS-1:0] <= fflags_update;
end else if (cmt_to_csr_if.upd_fflags) begin
fflags_table[cmt_to_csr_if.fpu_warp_num][`FFG_BITS-1:0] <= fflags_update;
fcsr_table[cmt_to_csr_if.fpu_warp_num][`FFG_BITS-1:0] <= fflags_update;
end
end
end
reg [63:0] total_cycles, total_instrs;
always @(posedge clk) begin
if (reset) begin
total_cycles <= 0;
total_instrs <= 0;
end else begin
total_cycles <= total_cycles + 1;
if (cmt_to_csr_if.valid) begin
total_instrs <= total_instrs + 64'(cmt_to_csr_if.num_commits);
end
end
end
@ -87,10 +100,10 @@ module VX_csr_data #(
`CSR_NT : read_data = `NUM_THREADS;
`CSR_NW : read_data = `NUM_WARPS;
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
`CSR_CYCLE_L : read_data = perf_cntrs_if.total_cycles[31:0];
`CSR_CYCLE_H : read_data = perf_cntrs_if.total_cycles[63:32];
`CSR_INSTR_L : read_data = perf_cntrs_if.total_instrs[31:0];
`CSR_INSTR_H : read_data = perf_cntrs_if.total_instrs[63:32];
`CSR_CYCLE_L : read_data = total_cycles[31:0];
`CSR_CYCLE_H : read_data = total_cycles[63:32];
`CSR_INSTR_L : read_data = total_instrs[31:0];
`CSR_INSTR_H : read_data = total_instrs[63:32];
`CSR_VEND_ID : read_data = `VENDOR_ID;
`CSR_ARCH_ID : read_data = `ARCHITECTURE_ID;
`CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID;
@ -99,6 +112,6 @@ module VX_csr_data #(
endcase
end
assign fpu_from_csr_if.frm = frm_table[fpu_from_csr_if.warp_num];
assign csr_to_fpu_if.frm = frm_table[csr_to_fpu_if.warp_num];
endmodule

View file

@ -6,10 +6,8 @@ module VX_csr_unit #(
input wire clk,
input wire reset,
VX_perf_cntrs_if perf_cntrs_if,
VX_fpu_from_csr_if fpu_from_csr_if,
VX_fpu_to_csr_if fpu_to_csr_if,
VX_cmt_to_csr_if cmt_to_csr_if,
VX_csr_to_fpu_if csr_to_fpu_if,
VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if,
@ -51,9 +49,8 @@ module VX_csr_unit #(
) csr_data (
.clk (clk),
.reset (reset),
.perf_cntrs_if (perf_cntrs_if),
.fpu_to_csr_if (fpu_to_csr_if),
.fpu_from_csr_if(fpu_from_csr_if),
.cmt_to_csr_if (cmt_to_csr_if),
.csr_to_fpu_if (csr_to_fpu_if),
.read_addr (csr_pipe_req_if.csr_addr),
.read_data (csr_read_data_unqual),
.write_enable (is_csr_s2),

View file

@ -233,12 +233,12 @@
`define FPU_BITS 5
`define FPU_OP(x) x[`FPU_BITS-1:0]
`define FRM_RNE 3'b000
`define FRM_RTZ 3'b001
`define FRM_RDN 3'b010
`define FRM_RUP 3'b011
`define FRM_RMM 3'b100
`define FRM_DYN 3'b111
// Floating-point rounding-mode encodings (3-bit frm field, see `FRM_BITS)
`define FRM_RNE 3'b000 // round to nearest, ties to even
`define FRM_RTZ 3'b001 // round towards zero
`define FRM_RDN 3'b010 // round down (towards -inf)
`define FRM_RUP 3'b011 // round up (towards +inf)
`define FRM_RMM 3'b100 // round to nearest, ties to max magnitude
`define FRM_DYN 3'b111 // dynamic mode: the FPU substitutes the per-warp frm value supplied by the CSR unit
`define FRM_BITS 3
`define FFG_BITS 5

View file

@ -18,7 +18,7 @@ module VX_execute #(
VX_cache_core_rsp_if dcache_rsp_if,
// perf
VX_perf_cntrs_if perf_cntrs_if,
VX_cmt_to_csr_if cmt_to_csr_if,
// inputs
VX_alu_req_if alu_req_if,
@ -35,13 +35,13 @@ module VX_execute #(
VX_commit_if lsu_commit_if,
VX_commit_if csr_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
VX_fpu_to_cmt_if fpu_commit_if,
VX_commit_if gpu_commit_if,
output wire ebreak
);
VX_fpu_to_csr_if fpu_to_csr_if();
VX_fpu_from_csr_if fpu_from_csr_if();
VX_csr_to_fpu_if csr_to_fpu_if();
VX_alu_unit #(
.CORE_ID(CORE_ID)
@ -70,9 +70,8 @@ module VX_execute #(
) csr_unit (
.clk (clk),
.reset (reset),
.perf_cntrs_if (perf_cntrs_if),
.fpu_to_csr_if (fpu_to_csr_if),
.fpu_from_csr_if(fpu_from_csr_if),
.cmt_to_csr_if (cmt_to_csr_if),
.csr_to_fpu_if (csr_to_fpu_if),
.csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if),
.csr_req_if (csr_req_if),
@ -100,8 +99,7 @@ module VX_execute #(
.clk (clk),
.reset (reset),
.fpu_req_if (fpu_req_if),
.fpu_from_csr_if(fpu_from_csr_if),
.fpu_to_csr_if (fpu_to_csr_if),
.csr_to_fpu_if (csr_to_fpu_if),
.fpu_commit_if (fpu_commit_if)
);
`else

View file

@ -11,11 +11,10 @@ module VX_fpu_unit #(
// inputs
VX_fpu_req_if fpu_req_if,
VX_fpu_from_csr_if fpu_from_csr_if,
VX_csr_to_fpu_if csr_to_fpu_if,
// outputs
VX_commit_if fpu_commit_if,
VX_fpu_to_csr_if fpu_to_csr_if
VX_fpu_to_cmt_if fpu_commit_if
);
localparam FOP_BITS = fpnew_pkg::OP_BITS;
localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
@ -46,11 +45,10 @@ module VX_fpu_unit #(
wire fpu_in_ready, fpu_in_valid;
wire fpu_out_ready, fpu_out_valid;
wire [`LOG2UP(`FPURQ_SIZE)-1:0] fpu_in_tag, fpu_out_tag;
reg [`LOG2UP(`FPURQ_SIZE)-1:0] fpu_in_tag, fpu_out_tag;
reg [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
wire [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
assign fpu_operands = {fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data};
wire [FMTF_BITS-1:0] fpu_src_fmt = fpnew_pkg::FP32;
wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
@ -58,40 +56,53 @@ module VX_fpu_unit #(
wire [`NUM_THREADS-1:0][31:0] fpu_result;
fpnew_pkg::status_t fpu_status;
assign fpu_from_csr_if.warp_num = fpu_req_if.warp_num;
wire is_dyn_rnd = &(fpu_req_if.frm);
wire [`FRM_BITS-1:0] real_frm = is_dyn_rnd ? fpu_from_csr_if.frm : fpu_req_if.frm;
assign csr_to_fpu_if.warp_num = fpu_req_if.warp_num;
wire [`FRM_BITS-1:0] real_frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
reg [FOP_BITS-1:0] fpu_op;
reg [`FRM_BITS-1:0] fpu_rnd;
reg fpu_op_mod;
reg fflags_en, fflags_en_o;
always @(*) begin
fpu_op = fpnew_pkg::SGNJ;
fpu_op_mod = 0;
fpu_rnd = fpu_req_if.frm;
fpu_rnd = real_frm;
fpu_op_mod = 0;
fflags_en = 1;
fpu_operands[0] = fpu_req_if.rs1_data;
fpu_operands[1] = fpu_req_if.rs2_data;
fpu_operands[2] = fpu_req_if.rs3_data;
case (fpu_req_if.fpu_op)
`FPU_ADD: fpu_op = fpnew_pkg::ADD;
`FPU_SUB: begin fpu_op = fpnew_pkg::ADD; fpu_op_mod = 1; end
`FPU_MUL: fpu_op = fpnew_pkg::MUL;
`FPU_DIV: fpu_op = fpnew_pkg::DIV;
`FPU_ADD: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = fpu_req_if.rs1_data;
fpu_operands[2] = fpu_req_if.rs2_data;
end
`FPU_SUB: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = fpu_req_if.rs1_data;
fpu_operands[2] = fpu_req_if.rs2_data;
fpu_op_mod = 1;
end
`FPU_MUL: fpu_op = fpnew_pkg::MUL;
`FPU_DIV: fpu_op = fpnew_pkg::DIV;
`FPU_SQRT: fpu_op = fpnew_pkg::SQRT;
`FPU_MADD: fpu_op = fpnew_pkg::FMADD;
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`FPU_NMSUB: fpu_op = fpnew_pkg::FNMSUB;
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; end
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; end
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; end
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fflags_en = 0; end
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fflags_en = 0; end
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fflags_en = 0; end
`FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
`FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
`FPU_CVTWS: fpu_op = fpnew_pkg::F2I;
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::ADD; fpu_op_mod = 1; end
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`FPU_CVTSW: fpu_op = fpnew_pkg::I2F;
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_op_mod = 1; fpu_rnd = `FRM_RUP; end
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_op_mod = 0; fpu_rnd = `FRM_RUP; end
`FPU_CLASS: fpu_op = fpnew_pkg::CLASSIFY;
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fflags_en = 0; end
`FPU_CMP: fpu_op = fpnew_pkg::CMP;
default:;
endcase
@ -102,7 +113,7 @@ module VX_fpu_unit #(
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic [`LOG2UP(`FPURQ_SIZE)-1:0])
.TagType (logic[`LOG2UP(`FPURQ_SIZE)-1+1:0])
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
@ -114,13 +125,13 @@ module VX_fpu_unit #(
.dst_fmt_i (fpu_dst_fmt),
.int_fmt_i (fpu_int_fmt),
.vectorial_op_i (1'b1),
.tag_i (fpu_in_tag),
.tag_i ({fflags_en, fpu_in_tag}),
.in_valid_i (fpu_in_valid),
.in_ready_o (fpu_in_ready),
.flush_i (reset),
.result_o (fpu_result),
.status_o (fpu_status),
.tag_o (fpu_out_tag),
.tag_o ({fflags_en_o, fpu_out_tag}),
.out_valid_o (fpu_out_valid),
.out_ready_i (fpu_out_ready),
`UNUSED_PIN (busy_o)
@ -128,30 +139,22 @@ module VX_fpu_unit #(
`ENABLE_TRACING
reg [`NW_BITS-1:0] rsp_warp_num_buf [`ISSUEQ_SIZE];
assign fpu_in_valid = fpu_req_if.valid;
assign fpu_in_tag = fpu_req_if.issue_tag;
always @(posedge clk) begin
if (fpu_req_if.valid && fpu_req_if.ready) begin
rsp_warp_num_buf[fpu_in_tag] <= fpu_req_if.warp_num;
end
end
assign fpu_req_if.ready = fpu_in_ready;
assign fpu_commit_if.valid = fpu_out_valid;
assign fpu_commit_if.issue_tag = fpu_out_tag;
assign fpu_commit_if.data = fpu_result;
assign fpu_out_ready = fpu_commit_if.ready;
assign fpu_to_csr_if.valid = fpu_out_valid && fpu_req_if.ready;
assign fpu_to_csr_if.warp_num = rsp_warp_num_buf[fpu_out_tag];
assign fpu_to_csr_if.fflags_NV = fpu_status.NV;
assign fpu_to_csr_if.fflags_DZ = fpu_status.DZ;
assign fpu_to_csr_if.fflags_OF = fpu_status.OF;
assign fpu_to_csr_if.fflags_UF = fpu_status.UF;
assign fpu_to_csr_if.fflags_NX = fpu_status.NX;
assign fpu_commit_if.upd_fflags = fflags_en_o;
assign fpu_commit_if.fflags_NV = fpu_status.NV;
assign fpu_commit_if.fflags_DZ = fpu_status.DZ;
assign fpu_commit_if.fflags_OF = fpu_status.OF;
assign fpu_commit_if.fflags_UF = fpu_status.UF;
assign fpu_commit_if.fflags_NX = fpu_status.NX;
assign fpu_out_ready = fpu_commit_if.ready;
endmodule

View file

@ -5,8 +5,6 @@
module VX_gpr_fp_ctrl (
input wire clk,
input wire reset,
VX_decode_if decode_if,
input wire [`NUM_THREADS-1:0][31:0] rs1_int_data,
input wire [`NUM_THREADS-1:0][31:0] rs2_int_data,
@ -17,10 +15,7 @@ module VX_gpr_fp_ctrl (
output wire [`NR_BITS-1:0] raddr1,
output wire [`NR_BITS-1:0] raddr2,
VX_gpr_data_if gpr_data_if,
input wire schedule_delay,
output wire gpr_delay
VX_gpr_read_if gpr_read_if
);
// param
localparam GPR_DELAY_WID = 1;
@ -32,63 +27,74 @@ module VX_gpr_fp_ctrl (
reg [`NUM_THREADS-1:0][31:0] rs2_data;
reg [`NUM_THREADS-1:0][31:0] rs3_data;
wire gpr_delay;
wire gpr_fire = gpr_read_if.valid && gpr_read_if.ready;
always @(posedge clk) begin
if (reset) begin
multi_cyc_state <= 0;
end else if (!schedule_delay) begin
multi_cyc_state <= decode_if.use_rs3 && (0 == multi_cyc_state);
end else begin
end else if (gpr_delay) begin
multi_cyc_state <= 1;
end else if (gpr_fire) begin
multi_cyc_state <= 0;
end
end
// select rs1 data
always @(posedge clk) begin
if (reset) begin
tmp_rs1_data <= 0;
end else begin
if (decode_if.rs1_is_fp) begin
tmp_rs1_data <= rs1_fp_data;
end else begin
tmp_rs1_data <= rs1_int_data;
end
if (gpr_delay) begin
if (gpr_read_if.rs1_is_fp) begin
tmp_rs1_data <= rs1_fp_data;
end else begin
tmp_rs1_data <= rs1_int_data;
end
end
end
end
// select rs2 data
always @(posedge clk) begin
if(reset) begin
tmp_rs2_data <= 0;
end else begin
if (decode_if.rs2_is_fp) begin
tmp_rs2_data <= rs2_fp_data;
end else begin
tmp_rs2_data <= rs2_int_data;
end
if (gpr_delay) begin
if (gpr_read_if.rs2_is_fp) begin
tmp_rs2_data <= rs2_fp_data;
end else begin
tmp_rs2_data <= rs2_int_data;
end
end
end
end
// outputs
assign gpr_delay = decode_if.use_rs3 && (0 == multi_cyc_state);
assign gpr_delay = gpr_read_if.valid && gpr_read_if.use_rs3 && (0 == multi_cyc_state);
assign raddr1 = multi_cyc_state ? decode_if.rs3 : decode_if.rs1;
assign raddr2 = decode_if.rs2;
assign raddr1 = multi_cyc_state ? gpr_read_if.rs3 : gpr_read_if.rs1;
assign raddr2 = gpr_read_if.rs2;
always @(*) begin
if (decode_if.use_rs3) begin
if (gpr_read_if.use_rs3) begin
rs1_data = tmp_rs1_data;
rs2_data = tmp_rs2_data;
rs3_data = rs1_fp_data;
end else begin
rs1_data = decode_if.rs1_is_fp ? rs1_fp_data : rs1_int_data;
rs2_data = decode_if.rs2_is_fp ? rs2_fp_data : rs2_int_data;
rs1_data = gpr_read_if.rs1_is_fp ? rs1_fp_data : rs1_int_data;
rs2_data = gpr_read_if.rs2_is_fp ? rs2_fp_data : rs2_int_data;
rs3_data = {`NUM_THREADS{32'h8000_0000}}; // default value: -0 in single fp
end
end
assign gpr_data_if.rs1_data = rs1_data;
assign gpr_data_if.rs2_data = rs2_data;
assign gpr_data_if.rs3_data = rs3_data;
assign gpr_read_if.ready = ~gpr_delay;
assign gpr_read_if.rs1_data = rs1_data;
assign gpr_read_if.rs2_data = rs2_data;
assign gpr_read_if.rs3_data = rs3_data;
endmodule

View file

@ -7,14 +7,10 @@ module VX_gpr_stage #(
input wire reset,
// inputs
VX_wb_if writeback_if,
VX_decode_if decode_if,
VX_wb_if writeback_if,
// outputs
VX_gpr_data_if gpr_data_if,
input wire schedule_delay,
output wire gpr_delay
VX_gpr_read_if gpr_read_if
);
`UNUSED_VAR (reset)
@ -63,27 +59,24 @@ module VX_gpr_stage #(
.clk (clk),
.reset (reset),
//inputs
.decode_if (decode_if),
.rs1_int_data (rs1_int_data[decode_if.warp_num]),
.rs2_int_data (rs2_int_data[decode_if.warp_num]),
.rs1_fp_data (rs1_fp_data[decode_if.warp_num]),
.rs2_fp_data (rs2_fp_data[decode_if.warp_num]),
//inputs
.rs1_int_data (rs1_int_data[gpr_read_if.warp_num]),
.rs2_int_data (rs2_int_data[gpr_read_if.warp_num]),
.rs1_fp_data (rs1_fp_data[gpr_read_if.warp_num]),
.rs2_fp_data (rs2_fp_data[gpr_read_if.warp_num]),
// outputs
.raddr1 (raddr1),
.raddr2 (raddr2),
.gpr_data_if (gpr_data_if),
.schedule_delay (schedule_delay),
.gpr_delay (gpr_delay)
.gpr_read_if (gpr_read_if)
);
`else
assign raddr1 = decode_if.rs1;
assign raddr2 = decode_if.rs2;
assign gpr_data_if.rs1_data = rs1_int_data[decode_if.warp_num];
assign gpr_data_if.rs2_data = rs2_int_data[decode_if.warp_num];
assign gpr_data_if.rs3_data = 0;
assign raddr1 = gpr_read_if.rs1;
assign raddr2 = gpr_read_if.rs2;
assign gpr_read_if.rs1_data = rs1_int_data[gpr_read_if.warp_num];
assign gpr_read_if.rs2_data = rs2_int_data[gpr_read_if.warp_num];
assign gpr_read_if.rs3_data = 0;
assign gpr_delay = 0;
`UNUSED_VAR (schedule_delay)
`endif

View file

@ -8,7 +8,7 @@ module VX_issue #(
VX_decode_if decode_if,
VX_wb_if writeback_if,
VX_commit_is_if commit_is_if,
VX_cmt_to_issue_if cmt_to_issue_if,
VX_alu_req_if alu_req_if,
VX_lsu_req_if lsu_req_if,
@ -17,11 +17,19 @@ module VX_issue #(
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
);
VX_gpr_data_if gpr_data_if();
wire schedule_delay;
wire gpr_delay;
VX_gpr_read_if gpr_read_if();
assign gpr_read_if.valid = decode_if.valid;
assign gpr_read_if.warp_num = decode_if.warp_num;
assign gpr_read_if.rs1 = decode_if.rs1;
assign gpr_read_if.rs2 = decode_if.rs2;
assign gpr_read_if.rs3 = decode_if.rs3;
assign gpr_read_if.rs1_is_fp = decode_if.rs1_is_fp;
assign gpr_read_if.rs2_is_fp = decode_if.rs2_is_fp;
assign gpr_read_if.use_rs3 = decode_if.use_rs3;
wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag;
wire gpr_busy = ~gpr_read_if.ready;
wire alu_busy = ~alu_req_if.ready;
wire lsu_busy = ~lsu_req_if.ready;
wire csr_busy = ~csr_req_if.ready;
@ -36,16 +44,15 @@ module VX_issue #(
.reset (reset),
.decode_if (decode_if),
.writeback_if (writeback_if),
.commit_is_if (commit_is_if),
.gpr_busy (gpr_delay),
.cmt_to_issue_if(cmt_to_issue_if),
.gpr_busy (gpr_busy),
.alu_busy (alu_busy),
.lsu_busy (lsu_busy),
.csr_busy (csr_busy),
.mul_busy (mul_busy),
.fpu_busy (fpu_busy),
.gpu_busy (gpu_busy),
.issue_tag (issue_tag),
.schedule_delay (schedule_delay),
.issue_tag (issue_tag),
`UNUSED_PIN (is_empty)
);
@ -53,19 +60,16 @@ module VX_issue #(
.CORE_ID(CORE_ID)
) gpr_stage (
.clk (clk),
.reset (reset),
.decode_if (decode_if),
.reset (reset),
.writeback_if (writeback_if),
.gpr_data_if (gpr_data_if),
.schedule_delay (schedule_delay),
.gpr_delay (gpr_delay)
.gpr_read_if (gpr_read_if)
);
VX_decode_if decode_tmp_if();
VX_gpr_data_if gpr_data_tmp_if();
VX_gpr_read_if gpr_data_tmp_if();
wire stall = ~alu_req_if.ready || schedule_delay;
wire flush = alu_req_if.ready && schedule_delay;
wire stall = ~alu_req_if.ready || ~decode_if.ready;
wire flush = alu_req_if.ready && ~decode_if.ready;
VX_generic_register #(
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + 1 + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
@ -74,13 +78,13 @@ module VX_issue #(
.reset (reset),
.stall (stall),
.flush (flush),
.in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.frm, gpr_data_if.rs1_data, gpr_data_if.rs2_data, gpr_data_if.rs3_data}),
.in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.frm, gpr_read_if.rs1_data, gpr_read_if.rs2_data, gpr_read_if.rs3_data}),
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.frm, gpr_data_tmp_if.rs1_data, gpr_data_tmp_if.rs2_data, gpr_data_tmp_if.rs3_data})
);
VX_issue_demux issue_demux (
.decode_if (decode_tmp_if),
.gpr_data_if (gpr_data_tmp_if),
.gpr_read_if (gpr_data_tmp_if),
.issue_tag (issue_tmp_tag),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),

View file

@ -101,7 +101,7 @@ module VX_pipeline #(
assign csr_io_rsp_data = csr_io_rsp_if.data;
assign csr_io_rsp_if.ready = csr_io_rsp_ready;
VX_perf_cntrs_if perf_cntrs_if();
VX_cmt_to_csr_if cmt_to_csr_if();
VX_decode_if decode_if();
VX_branch_ctl_if branch_ctl_if();
VX_warp_ctl_if warp_ctl_if();
@ -113,14 +113,14 @@ module VX_pipeline #(
VX_fpu_req_if fpu_req_if();
VX_gpu_req_if gpu_req_if();
VX_wb_if writeback_if();
VX_commit_is_if commit_is_if();
VX_cmt_to_issue_if cmt_to_issue_if();
VX_wstall_if wstall_if();
VX_join_if join_if();
VX_commit_if alu_commit_if();
VX_commit_if lsu_commit_if();
VX_commit_if csr_commit_if();
VX_commit_if mul_commit_if();
VX_commit_if fpu_commit_if();
VX_fpu_to_cmt_if fpu_commit_if();
VX_commit_if gpu_commit_if();
VX_fetch #(
@ -157,7 +157,7 @@ module VX_pipeline #(
.decode_if (decode_if),
.writeback_if (writeback_if),
.commit_is_if (commit_is_if),
.cmt_to_issue_if (cmt_to_issue_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
@ -180,7 +180,7 @@ module VX_pipeline #(
.csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if),
.perf_cntrs_if (perf_cntrs_if),
.cmt_to_csr_if (cmt_to_csr_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
@ -214,9 +214,9 @@ module VX_pipeline #(
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),
.commit_is_if (commit_is_if),
.cmt_to_issue_if(cmt_to_issue_if),
.writeback_if (writeback_if),
.perf_cntrs_if (perf_cntrs_if)
.cmt_to_csr_if (cmt_to_csr_if)
);
assign dcache_req_valid = core_dcache_req_if.valid;

View file

@ -3,22 +3,21 @@
module VX_scheduler #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
VX_decode_if decode_if,
VX_wb_if writeback_if,
VX_commit_is_if commit_is_if,
input wire gpr_busy,
input wire alu_busy,
input wire lsu_busy,
input wire csr_busy,
input wire mul_busy,
input wire fpu_busy,
input wire gpu_busy,
VX_decode_if decode_if,
VX_wb_if writeback_if,
VX_cmt_to_issue_if cmt_to_issue_if,
input wire gpr_busy,
input wire alu_busy,
input wire lsu_busy,
input wire csr_busy,
input wire mul_busy,
input wire fpu_busy,
input wire gpu_busy,
output wire [`ISTAG_BITS-1:0] issue_tag,
output wire schedule_delay,
output wire is_empty
output wire is_empty
);
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
@ -53,7 +52,7 @@ module VX_scheduler #(
wire rs3_inuse_qual = rs3_inuse && decode_if.use_rs3;
wire rd_inuse_qual = rd_inuse && decode_if.wb;
wire rename_valid = (rs1_inuse_qual || rs2_inuse_qual || rs3_inuse_qual || rd_inuse_qual);
wire inuse_valid = (rd_inuse_qual || rs1_inuse_qual || rs2_inuse_qual || rs3_inuse_qual);
wire ex_stalled = ((gpr_busy)
|| (alu_busy && (decode_if.ex_type == `EX_ALU))
@ -63,9 +62,9 @@ module VX_scheduler #(
|| (fpu_busy && (decode_if.ex_type == `EX_FPU))
|| (gpu_busy && (decode_if.ex_type == `EX_GPU)));
wire iq_full;
wire issue_buf_full;
wire stall = (ex_stalled || rename_valid || iq_full) && decode_if.valid;
wire stall = (ex_stalled || inuse_valid || issue_buf_full) && decode_if.valid;
wire acquire_rd = decode_if.valid && (decode_if.wb != 0) && ~stall;
@ -85,7 +84,7 @@ module VX_scheduler #(
inuse_table[w][i] <= 0;
end
end
count_valid <= 0;
count_valid <= 0;
end else begin
if (acquire_rd) begin
inuse_registers[decode_if.warp_num][read_rd] <= decode_if.thread_mask;
@ -103,19 +102,19 @@ module VX_scheduler #(
wire ib_acquire = decode_if.valid && ~stall;
`DEBUG_BLOCK(
wire [`NW_BITS-1:0] cis_alu_warp_num = commit_is_if.alu_data.warp_num;
wire [`NUM_THREADS-1:0] cis_alu_thread_mask = commit_is_if.alu_data.thread_mask;
wire [31:0] cis_alu_curr_PC = commit_is_if.alu_data.curr_PC;
wire [`NR_BITS-1:0] cis_alu_rd = commit_is_if.alu_data.rd;
wire cis_alu_rd_is_fp = commit_is_if.alu_data.rd_is_fp;
wire cis_alu_wb = commit_is_if.alu_data.wb;
wire [`NW_BITS-1:0] cis_alu_warp_num = cmt_to_issue_if.alu_data.warp_num;
wire [`NUM_THREADS-1:0] cis_alu_thread_mask = cmt_to_issue_if.alu_data.thread_mask;
wire [31:0] cis_alu_curr_PC = cmt_to_issue_if.alu_data.curr_PC;
wire [`NR_BITS-1:0] cis_alu_rd = cmt_to_issue_if.alu_data.rd;
wire cis_alu_rd_is_fp = cmt_to_issue_if.alu_data.rd_is_fp;
wire cis_alu_wb = cmt_to_issue_if.alu_data.wb;
wire [`NW_BITS-1:0] cis_fpu_warp_num = commit_is_if.fpu_data.warp_num;
wire [`NUM_THREADS-1:0] cis_fpu_thread_mask = commit_is_if.fpu_data.thread_mask;
wire [31:0] cis_fpu_curr_PC = commit_is_if.fpu_data.curr_PC;
wire [`NR_BITS-1:0] cis_fpu_rd = commit_is_if.fpu_data.rd;
wire cis_fpu_rd_is_fp = commit_is_if.fpu_data.rd_is_fp;
wire cis_fpu_wb = commit_is_if.fpu_data.wb;
wire [`NW_BITS-1:0] cis_fpu_warp_num = cmt_to_issue_if.fpu_data.warp_num;
wire [`NUM_THREADS-1:0] cis_fpu_thread_mask = cmt_to_issue_if.fpu_data.thread_mask;
wire [31:0] cis_fpu_curr_PC = cmt_to_issue_if.fpu_data.curr_PC;
wire [`NR_BITS-1:0] cis_fpu_rd = cmt_to_issue_if.fpu_data.rd;
wire cis_fpu_rd_is_fp = cmt_to_issue_if.fpu_data.rd_is_fp;
wire cis_fpu_wb = cmt_to_issue_if.fpu_data.wb;
)
VX_cam_buffer #(
@ -128,22 +127,22 @@ module VX_scheduler #(
.write_data ({decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.rd_is_fp, decode_if.wb}),
.write_addr (issue_tag),
.acquire_slot (ib_acquire),
.release_slot ({commit_is_if.alu_valid, commit_is_if.lsu_valid, commit_is_if.csr_valid, commit_is_if.mul_valid, commit_is_if.fpu_valid, commit_is_if.gpu_valid}),
.read_addr ({commit_is_if.alu_tag, commit_is_if.lsu_tag, commit_is_if.csr_tag, commit_is_if.mul_tag, commit_is_if.fpu_tag, commit_is_if.gpu_tag}),
.read_data ({commit_is_if.alu_data, commit_is_if.lsu_data, commit_is_if.csr_data, commit_is_if.mul_data, commit_is_if.fpu_data, commit_is_if.gpu_data}),
.full (iq_full)
.release_slot ({cmt_to_issue_if.alu_valid, cmt_to_issue_if.lsu_valid, cmt_to_issue_if.csr_valid, cmt_to_issue_if.mul_valid, cmt_to_issue_if.fpu_valid, cmt_to_issue_if.gpu_valid}),
.read_addr ({cmt_to_issue_if.alu_tag, cmt_to_issue_if.lsu_tag, cmt_to_issue_if.csr_tag, cmt_to_issue_if.mul_tag, cmt_to_issue_if.fpu_tag, cmt_to_issue_if.gpu_tag}),
.read_data ({cmt_to_issue_if.alu_data, cmt_to_issue_if.lsu_data, cmt_to_issue_if.csr_data, cmt_to_issue_if.mul_data, cmt_to_issue_if.fpu_data, cmt_to_issue_if.gpu_data}),
.full (issue_buf_full)
);
assign decode_if.ready = ~stall;
assign schedule_delay = stall;
assign is_empty = (0 == count_valid);
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (stall) begin
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, iq_full=%b, inuse=%b%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, iq_full, rd_inuse_qual, rs1_inuse_qual, rs2_inuse_qual, rs3_inuse_qual, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, gpr=%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b",
$time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full, rd_inuse_qual, rs1_inuse_qual,
rs2_inuse_qual, rs3_inuse_qual, gpr_busy, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
end
end
`endif

View file

@ -10,19 +10,19 @@ module VX_writeback #(
VX_commit_if alu_commit_if,
VX_commit_if lsu_commit_if,
VX_commit_if mul_commit_if,
VX_commit_if fpu_commit_if,
VX_fpu_to_cmt_if fpu_commit_if,
VX_commit_if csr_commit_if,
VX_commit_is_if commit_is_if,
VX_cmt_to_issue_if cmt_to_issue_if,
// outputs
VX_wb_if writeback_if
);
wire alu_valid = alu_commit_if.valid && commit_is_if.alu_data.wb;
wire lsu_valid = lsu_commit_if.valid && commit_is_if.lsu_data.wb;
wire csr_valid = csr_commit_if.valid && commit_is_if.csr_data.wb;
wire mul_valid = mul_commit_if.valid && commit_is_if.mul_data.wb;
wire fpu_valid = fpu_commit_if.valid && commit_is_if.fpu_data.wb;
wire alu_valid = alu_commit_if.valid && cmt_to_issue_if.alu_data.wb;
wire lsu_valid = lsu_commit_if.valid && cmt_to_issue_if.lsu_data.wb;
wire csr_valid = csr_commit_if.valid && cmt_to_issue_if.csr_data.wb;
wire mul_valid = mul_commit_if.valid && cmt_to_issue_if.mul_data.wb;
wire fpu_valid = fpu_commit_if.valid && cmt_to_issue_if.fpu_data.wb;
VX_wb_if writeback_tmp_if();
@ -33,39 +33,39 @@ module VX_writeback #(
fpu_valid ? fpu_commit_if.valid :
0;
assign writeback_tmp_if.warp_num = alu_valid ? commit_is_if.alu_data.warp_num :
lsu_valid ? commit_is_if.lsu_data.warp_num :
csr_valid ? commit_is_if.csr_data.warp_num :
mul_valid ? commit_is_if.mul_data.warp_num :
fpu_valid ? commit_is_if.fpu_data.warp_num :
assign writeback_tmp_if.warp_num = alu_valid ? cmt_to_issue_if.alu_data.warp_num :
lsu_valid ? cmt_to_issue_if.lsu_data.warp_num :
csr_valid ? cmt_to_issue_if.csr_data.warp_num :
mul_valid ? cmt_to_issue_if.mul_data.warp_num :
fpu_valid ? cmt_to_issue_if.fpu_data.warp_num :
0;
assign writeback_tmp_if.curr_PC = alu_valid ? commit_is_if.alu_data.curr_PC :
lsu_valid ? commit_is_if.lsu_data.curr_PC :
csr_valid ? commit_is_if.csr_data.curr_PC :
mul_valid ? commit_is_if.mul_data.curr_PC :
fpu_valid ? commit_is_if.fpu_data.curr_PC :
assign writeback_tmp_if.curr_PC = alu_valid ? cmt_to_issue_if.alu_data.curr_PC :
lsu_valid ? cmt_to_issue_if.lsu_data.curr_PC :
csr_valid ? cmt_to_issue_if.csr_data.curr_PC :
mul_valid ? cmt_to_issue_if.mul_data.curr_PC :
fpu_valid ? cmt_to_issue_if.fpu_data.curr_PC :
0;
assign writeback_tmp_if.thread_mask = alu_valid ? commit_is_if.alu_data.thread_mask :
lsu_valid ? commit_is_if.lsu_data.thread_mask :
csr_valid ? commit_is_if.csr_data.thread_mask :
mul_valid ? commit_is_if.mul_data.thread_mask :
fpu_valid ? commit_is_if.fpu_data.thread_mask :
assign writeback_tmp_if.thread_mask = alu_valid ? cmt_to_issue_if.alu_data.thread_mask :
lsu_valid ? cmt_to_issue_if.lsu_data.thread_mask :
csr_valid ? cmt_to_issue_if.csr_data.thread_mask :
mul_valid ? cmt_to_issue_if.mul_data.thread_mask :
fpu_valid ? cmt_to_issue_if.fpu_data.thread_mask :
0;
assign writeback_tmp_if.rd = alu_valid ? commit_is_if.alu_data.rd :
lsu_valid ? commit_is_if.lsu_data.rd :
csr_valid ? commit_is_if.csr_data.rd :
mul_valid ? commit_is_if.mul_data.rd :
fpu_valid ? commit_is_if.fpu_data.rd :
assign writeback_tmp_if.rd = alu_valid ? cmt_to_issue_if.alu_data.rd :
lsu_valid ? cmt_to_issue_if.lsu_data.rd :
csr_valid ? cmt_to_issue_if.csr_data.rd :
mul_valid ? cmt_to_issue_if.mul_data.rd :
fpu_valid ? cmt_to_issue_if.fpu_data.rd :
0;
assign writeback_tmp_if.rd_is_fp = alu_valid ? 0 :
lsu_valid ? commit_is_if.lsu_data.rd_is_fp :
lsu_valid ? cmt_to_issue_if.lsu_data.rd_is_fp :
csr_valid ? 0 :
mul_valid ? 0 :
fpu_valid ? commit_is_if.fpu_data.rd_is_fp :
fpu_valid ? cmt_to_issue_if.fpu_data.rd_is_fp :
0;
assign writeback_tmp_if.data = alu_valid ? alu_commit_if.data :
@ -94,11 +94,11 @@ module VX_writeback #(
assign mul_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid;
assign fpu_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid && !mul_valid;
// special workaround to control RISC-V benchmarks termination on Verilator
reg [31:0] last_data_wb /* verilator public */;
// special workaround to get RISC-V tests Pass status on Verilator
reg [31:0] last_data_wb [`NUM_REGS-1:0] /* verilator public */;
always @(posedge clk) begin
if (writeback_tmp_if.valid && ~stall && (writeback_tmp_if.rd == 28)) begin
last_data_wb <= writeback_tmp_if.data[0];
if (writeback_tmp_if.valid && ~stall) begin
last_data_wb[writeback_tmp_if.rd] <= writeback_tmp_if.data[0];
end
end

View file

@ -0,0 +1,21 @@
`ifndef VX_CMT_TO_CSR_IF
`define VX_CMT_TO_CSR_IF
`include "VX_define.vh"
// Commit-stage -> CSR-unit sideband signals.
// VX_commit drives every signal in this bundle: the commit count comes from
// the per-unit commit handshakes, the flag signals from the FPU commit interface.
interface VX_cmt_to_csr_if ();
// set when at least one execution unit commits (OR-reduction of VX_commit's commit mask)
wire valid;
// commit count reported by VX_commit; declared one bit wider than `NE_BITS
wire [`NE_BITS:0] num_commits;
// FPU commit handshake (valid && ready) qualified by the FPU's own upd_fflags request
wire upd_fflags;
// warp number of the committing FPU instruction; VX_commit takes it from
// cmt_to_issue_if.fpu_data.warp_num
wire [`NW_BITS-1:0] fpu_warp_num;
// Floating-point exception flags, forwarded unchanged from the FPU commit interface.
// NOTE(review): names presumably follow the RISC-V fflags bits (NV = invalid operation,
// DZ = divide by zero, OF = overflow, UF = underflow, NX = inexact) -- confirm against
// the CSR unit that consumes them.
wire fflags_NV;
wire fflags_DZ;
wire fflags_OF;
wire fflags_UF;
wire fflags_NX;
endinterface
`endif

View file

@ -1,5 +1,5 @@
`ifndef VX_COMMIT_IS_IF
`define VX_COMMIT_IS_IF
`ifndef VX_CMT_TO_ISSUE_IF
`define VX_CMT_TO_ISSUE_IF
`include "VX_define.vh"
@ -13,7 +13,7 @@ typedef struct packed {
} is_data_t;
interface VX_commit_is_if ();
interface VX_cmt_to_issue_if ();
wire alu_valid;
wire lsu_valid;

View file

@ -1,5 +1,5 @@
`ifndef VX_FPU_FROM_CSR_IF
`define VX_FPU_FROM_CSR_IF
`ifndef VX_CSR_TO_FPU_IF
`define VX_CSR_TO_FPU_IF
`include "VX_define.vh"
@ -7,9 +7,9 @@
`IGNORE_WARNINGS_BEGIN
`endif
interface VX_fpu_from_csr_if ();
interface VX_csr_to_fpu_if ();
wire [`NW_BITS-1:0] warp_num;
wire [`NW_BITS-1:0] warp_num;
wire [`FRM_BITS-1:0] frm;
endinterface

View file

@ -0,0 +1,21 @@
`ifndef VX_FPU_TO_CMT_IF
`define VX_FPU_TO_CMT_IF
`include "VX_define.vh"
// FPU -> commit-stage interface: result payload plus exception-flag sideband.
// Used as the fpu_commit_if port of both VX_commit and VX_writeback.
interface VX_fpu_to_cmt_if ();
// a result is being offered; VX_commit treats valid && ready as the commit event
wire valid;
// tag of the issued instruction this result belongs to
// NOTE(review): presumably selects the matching issue-buffer entry -- confirm
wire [`ISTAG_BITS-1:0] issue_tag;
// result payload: `NUM_THREADS entries of 32 bits each
wire [`NUM_THREADS-1:0][31:0] data;
// request to update the floating-point flags; VX_commit gates it with valid && ready
wire upd_fflags;
// Exception flags accompanying the result; VX_commit forwards them as-is to the CSR side.
// NOTE(review): names presumably follow the RISC-V fflags bits (NV = invalid operation,
// DZ = divide by zero, OF = overflow, UF = underflow, NX = inexact) -- confirm.
wire fflags_NV;
wire fflags_DZ;
wire fflags_OF;
wire fflags_UF;
wire fflags_NX;
// back-pressure from the consumer: VX_writeback deasserts it while stalled or while
// an ALU/LSU/CSR/MUL result is being written back instead
wire ready;
endinterface
`endif

View file

@ -1,14 +0,0 @@
`ifndef VX_GPR_DATA_IF
`define VX_GPR_DATA_IF
`include "VX_define.vh"
// Bundle of three source-operand data arrays.
// NOTE(review): the name suggests these carry register-file read results; no
// driver or consumer is visible here -- confirm before relying on that.
interface VX_gpr_data_if ();
// each array holds `NUM_THREADS entries of 32 bits, one array per source operand
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
endinterface
`endif

View file

@ -0,0 +1,29 @@
`ifndef VX_GPR_READ_IF
`define VX_GPR_READ_IF
`include "VX_define.vh"
// Register read bundle: carries the operand selectors and the operand data
// arrays together, framed by a valid/ready pair.
// NOTE(review): signal directions are not declared (no modports) and no user of
// this interface is visible here; the request/response roles noted below are
// inferred from the signal names -- confirm against the GPR stage.
interface VX_gpr_read_if ();
// presumably asserted by the requester when the selector fields below are meaningful
wire valid;
// warp whose registers are being addressed
wire [`NW_BITS-1:0] warp_num;
// source register indices, `NR_BITS wide each
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2;
wire [`NR_BITS-1:0] rs3;
// presumably marks whether the third operand (rs3) is needed -- confirm
wire use_rs3;
// presumably select the floating-point register file for rs1/rs2;
// note that no corresponding rs3_is_fp signal is declared
wire rs1_is_fp;
wire rs2_is_fp;
// operand data: `NUM_THREADS entries of 32 bits per source operand
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
// presumably the responder's handshake/back-pressure signal -- confirm
wire ready;
endinterface
`endif

View file

@ -1,13 +0,0 @@
`ifndef VX_PERF_CNTRS_IF
`define VX_PERF_CNTRS_IF
`include "VX_define.vh"
// Performance-counter bundle: two 64-bit counter values.
// The earlier VX_commit code drove both from registers cleared on reset.
interface VX_perf_cntrs_if ();
// cycle counter; its former driver incremented it on every clock while out of reset
wire [63:0] total_cycles;
// instruction counter; its former driver added the commit count on cycles with a commit
wire [63:0] total_instrs;
endinterface
`endif

View file

@ -237,7 +237,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
#endif
}
bool Simulator::run() {
void Simulator::run() {
#ifndef NDEBUG
std::cout << timestamp << ": [sim] run()" << std::endl;
#endif
@ -252,20 +252,18 @@ bool Simulator::run() {
}
// wait 5 cycles to flush the pipeline
this->wait(5);
this->wait(5);
}
int Simulator::get_status(int reg) {
// check riscv-tests PASSED/FAILED status
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
#else
#if (NUM_CLUSTERS == 1)
int status = (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
#else
int status = (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb & 0xf;
#endif
#endif
return (status == 1);
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
return (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb[reg];
#elif (NUM_CLUSTERS == 1)
return (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb[reg];
#else
return (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_data_wb[reg];
#endif
}
void Simulator::load_bin(const char* program_file) {

View file

@ -43,7 +43,8 @@ public:
void attach_ram(RAM* ram);
bool run();
void run();
int get_status(int reg);
void print_stats(std::ostream& out);
private:

View file

@ -9,9 +9,8 @@ int main(int argc, char **argv)
#ifdef ALL_TESTS
bool passed = true;
std::string tests[] = {
"../../../benchmarks/riscv_tests/rv32uf-p-fadd.hex",
/*"../../../benchmarks/riscv_tests/rv32ui-p-add.hex",
std::string tests[] = {/*
"../../../benchmarks/riscv_tests/rv32ui-p-add.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-addi.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-and.hex",
"../../../benchmarks/riscv_tests/rv32ui-p-andi.hex",
@ -58,20 +57,23 @@ int main(int argc, char **argv)
"../../../benchmarks/riscv_tests/rv32um-p-mulhu.hex",
"../../../benchmarks/riscv_tests/rv32um-p-rem.hex",
"../../../benchmarks/riscv_tests/rv32um-p-remu.hex",
#endif
#endif*/
};
std::string tests_fp[] = {
#ifdef EXT_F_ENABLE
"../../../benchmarks/riscv_tests/rv32uf-p-fadd.hex",
"../../../benchmarks/riscv_tests/rv32uf-p-fdiv.hex",
"../../../benchmarks/riscv_tests/rv32uf-p-fmadd.hex"
"../../../benchmarks/riscv_tests/rv32uf-p-fmin.hex",
"../../../benchmarks/riscv_tests/rv32uf-p-fcmp.hex",
//"../../../benchmarks/riscv_tests/rv32uf-p-fadd.hex",
//"../../../benchmarks/riscv_tests/rv32uf-p-fdiv.hex",
//"../../../benchmarks/riscv_tests/rv32uf-p-fmadd.hex",
//"../../../benchmarks/riscv_tests/rv32uf-p-fmin.hex",
//"../../../benchmarks/riscv_tests/rv32uf-p-fcmp.hex",
"../../../benchmarks/riscv_tests/rv32uf-p-fclass.hex",
"../../../benchmarks/riscv_tests/rv32uf-p-ldst.hex",
"../../../benchmarks/riscv_tests/rv32uf-p-fcvt.hex",
"../../../benchmarks/riscv_tests/rv32uf-p-fcvt_w.hex",
"../../../benchmarks/riscv_tests/rv32uf-p-move.hex",
"../../../benchmarks/riscv_tests/rv32uf-p-recoding.hex",
#endif*/
#endif
};
for (std::string test : tests) {
@ -83,7 +85,30 @@ int main(int argc, char **argv)
Simulator simulator;
simulator.attach_ram(&ram);
simulator.load_ihex(test.c_str());
bool status = simulator.run();
simulator.run();
bool status = (1 == simulator.get_status(28));
if (status) std::cerr << GREEN << "Test Passed: " << test << std::endl;
if (!status) std::cerr << RED << "Test Failed: " << test << std::endl;
std::cerr << DEFAULT;
passed = passed && status;
if (!passed)
break;
}
for (std::string test : tests_fp) {
std::cerr << DEFAULT << "\n---------------------------------------\n";
std::cerr << test << std::endl;
RAM ram;
Simulator simulator;
simulator.attach_ram(&ram);
simulator.load_ihex(test.c_str());
simulator.run();
bool status = (1 == simulator.get_status(3));
if (status) std::cerr << GREEN << "Test Passed: " << test << std::endl;
if (!status) std::cerr << RED << "Test Failed: " << test << std::endl;
@ -113,7 +138,9 @@ int main(int argc, char **argv)
Simulator simulator;
simulator.attach_ram(&ram);
simulator.load_ihex(test);
bool status = simulator.run();
simulator.run();
bool status = (1 == simulator.get_status(28));
if (status) std::cerr << GREEN << "Test Passed: " << test << std::endl;
if (!status) std::cerr << RED << "Test Failed: " << test << std::endl;