diff --git a/.gitlab-ci/expected_synth.yml b/.gitlab-ci/expected_synth.yml index 7438fbc80..feb3a651d 100644 --- a/.gitlab-ci/expected_synth.yml +++ b/.gitlab-ci/expected_synth.yml @@ -1,2 +1,2 @@ cv32a65x: - gates: 172749 + gates: 171804 diff --git a/core/cva6.sv b/core/cva6.sv index 419f72d89..c2f6e62f4 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -114,6 +114,12 @@ module cva6 logic is_double_rd_macro_instr; // is double move decoded 32bit instruction of macro definition logic vfp; // is this a vector floating-point instruction? }, + localparam type writeback_t = struct packed { + logic valid; // wb data is valid + logic [CVA6Cfg.XLEN-1:0] data; //wb data + logic ex_valid; // exception from WB + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; //transaction ID + }, // branch-predict // this is the struct we get back from ex stage and we will use it to update @@ -785,6 +791,7 @@ module cva6 .exception_t(exception_t), .fu_data_t(fu_data_t), .scoreboard_entry_t(scoreboard_entry_t), + .writeback_t(writeback_t), .x_issue_req_t(x_issue_req_t), .x_issue_resp_t(x_issue_resp_t), .x_register_t(x_register_t), diff --git a/core/issue_read_operands.sv b/core/issue_read_operands.sv index c54fec41e..75d487ee2 100644 --- a/core/issue_read_operands.sv +++ b/core/issue_read_operands.sv @@ -21,6 +21,8 @@ module issue_read_operands parameter type branchpredict_sbe_t = logic, parameter type fu_data_t = logic, parameter type scoreboard_entry_t = logic, + parameter type forwarding_t = logic, + parameter type writeback_t = logic, parameter type rs3_len_t = logic, parameter type x_issue_req_t = logic, parameter type x_issue_resp_t = logic, @@ -44,29 +46,8 @@ module issue_read_operands input logic [CVA6Cfg.NrIssuePorts-1:0] issue_instr_valid_i, // Issue stage acknowledge - TO_BE_COMPLETED output logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack_o, - // rs1 operand address - scoreboard - output logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs1_o, - // rs1 operand - scoreboard - input logic 
[CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_i, - // rs1 operand is valid - scoreboard - input logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid_i, - // rs2 operand address - scoreboard - output logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs2_o, - // rs2 operand - scoreboard - input logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs2_i, - // rs2 operand is valid - scoreboard - input logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid_i, - // rs3 operand address - scoreboard - output logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs3_o, - // rs3 operand - scoreboard - input rs3_len_t [CVA6Cfg.NrIssuePorts-1:0] rs3_i, - // rs3 operand is valid - scoreboard - input logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid_i, - // get clobber input - // TO_BE_COMPLETED - TO_BE_COMPLETED - input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_i, - // TO_BE_COMPLETED - TO_BE_COMPLETED - input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_i, + // Forwarding - SCOREBOARD + input forwarding_t fwd_i, // TO_BE_COMPLETED - TO_BE_COMPLETED output fu_data_t [CVA6Cfg.NrIssuePorts-1:0] fu_data_o, // Unregistered version of fu_data_o.operanda - TO_BE_COMPLETED @@ -160,20 +141,49 @@ module issue_read_operands rs3_len_t operand_c_fpr; // output flipflop (ID <-> EX) fu_data_t [CVA6Cfg.NrIssuePorts-1:0] fu_data_n, fu_data_q; - logic [ CVA6Cfg.XLEN-1:0] imm_forward_rs3; + logic [ CVA6Cfg.XLEN-1:0] imm_forward_rs3; - logic [CVA6Cfg.NrIssuePorts-1:0] alu_valid_q; - logic [CVA6Cfg.NrIssuePorts-1:0] mult_valid_q; - logic [CVA6Cfg.NrIssuePorts-1:0] fpu_valid_q; - logic [ 1:0] fpu_fmt_q; - logic [ 2:0] fpu_rm_q; - logic [CVA6Cfg.NrIssuePorts-1:0] alu2_valid_q; - logic [CVA6Cfg.NrIssuePorts-1:0] lsu_valid_q; - logic [CVA6Cfg.NrIssuePorts-1:0] csr_valid_q; - logic [CVA6Cfg.NrIssuePorts-1:0] branch_valid_q; - logic [CVA6Cfg.NrIssuePorts-1:0] cvxif_valid_q; - logic [ 31:0] cvxif_off_instr_q; - logic cvxif_instruction_valid; + logic [CVA6Cfg.NrIssuePorts-1:0] alu_valid_q; + logic [CVA6Cfg.NrIssuePorts-1:0] 
mult_valid_q; + logic [CVA6Cfg.NrIssuePorts-1:0] fpu_valid_q; + logic [ 1:0] fpu_fmt_q; + logic [ 2:0] fpu_rm_q; + logic [CVA6Cfg.NrIssuePorts-1:0] alu2_valid_q; + logic [CVA6Cfg.NrIssuePorts-1:0] lsu_valid_q; + logic [CVA6Cfg.NrIssuePorts-1:0] csr_valid_q; + logic [CVA6Cfg.NrIssuePorts-1:0] branch_valid_q; + logic [CVA6Cfg.NrIssuePorts-1:0] cvxif_valid_q; + logic [ 31:0] cvxif_off_instr_q; + logic cvxif_instruction_valid; + + //fwd logic + logic [CVA6Cfg.NrIssuePorts-1:0] rs1_has_raw; + logic [CVA6Cfg.NrIssuePorts-1:0] rs2_has_raw; + logic [CVA6Cfg.NrIssuePorts-1:0] rs3_has_raw; + + logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3; + logic [CVA6Cfg.NrIssuePorts-1:0] rs3_fpr; + + logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid; + logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid; + logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid; + + logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_res; + logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs2_res; + logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3_res; + + // clobber + fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr, rd_clobber_fpr; + logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] gpr_clobber_vld; + logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] fpr_clobber_vld; + ariane_pkg::fu_t [ CVA6Cfg.NR_SB_ENTRIES:0] clobber_fu; + + //forward logic + logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0] + rs1_fwd_req, rs2_fwd_req, rs3_fwd_req; + logic [CVA6Cfg.NrIssuePorts-1:0] rs1_is_not_gpr0, rs2_is_not_gpr0, rs3_is_not_gpr0; + logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] rs_data; + logic [CVA6Cfg.NrIssuePorts-1:0] rs1_available, rs2_available, rs3_available; logic [CVA6Cfg.NrIssuePorts-1:0][31:0] tinst_n, tinst_q; // transformed instruction @@ -387,6 +397,228 @@ module issue_read_operands end end + // ------------------- + // RD clobber process + // ------------------- + // rd_clobber output: output 
currently clobbered destination registers + + always_comb begin : clobber_assign + gpr_clobber_vld = '0; + fpr_clobber_vld = '0; + + // default (highest entry has lowest prio in arbiter tree below): extra slot NR_SB_ENTRIES always requests and carries NONE + clobber_fu[CVA6Cfg.NR_SB_ENTRIES] = ariane_pkg::NONE; + for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin + gpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1; + fpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1; + end + + // check all still-issued entries and flag their rd in the GPR or FPR clobber map accordingly + + for (int unsigned i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin + gpr_clobber_vld[fwd_i.sbe[i].rd][i] = fwd_i.still_issued[i] & ~(CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + fwd_i.sbe[i].op)); + fpr_clobber_vld[fwd_i.sbe[i].rd][i] = fwd_i.still_issued[i] & (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + fwd_i.sbe[i].op)); + clobber_fu[i] = fwd_i.sbe[i].fu; + end + + // GPR[0] is always free: clear every request bit for x0 + gpr_clobber_vld[0] = '0; + end + + for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers + // get fu that is going to clobber this register (there should be only one) + rr_arb_tree #( + .NumIn(CVA6Cfg.NR_SB_ENTRIES + 1), + .DataType(ariane_pkg::fu_t), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_gpr_clobbers ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (gpr_clobber_vld[k]), + .gnt_o (), + .data_i (clobber_fu), + .gnt_i (1'b1), + .req_o (), + .data_o (rd_clobber_gpr[k]), + .idx_o () + ); + if (CVA6Cfg.FpPresent) begin + rr_arb_tree #( + .NumIn(CVA6Cfg.NR_SB_ENTRIES + 1), + .DataType(ariane_pkg::fu_t), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_fpr_clobbers ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (fpr_clobber_vld[k]), + .gnt_o (), + .data_i (clobber_fu), + .gnt_i (1'b1), + .req_o (), + .data_o (rd_clobber_fpr[k]), + .idx_o () + ); + end else begin + assign rd_clobber_fpr[k] = NONE; + end + end + + // ---------------------------------- + // Read Operands (a.k.a 
forwarding) + // ---------------------------------- + // read operand interface: same logic as register file + + // WB ports have higher prio than entries: request indices [0, NrWbPorts) are the write-back ports, [NrWbPorts, NrWbPorts+NR_SB_ENTRIES) the scoreboard entries + for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin + for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb + + assign rs1_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].rs1) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + fwd_i.sbe[fwd_i.wb[k].trans_id].op + )) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( + issue_instr_i[i].op + ))); + + assign rs2_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].rs2) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + fwd_i.sbe[fwd_i.wb[k].trans_id].op + )) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( + issue_instr_i[i].op + ))); + + assign rs3_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + fwd_i.sbe[fwd_i.wb[k].trans_id].op + )) == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( + issue_instr_i[i].op + ))); + + assign rs_data[i][k] = fwd_i.wb[k].data; + end + + for (genvar k = 0; unsigned'(k) < CVA6Cfg.NR_SB_ENTRIES; k++) begin : gen_rs_entries + + assign rs1_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].rs1) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + fwd_i.sbe[k].op + )) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( + issue_instr_i[i].op + ))); + + assign rs2_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].rs2) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + 
fwd_i.sbe[k].op + )) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( + issue_instr_i[i].op + ))); + + assign rs3_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( + fwd_i.sbe[k].op + )) == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( + issue_instr_i[i].op + ))); + + assign rs_data[i][k+CVA6Cfg.NrWbPorts] = fwd_i.sbe[k].result; + end + + // use fixed prio here + // this implicitly gives higher prio to WB ports + rr_arb_tree #( + .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), + .DataWidth(CVA6Cfg.XLEN), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_rs1 ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (rs1_fwd_req[i]), + .gnt_o (), + .data_i (rs_data[i]), + .gnt_i (1'b1), + .req_o (rs1_available[i]), + .data_o (rs1_res[i]), + .idx_o () + ); + + rr_arb_tree #( + .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), + .DataWidth(CVA6Cfg.XLEN), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_rs2 ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (rs2_fwd_req[i]), + .gnt_o (), + .data_i (rs_data[i]), + .gnt_i (1'b1), + .req_o (rs2_available[i]), + .data_o (rs2_res[i]), + .idx_o () + ); + + + rr_arb_tree #( + .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), + .DataWidth(CVA6Cfg.XLEN), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_sel_rs3 ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), + .req_i (rs3_fwd_req[i]), + .gnt_o (), + .data_i (rs_data[i]), + .gnt_i (1'b1), + .req_o (rs3_available[i]), + .data_o (rs3[i]), + .idx_o () + ); + + if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port + assign rs3_res[i] = rs3[i][CVA6Cfg.XLEN-1:0]; // slice with the same parameter rs3 is declared with + end else begin : gen_fp_three_port + assign rs3_res[i] = rs3[i][CVA6Cfg.FLen-1:0]; + end + + assign rs1_has_raw[i] = !issue_instr_i[i].use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i[i].op 
+ )) ? rd_clobber_fpr[issue_instr_i[i].rs1] != NONE : + rd_clobber_gpr[issue_instr_i[i].rs1] != NONE); // RAW hazard: rs1 (not a zimm) is still clobbered by a scoreboard entry + + assign rs1_valid[i] = rs1_available[i] && (CVA6Cfg.FpPresent && is_rs1_fpr( + issue_instr_i[i].op + ) ? 1'b1 : ((rd_clobber_gpr[issue_instr_i[i].rs1] != CSR) || + (CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA))); // GPR results clobbered by the CSR unit are not forwarded (SFENCE_VMA exempt when RVS) + + assign rs2_has_raw[i] = ((CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i[i].op + )) ? rd_clobber_fpr[issue_instr_i[i].rs2] != NONE : + rd_clobber_gpr[issue_instr_i[i].rs2] != NONE); // RAW hazard: rs2 is still clobbered by a scoreboard entry + + assign rs2_valid[i] = rs2_available[i] && (CVA6Cfg.FpPresent && is_rs2_fpr( + issue_instr_i[i].op + ) ? 1'b1 : ((rd_clobber_gpr[issue_instr_i[i].rs2] != CSR) || + (CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA))); // same CSR restriction as rs1 + + assign rs3_has_raw[i] = ((CVA6Cfg.FpPresent && is_imm_fpr( + issue_instr_i[i].op + )) ? rd_clobber_fpr[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE : 1'b0); // rs3 (encoded in the imm field) is only checked against the FPR clobber map + + assign rs3_valid[i] = rs3_available[i]; + assign rs3_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_i[i].op)); + + end + // --------------- // Register stage // --------------- @@ -403,27 +635,8 @@ module issue_read_operands forward_rs3 = '0; // FPR only for (int unsigned i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin - // poll the scoreboard for those values - rs1_o[i] = issue_instr_i[i].rs1; - rs2_o[i] = issue_instr_i[i].rs2; - rs3_o[i] = issue_instr_i[i].result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field - - // 0. check that we are not using the zimm type in RS1 - // as this is an immediate we do not have to wait on anything here - // 0.bis check that rs1 is required by coprocessor if not do not wait here - // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr) - // 2. poll the scoreboard - if (!issue_instr_i[i].use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr( - issue_instr_i[i].op - )) ? 
rd_clobber_fpr_i[issue_instr_i[i].rs1] != NONE : - rd_clobber_gpr_i[issue_instr_i[i].rs1] != NONE)) begin - // check if the clobbering instruction is not a CSR instruction, CSR instructions can only - // be fetched through the register file since they can't be forwarded - // if the operand is available, forward it. CSRs don't write to/from FPR - if (rs1_valid_i[i] && (CVA6Cfg.FpPresent && is_rs1_fpr( - issue_instr_i[i].op - ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i[i].rs1] != CSR) || - (CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)))) begin + if (rs1_has_raw[i]) begin + if (rs1_valid[i]) begin forward_rs1[i] = 1'b1; end else begin // the operand is not available -> stall stall_raw[i] = 1'b1; @@ -431,15 +644,8 @@ module issue_read_operands end end - if (((CVA6Cfg.FpPresent && is_rs2_fpr( - issue_instr_i[i].op - )) ? rd_clobber_fpr_i[issue_instr_i[i].rs2] != NONE : - rd_clobber_gpr_i[issue_instr_i[i].rs2] != NONE)) begin - // if the operand is available, forward it. CSRs don't write to/from FPR - if (rs2_valid_i[i] && (CVA6Cfg.FpPresent && is_rs2_fpr( - issue_instr_i[i].op - ) ? 1'b1 : ((rd_clobber_gpr_i[issue_instr_i[i].rs2] != CSR) || - (CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)))) begin + if (rs2_has_raw[i]) begin + if (rs2_valid[i]) begin forward_rs2[i] = 1'b1; end else begin // the operand is not available -> stall stall_raw[i] = 1'b1; @@ -447,12 +653,8 @@ module issue_read_operands end end - // Only check clobbered gpr for OFFLOADED instruction - if ((CVA6Cfg.FpPresent && is_imm_fpr( - issue_instr_i[i].op - )) ? rd_clobber_fpr_i[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE : 0) begin - // if the operand is available, forward it. 
CSRs don't write to/from FPR so no need to check - if (rs3_valid_i[i]) begin + if (rs3_has_raw[i] && rs3_fpr[i]) begin + if (rs3_valid[i]) begin forward_rs3[i] = 1'b1; end else begin // the operand is not available -> stall stall_raw[i] = 1'b1; @@ -460,6 +662,7 @@ module issue_read_operands end end end + if (CVA6Cfg.CvxifEn) begin // Remove unecessary forward and stall in case source register is not needed by coprocessor. if (x_issue_valid_o && x_issue_resp_i.accept) begin @@ -511,9 +714,9 @@ module issue_read_operands // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3 if (OPERANDS_PER_INSTR == 3) begin : gen_gp_rs3 - assign imm_forward_rs3 = rs3_i[0]; + assign imm_forward_rs3 = rs3_res[0]; end else begin : gen_fp_rs3 - assign imm_forward_rs3 = {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, rs3_i[0]}; + assign imm_forward_rs3 = {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, rs3_res[0]}; end // Forwarding/Output MUX @@ -542,10 +745,10 @@ module issue_read_operands // or should we forward if (forward_rs1[i]) begin - fu_data_n[i].operand_a = rs1_i[i]; + fu_data_n[i].operand_a = rs1_res[i]; end if (forward_rs2[i]) begin - fu_data_n[i].operand_b = rs2_i[i]; + fu_data_n[i].operand_b = rs2_res[i]; end if ((CVA6Cfg.FpPresent || (CVA6Cfg.CvxifEn && OPERANDS_PER_INSTR == 3)) && forward_rs3[i]) begin fu_data_n[i].imm = imm_forward_rs3; @@ -676,7 +879,6 @@ module issue_read_operands end end - always_comb begin : gen_check_waw_dependencies stall_waw = '1; for (int unsigned i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin @@ -687,8 +889,8 @@ module issue_read_operands // no other instruction has the same destination register -> issue the instruction if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( issue_instr_i[i].op - )) ? (rd_clobber_fpr_i[issue_instr_i[i].rd] == NONE) : - (rd_clobber_gpr_i[issue_instr_i[i].rd] == NONE)) begin + )) ? 
(rd_clobber_fpr[issue_instr_i[i].rd] == NONE) : + (rd_clobber_gpr[issue_instr_i[i].rd] == NONE)) begin stall_waw[i] = 1'b0; end // or check that the target destination register will be written in this cycle by the @@ -709,6 +911,8 @@ module issue_read_operands end end end + + // We can issue an instruction if we do not detect that any other instruction is writing the same // destination register. // We also need to check if there is an unresolved branch in the scoreboard. diff --git a/core/issue_stage.sv b/core/issue_stage.sv index 426a5b23c..094c34365 100644 --- a/core/issue_stage.sv +++ b/core/issue_stage.sv @@ -23,6 +23,7 @@ module issue_stage parameter type exception_t = logic, parameter type fu_data_t = logic, parameter type scoreboard_entry_t = logic, + parameter type writeback_t = logic, parameter type x_issue_req_t = logic, parameter type x_issue_resp_t = logic, parameter type x_register_t = logic, @@ -155,29 +156,21 @@ module issue_stage // Scoreboard (SB) <-> Issue and Read Operands (IRO) // --------------------------------------------------- typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? 
CVA6Cfg.XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t; + typedef struct packed { + logic [CVA6Cfg.NR_SB_ENTRIES-1:0] still_issued; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer; + writeback_t [CVA6Cfg.NrWbPorts-1:0] wb; + scoreboard_entry_t [CVA6Cfg.NR_SB_ENTRIES-1:0] sbe; + } forwarding_t; - fu_t [ 2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro; - fu_t [ 2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro; + forwarding_t fwd; + scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_instr_sb_iro; + logic [CVA6Cfg.NrIssuePorts-1:0][ 31:0] orig_instr_sb_iro; + logic [CVA6Cfg.NrIssuePorts-1:0] issue_instr_valid_sb_iro; + logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack_iro_sb; - logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs1_iro_sb; - logic [CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs1_sb_iro; - logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid_sb_iro; - - logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs2_iro_sb; - logic [CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs2_sb_iro; - logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid_iro_sb; - - logic [CVA6Cfg.NrIssuePorts-1:0][REG_ADDR_SIZE-1:0] rs3_iro_sb; - rs3_len_t [CVA6Cfg.NrIssuePorts-1:0] rs3_sb_iro; - logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid_iro_sb; - - scoreboard_entry_t [CVA6Cfg.NrIssuePorts-1:0] issue_instr_sb_iro; - logic [CVA6Cfg.NrIssuePorts-1:0][ 31:0] orig_instr_sb_iro; - logic [CVA6Cfg.NrIssuePorts-1:0] issue_instr_valid_sb_iro; - logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack_iro_sb; - - logic [CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs1_forwarding_xlen; - logic [CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs2_forwarding_xlen; + logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_forwarding_xlen; + logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs2_forwarding_xlen; for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin assign rs1_forwarding_o[i] = rs1_forwarding_xlen[i][CVA6Cfg.VLEN-1:0]; @@ -190,7 +183,6 @@ module issue_stage logic x_transaction_accepted_iro_sb, x_issue_writeback_iro_sb; logic 
[CVA6Cfg.TRANS_ID_BITS-1:0] x_id_iro_sb; - // --------------------------------------------------------- // 2. Manage instructions in a scoreboard // --------------------------------------------------------- @@ -198,32 +190,23 @@ module issue_stage .CVA6Cfg (CVA6Cfg), .rs3_len_t (rs3_len_t), .bp_resolve_t(bp_resolve_t), + .writeback_t(writeback_t), + .forwarding_t(forwarding_t), .exception_t(exception_t), .scoreboard_entry_t(scoreboard_entry_t) ) i_scoreboard ( .sb_full_o (sb_full_o), - .rd_clobber_gpr_o (rd_clobber_gpr_sb_iro), - .rd_clobber_fpr_o (rd_clobber_fpr_sb_iro), .x_transaction_accepted_i(x_transaction_accepted_iro_sb), .x_issue_writeback_i (x_issue_writeback_iro_sb), .x_id_i (x_id_iro_sb), - .rs1_i (rs1_iro_sb), - .rs1_o (rs1_sb_iro), - .rs1_valid_o (rs1_valid_sb_iro), - .rs2_i (rs2_iro_sb), - .rs2_o (rs2_sb_iro), - .rs2_valid_o (rs2_valid_iro_sb), - .rs3_i (rs3_iro_sb), - .rs3_o (rs3_sb_iro), - .rs3_valid_o (rs3_valid_iro_sb), - - .decoded_instr_i (decoded_instr_i), - .decoded_instr_valid_i(decoded_instr_valid_i), - .decoded_instr_ack_o (decoded_instr_ack_o), - .issue_instr_o (issue_instr_sb_iro), - .orig_instr_o (orig_instr_sb_iro), - .issue_instr_valid_o (issue_instr_valid_sb_iro), - .issue_ack_i (issue_ack_iro_sb), + .fwd_o (fwd), + .decoded_instr_i (decoded_instr_i), + .decoded_instr_valid_i (decoded_instr_valid_i), + .decoded_instr_ack_o (decoded_instr_ack_o), + .issue_instr_o (issue_instr_sb_iro), + .orig_instr_o (orig_instr_sb_iro), + .issue_instr_valid_o (issue_instr_valid_sb_iro), + .issue_ack_i (issue_ack_iro_sb), .resolved_branch_i(resolved_branch_i), .trans_id_i (trans_id_i), @@ -241,6 +224,8 @@ module issue_stage .fu_data_t(fu_data_t), .scoreboard_entry_t(scoreboard_entry_t), .rs3_len_t(rs3_len_t), + .writeback_t(writeback_t), + .forwarding_t(forwarding_t), .x_issue_req_t(x_issue_req_t), .x_issue_resp_t(x_issue_resp_t), .x_register_t(x_register_t), @@ -253,17 +238,7 @@ module issue_stage .issue_ack_o (issue_ack_iro_sb), .fu_data_o 
(fu_data_o), .flu_ready_i (flu_ready_i), - .rs1_o (rs1_iro_sb), - .rs1_i (rs1_sb_iro), - .rs1_valid_i (rs1_valid_sb_iro), - .rs2_o (rs2_iro_sb), - .rs2_i (rs2_sb_iro), - .rs2_valid_i (rs2_valid_iro_sb), - .rs3_o (rs3_iro_sb), - .rs3_i (rs3_sb_iro), - .rs3_valid_i (rs3_valid_iro_sb), - .rd_clobber_gpr_i (rd_clobber_gpr_sb_iro), - .rd_clobber_fpr_i (rd_clobber_fpr_sb_iro), + .fwd_i (fwd), .alu_valid_o (alu_valid_o), .alu2_valid_o (alu2_valid_o), .branch_valid_o (branch_valid_o), diff --git a/core/scoreboard.sv b/core/scoreboard.sv index 9e60fc281..61984f4b0 100644 --- a/core/scoreboard.sv +++ b/core/scoreboard.sv @@ -17,47 +17,24 @@ module scoreboard #( parameter type bp_resolve_t = logic, parameter type exception_t = logic, parameter type scoreboard_entry_t = logic, + parameter type forwarding_t = logic, + parameter type writeback_t = logic, parameter type rs3_len_t = logic ) ( // Subsystem Clock - SUBSYSTEM - input logic clk_i, + input logic clk_i, // Asynchronous reset active low - SUBSYSTEM - input logic rst_ni, + input logic rst_ni, // TO_BE_COMPLETED - TO_BE_COMPLETED - output logic sb_full_o, + output logic sb_full_o, // Flush only un-issued instructions - TO_BE_COMPLETED - input logic flush_unissued_instr_i, + input logic flush_unissued_instr_i, // Flush whole scoreboard - TO_BE_COMPLETED - input logic flush_i, - // TO_BE_COMPLETED - TO_BE_COMPLETED - output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr_o, - // TO_BE_COMPLETED - TO_BE_COMPLETED - output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o, + input logic flush_i, // Writeback Handling of CVXIF - input logic x_transaction_accepted_i, - input logic x_issue_writeback_i, - input logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_id_i, - // rs1 operand address - issue_read_operands - input logic [CVA6Cfg.NrIssuePorts-1:0][ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i, - // rs1 operand - issue_read_operands - output logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_o, - // rs1 
operand is valid - issue_read_operands - output logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid_o, - - // rs2 operand address - issue_read_operands - input logic [CVA6Cfg.NrIssuePorts-1:0][ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i, - // rs2 operand - issue_read_operands - output logic [CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs2_o, - // rs2 operand is valid - issue_read_operands - output logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid_o, - - // rs3 operand address - issue_read_operands - input logic [CVA6Cfg.NrIssuePorts-1:0][ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i, - // rs3 operand - issue_read_operands - output rs3_len_t [CVA6Cfg.NrIssuePorts-1:0] rs3_o, - // rs3 operand is valid - issue_read_operands - output logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid_o, - + input logic x_transaction_accepted_i, + input logic x_issue_writeback_i, + input logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_id_i, // advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer // TO_BE_COMPLETED - TO_BE_COMPLETED output scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o, @@ -86,6 +63,8 @@ module scoreboard #( output logic [CVA6Cfg.NrIssuePorts-1:0] issue_instr_valid_o, // TO_BE_COMPLETED - TO_BE_COMPLETED input logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack_i, + // Forwarding - issue_read_operands + output forwarding_t fwd_o, // TO_BE_COMPLETED - TO_BE_COMPLETED input bp_resolve_t resolved_branch_i, @@ -302,194 +281,22 @@ module scoreboard #( assign commit_pointer_n[k] = (flush_i) ? 
'0 : commit_pointer_n[0] + unsigned'(k); end - // ------------------- - // RD clobber process - // ------------------- - // rd_clobber output: output currently clobbered destination registers - logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] gpr_clobber_vld; - logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] fpr_clobber_vld; - ariane_pkg::fu_t [ CVA6Cfg.NR_SB_ENTRIES:0] clobber_fu; - - always_comb begin : clobber_assign - gpr_clobber_vld = '0; - fpr_clobber_vld = '0; - - // default (highest entry hast lowest prio in arbiter tree below) - clobber_fu[CVA6Cfg.NR_SB_ENTRIES] = ariane_pkg::NONE; - for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin - gpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1; - fpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1; - end - - // check for all valid entries and set the clobber accordingly - for (int unsigned i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin - gpr_clobber_vld[mem_q[i].sbe.rd][i] = still_issued[i] & ~mem_q[i].is_rd_fpr_flag; - fpr_clobber_vld[mem_q[i].sbe.rd][i] = still_issued[i] & mem_q[i].is_rd_fpr_flag; - clobber_fu[i] = mem_q[i].sbe.fu; - end - - // GPR[0] is always free - gpr_clobber_vld[0] = '0; + // Forwarding logic + writeback_t [CVA6Cfg.NrWbPorts-1:0] wb; + for (genvar i = 0; i < CVA6Cfg.NrWbPorts; i++) begin + assign wb[i].valid = wt_valid_i[i]; + assign wb[i].data = wbdata_i[i]; + assign wb[i].ex_valid = ex_i[i].valid; + assign wb[i].trans_id = trans_id_i[i]; end - for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers - // get fu that is going to clobber this register (there should be only one) - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + 1), - .DataType(ariane_pkg::fu_t), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_gpr_clobbers ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (gpr_clobber_vld[k]), - .gnt_o (), - .data_i (clobber_fu), - .gnt_i (1'b1), - .req_o (), - .data_o 
(rd_clobber_gpr_o[k]), - .idx_o () - ); - if (CVA6Cfg.FpPresent) begin - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + 1), - .DataType(ariane_pkg::fu_t), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_fpr_clobbers ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (fpr_clobber_vld[k]), - .gnt_o (), - .data_i (clobber_fu), - .gnt_i (1'b1), - .req_o (), - .data_o (rd_clobber_fpr_o[k]), - .idx_o () - ); - end + assign fwd_o.still_issued = still_issued; + assign fwd_o.issue_pointer = issue_pointer; + assign fwd_o.wb = wb; + for (genvar i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin + assign fwd_o.sbe[i] = mem_q[i].sbe; end - // ---------------------------------- - // Read Operands (a.k.a forwarding) - // ---------------------------------- - // read operand interface: same logic as register file - logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0] - rs1_fwd_req, rs2_fwd_req, rs3_fwd_req; - logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] rs_data; - logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid, rs2_valid, rs3_valid; - - // WB ports have higher prio than entries - for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin - for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb - assign rs1_fwd_req[i][k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i[i]) & (~mem_q[trans_id_i[k]].cancelled) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( - issue_instr_o[i].op - ))); - assign rs2_fwd_req[i][k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i[i]) & (~mem_q[trans_id_i[k]].cancelled) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( - issue_instr_o[i].op - ))); - assign rs3_fwd_req[i][k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i[i]) & (~mem_q[trans_id_i[k]].cancelled) & wt_valid_i[k] & (~ex_i[k].valid) & 
(mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( - issue_instr_o[i].op - ))); - assign rs_data[i][k] = wbdata_i[k]; - end - for (genvar k = 0; unsigned'(k) < CVA6Cfg.NR_SB_ENTRIES; k++) begin : gen_rs_entries - assign rs1_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i[i]) & still_issued[k] & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( - issue_instr_o[i].op - ))); - assign rs2_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i[i]) & still_issued[k] & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( - issue_instr_o[i].op - ))); - assign rs3_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i[i]) & still_issued[k] & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( - issue_instr_o[i].op - ))); - assign rs_data[i][k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result; - end - - // check whether we are accessing GPR[0] - assign rs1_valid_o[i] = rs1_valid[i] & ((|rs1_i[i]) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( - issue_instr_o[i].op - ))); - assign rs2_valid_o[i] = rs2_valid[i] & ((|rs2_i[i]) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( - issue_instr_o[i].op - ))); - assign rs3_valid_o[i] = CVA6Cfg.NrRgprPorts == 3 ? 
rs3_valid[i] & ((|rs3_i[i]) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( - issue_instr_o[i].op - ))) : rs3_valid[i]; - - // use fixed prio here - // this implicitly gives higher prio to WB ports - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(CVA6Cfg.XLEN), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_rs1 ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (rs1_fwd_req[i]), - .gnt_o (), - .data_i (rs_data[i]), - .gnt_i (1'b1), - .req_o (rs1_valid[i]), - .data_o (rs1_o[i]), - .idx_o () - ); - - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(CVA6Cfg.XLEN), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_rs2 ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (rs2_fwd_req[i]), - .gnt_o (), - .data_i (rs_data[i]), - .gnt_i (1'b1), - .req_o (rs2_valid[i]), - .data_o (rs2_o[i]), - .idx_o () - ); - - logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3; - - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(CVA6Cfg.XLEN), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_rs3 ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (rs3_fwd_req[i]), - .gnt_o (), - .data_i (rs_data[i]), - .gnt_i (1'b1), - .req_o (rs3_valid[i]), - .data_o (rs3[i]), - .idx_o () - ); - - if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port - assign rs3_o[i] = rs3[i][riscv::XLEN-1:0]; - end else begin : gen_fp_three_port - assign rs3_o[i] = rs3[i][CVA6Cfg.FLen-1:0]; - end - end - - // sequential process always_ff @(posedge clk_i or negedge rst_ni) begin : regs if (!rst_ni) begin @@ -513,10 +320,6 @@ module scoreboard #( assert (CVA6Cfg.NR_SB_ENTRIES == 2 ** CVA6Cfg.TRANS_ID_BITS) else $fatal(1, "Scoreboard size needs to be a power of two."); end - - // assert that zero is never set - assert property (@(posedge clk_i) disable iff (!rst_ni) (rd_clobber_gpr_o[0] == ariane_pkg::NONE)) - else $fatal(1, "RD 0 should 
not bet set"); // assert that we never acknowledge a commit if the instruction is not valid assert property ( @(posedge clk_i) disable iff (!rst_ni) commit_ack_i[0] |-> commit_instr_o[0].valid)