diff --git a/.gitlab-ci/expected_synth.yml b/.gitlab-ci/expected_synth.yml index 72ae96f39..cdd4737fc 100644 --- a/.gitlab-ci/expected_synth.yml +++ b/.gitlab-ci/expected_synth.yml @@ -1,2 +1,2 @@ cv32a65x: - gates: 184679 + gates: 182076 diff --git a/.gitlab-ci/scripts/report_benchmark.py b/.gitlab-ci/scripts/report_benchmark.py index cec2c10fd..5bcaf6108 100644 --- a/.gitlab-ci/scripts/report_benchmark.py +++ b/.gitlab-ci/scripts/report_benchmark.py @@ -19,12 +19,12 @@ iterations = None # Keep it up-to-date with compiler version and core performance improvements # Will fail if the number of cycles is different from this one valid_cycles = { - "dhrystone_dual": 18935, - "dhrystone_single": 24127, - "coremark_dual": 1001191, - "coremark_single": 1300030, - "dhrystone_cv32a65x": 31976, - "dhrystone_cv32a60x": 39449, + "dhrystone_dual": 17159, + "dhrystone_single": 22407, + "coremark_dual": 981777, + "coremark_single": 1294524, + "dhrystone_cv32a65x": 30056, + "dhrystone_cv32a60x": 37474, } for arg in sys.argv[1:]: diff --git a/Bender.yml b/Bender.yml index 2178c96f7..1bb70ab67 100644 --- a/Bender.yml +++ b/Bender.yml @@ -113,6 +113,7 @@ sources: - core/ariane_regfile_ff.sv - core/ariane_regfile_fpga.sv - core/scoreboard.sv + - core/raw_checker.sv - core/store_buffer.sv - core/amo_buffer.sv - core/store_unit.sv diff --git a/Flist.ariane b/Flist.ariane index 592125b7f..103cd41d0 100644 --- a/Flist.ariane +++ b/Flist.ariane @@ -95,6 +95,7 @@ core/mmu_sv39x4/ptw_sv39x4.sv core/ariane_regfile_ff.sv core/re_name.sv core/scoreboard.sv +core/raw_checker.sv core/store_buffer.sv core/amo_buffer.sv core/store_unit.sv diff --git a/core/Flist.cva6 b/core/Flist.cva6 index 17cb6ae98..c2b2a6f7b 100644 --- a/core/Flist.cva6 +++ b/core/Flist.cva6 @@ -132,6 +132,7 @@ ${CVA6_REPO_DIR}/core/ariane_regfile_ff.sv ${CVA6_REPO_DIR}/core/ariane_regfile_fpga.sv // NOTE: scoreboard.sv modified for DSIM (unchanged for other simulators) ${CVA6_REPO_DIR}/core/scoreboard.sv +${CVA6_REPO_DIR}/core/raw_checker.sv ${CVA6_REPO_DIR}/core/store_buffer.sv ${CVA6_REPO_DIR}/core/amo_buffer.sv ${CVA6_REPO_DIR}/core/store_unit.sv diff --git a/core/issue_read_operands.sv b/core/issue_read_operands.sv index 8f70016f0..6123aaef8 100644 --- a/core/issue_read_operands.sv +++ b/core/issue_read_operands.sv @@ -136,7 +136,7 @@ module issue_read_operands logic none, load, store, alu, alu2, ctrl_flow, mult, csr, fpu, fpu_vec, cvxif, accel; } fus_busy_t; - logic [CVA6Cfg.NrIssuePorts-1:0] stall_raw, stall_waw, stall_rs1, stall_rs2, stall_rs3; + logic [CVA6Cfg.NrIssuePorts-1:0] stall_raw, stall_rs1, stall_rs2, stall_rs3; logic [CVA6Cfg.NrIssuePorts-1:0] fu_busy; // functional unit is busy fus_busy_t [CVA6Cfg.NrIssuePorts-1:0] fus_busy; // which functional units are considered busy logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack; @@ -163,37 +163,44 @@ module issue_read_operands logic [CVA6Cfg.NrIssuePorts-1:0] branch_valid_n, branch_valid_q; logic [CVA6Cfg.NrIssuePorts-1:0] cvxif_valid_n, cvxif_valid_q; logic [31:0] cvxif_off_instr_n, cvxif_off_instr_q; - logic cvxif_instruction_valid; + logic cvxif_instruction_valid; + + //RAW detection + logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.TRANS_ID_BITS-1:0] idx_hzd_rs1; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_raw_check; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_has_raw; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_fpr; + + logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.TRANS_ID_BITS-1:0] idx_hzd_rs2; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_raw_check; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_has_raw; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_fpr; + + logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.TRANS_ID_BITS-1:0] idx_hzd_rs3; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_raw_check; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_has_raw; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_fpr; + + + logic [CVA6Cfg.NR_SB_ENTRIES-1:0][ariane_pkg::REG_ADDR_SIZE-1:0] rd_list; + logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rd_fpr; //fwd logic - logic [CVA6Cfg.NrIssuePorts-1:0] rs1_has_raw; - logic [CVA6Cfg.NrIssuePorts-1:0] rs2_has_raw; - logic [CVA6Cfg.NrIssuePorts-1:0] rs3_has_raw; + logic [CVA6Cfg.NR_SB_ENTRIES-1:0][ CVA6Cfg.XLEN-1:0] fwd_res; + logic [CVA6Cfg.NR_SB_ENTRIES-1:0] fwd_res_valid; - logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3; - logic [CVA6Cfg.NrIssuePorts-1:0] rs3_fpr; + logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rs1_is_not_csr; + logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rs2_is_not_csr; - logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid; - logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid; - logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid; + logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs3; - logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_res; - logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs2_res; - logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3_res; - - // clobber - fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr, rd_clobber_fpr; - logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] gpr_clobber_vld; - logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] fpr_clobber_vld; - ariane_pkg::fu_t [ CVA6Cfg.NR_SB_ENTRIES:0] clobber_fu; - - //forward logic - logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0] - rs1_fwd_req, rs2_fwd_req, rs3_fwd_req; - logic [CVA6Cfg.NrIssuePorts-1:0] rs1_is_not_gpr0, rs2_is_not_gpr0, rs3_is_not_gpr0; - logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] rs_data; - logic [CVA6Cfg.NrIssuePorts-1:0] rs1_available, rs2_available, rs3_available; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_valid; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_valid; + logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_valid; + logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs1_res; + logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs2_res; + logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs3_res; logic [CVA6Cfg.NrIssuePorts-1:0][31:0] tinst_n, tinst_q; // transformed instruction @@ -246,7 +253,7 @@ module issue_read_operands // TODO check only for 1st instruction ?? // Allow a cvxif transaction if we WaW condition are ok. - assign cvxif_req_allowed = (issue_instr_i[0].fu == CVXIF) && !stall_waw[0]; + assign cvxif_req_allowed = (issue_instr_i[0].fu == CVXIF); assign cvxif_instruction_valid = !issue_instr_i[0].ex.valid && issue_instr_valid_i[0] && cvxif_req_allowed; assign x_transaction_accepted_o = x_issue_valid_o && x_issue_ready_i && x_issue_resp_i.accept; assign x_transaction_rejected = x_issue_valid_o && x_issue_ready_i && ~x_issue_resp_i.accept; @@ -408,226 +415,103 @@ module issue_read_operands end end - // ------------------- - // RD clobber process - // ------------------- - // rd_clobber output: output currently clobbered destination registers - - always_comb begin : clobber_assign - gpr_clobber_vld = '0; - fpr_clobber_vld = '0; - - // default (highest entry hast lowest prio in arbiter tree below) - clobber_fu[CVA6Cfg.NR_SB_ENTRIES] = ariane_pkg::NONE; - for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin - gpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1; - fpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1; - end - - // check for all valid entries and set the clobber accordingly - - for (int unsigned i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin - gpr_clobber_vld[fwd_i.sbe[i].rd][i] = fwd_i.still_issued[i] & ~(CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - fwd_i.sbe[i].op)); - fpr_clobber_vld[fwd_i.sbe[i].rd][i] = fwd_i.still_issued[i] & (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - fwd_i.sbe[i].op)); - clobber_fu[i] = fwd_i.sbe[i].fu; - end - - // GPR[0] is always free - gpr_clobber_vld[0] = '0; + for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin + assign rs1_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(issue_instr_i[i].op)); + assign rs2_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(issue_instr_i[i].op)); + assign rs3_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_i[i].op)); end - for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers - // get fu that is going to clobber this register (there should be only one) - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + 1), - .DataType(ariane_pkg::fu_t), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_gpr_clobbers ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (gpr_clobber_vld[k]), - .gnt_o (), - .data_i (clobber_fu), - .gnt_i (1'b1), - .req_o (), - .data_o (rd_clobber_gpr[k]), - .idx_o () + // ---------------------------------- + // Renaming + // ---------------------------------- + for (genvar i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin + assign rd_list[i] = fwd_i.sbe[i].rd; + assign rd_fpr[i] = CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(fwd_i.sbe[i].op); + end + + for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin : gen_raw_checks + raw_checker #( + .CVA6Cfg(CVA6Cfg) + ) i_rs1_last_raw ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .rs_i(issue_instr_i[i].rs1), + .rs_fpr_i(rs1_fpr[i]), + .rd_i(rd_list), + .rd_fpr_i(rd_fpr), + .still_issued_i(fwd_i.still_issued), + .issue_pointer_i(fwd_i.issue_pointer), + .idx_o(idx_hzd_rs1[i]), + .valid_o(rs1_raw_check[i]) ); - if (CVA6Cfg.FpPresent) begin - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + 1), - .DataType(ariane_pkg::fu_t), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_fpr_clobbers ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (fpr_clobber_vld[k]), - .gnt_o (), - .data_i (clobber_fu), - .gnt_i (1'b1), - .req_o (), - .data_o (rd_clobber_fpr[k]), - .idx_o () - ); - end else begin - assign rd_clobber_fpr[k] = NONE; - end + assign rs1_has_raw[i] = rs1_raw_check[i] && !issue_instr_i[i].use_zimm; + + raw_checker #( + .CVA6Cfg(CVA6Cfg) + ) i_rs2_last_raw ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .rs_i(issue_instr_i[i].rs2), + .rs_fpr_i(rs2_fpr[i]), + .rd_i(rd_list), + .rd_fpr_i(rd_fpr), + .still_issued_i(fwd_i.still_issued), + .issue_pointer_i(fwd_i.issue_pointer), + .idx_o(idx_hzd_rs2[i]), + .valid_o(rs2_raw_check[i]) + ); + assign rs2_has_raw[i] = rs2_raw_check[i]; + + raw_checker #( + .CVA6Cfg(CVA6Cfg) + ) i_rs3_last_raw ( + .clk_i(clk_i), + .rst_ni(rst_ni), + .rs_i(issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]), + .rs_fpr_i(rs3_fpr[i]), + .rd_i(rd_list), + .rd_fpr_i(rd_fpr), + .still_issued_i(fwd_i.still_issued), + .issue_pointer_i(fwd_i.issue_pointer), + .idx_o(idx_hzd_rs3[i]), + .valid_o(rs3_raw_check[i]) + ); + assign rs3_has_raw[i] = rs3_raw_check[i] && rs3_fpr[i]; end // ---------------------------------- // Read Operands (a.k.a forwarding) // ---------------------------------- - // read operand interface: same logic as register file + always_comb begin + for (int unsigned i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin + fwd_res[i] = fwd_i.sbe[i].result; + fwd_res_valid[i] = fwd_i.sbe[i].valid; + end + for (int unsigned i = 0; i < CVA6Cfg.NrWbPorts; i++) begin + if (fwd_i.wb[i].valid && !fwd_i.wb[i].ex_valid) begin + fwd_res[fwd_i.wb[i].trans_id] = fwd_i.wb[i].data; + fwd_res_valid[fwd_i.wb[i].trans_id] = 1'b1; + end + end + end - // WB ports have higher prio than entries for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin - for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb + assign rs1_res[i] = fwd_res[idx_hzd_rs1[i]]; + assign rs1_is_not_csr[i] = rs1_fpr[i] || (fwd_i.sbe[idx_hzd_rs1[i]].fu != ariane_pkg::CSR) || (CVA6Cfg.RVS && issue_instr_i[i].op == ariane_pkg::SFENCE_VMA); + assign rs1_valid[i] = fwd_res_valid[idx_hzd_rs1[i]] && rs1_is_not_csr[i]; - assign rs1_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].rs1) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - fwd_i.sbe[fwd_i.wb[k].trans_id].op - )) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( - issue_instr_i[i].op - ))); + assign rs2_res[i] = fwd_res[idx_hzd_rs2[i]]; + assign rs2_is_not_csr[i] = rs2_fpr[i] || (fwd_i.sbe[idx_hzd_rs2[i]].fu != ariane_pkg::CSR) || (CVA6Cfg.RVS && issue_instr_i[i].op == ariane_pkg::SFENCE_VMA); + assign rs2_valid[i] = fwd_res_valid[idx_hzd_rs2[i]] && rs2_is_not_csr[i]; - assign rs2_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].rs2) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - fwd_i.sbe[fwd_i.wb[k].trans_id].op - )) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( - issue_instr_i[i].op - ))); + assign rs3[i] = fwd_res[idx_hzd_rs3[i]]; + assign rs3_valid[i] = fwd_res_valid[idx_hzd_rs3[i]]; - assign rs3_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - fwd_i.sbe[fwd_i.wb[k].trans_id].op - )) == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( - issue_instr_i[i].op - ))); - - assign rs_data[i][k] = fwd_i.wb[k].data; - end - - for (genvar k = 0; unsigned'(k) < CVA6Cfg.NR_SB_ENTRIES; k++) begin : gen_rs_entries - - assign rs1_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].rs1) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - fwd_i.sbe[k].op - )) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr( - issue_instr_i[i].op - ))); - - assign rs2_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].rs2) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - fwd_i.sbe[k].op - )) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr( - issue_instr_i[i].op - ))); - - assign rs3_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - fwd_i.sbe[k].op - )) == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr( - issue_instr_i[i].op - ))); - - assign rs_data[i][k+CVA6Cfg.NrWbPorts] = fwd_i.sbe[k].result; - end - - // use fixed prio here - // this implicitly gives higher prio to WB ports - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(CVA6Cfg.XLEN), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_rs1 ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (rs1_fwd_req[i]), - .gnt_o (), - .data_i (rs_data[i]), - .gnt_i (1'b1), - .req_o (rs1_available[i]), - .data_o (rs1_res[i]), - .idx_o () - ); - - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(CVA6Cfg.XLEN), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_rs2 ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (rs2_fwd_req[i]), - .gnt_o (), - .data_i (rs_data[i]), - .gnt_i (1'b1), - .req_o (rs2_available[i]), - .data_o (rs2_res[i]), - .idx_o () - ); - - - rr_arb_tree #( - .NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(CVA6Cfg.XLEN), - .ExtPrio(1'b1), - .AxiVldRdy(1'b1) - ) i_sel_rs3 ( - .clk_i (clk_i), - .rst_ni (rst_ni), - .flush_i(1'b0), - .rr_i ('0), - .req_i (rs3_fwd_req[i]), - .gnt_o (), - .data_i (rs_data[i]), - .gnt_i (1'b1), - .req_o (rs3_available[i]), - .data_o (rs3[i]), - .idx_o () - ); - - if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port - assign rs3_res[i] = rs3[i][riscv::XLEN-1:0]; - end else begin : gen_fp_three_port + if (CVA6Cfg.NrRgprPorts == 3) begin + assign rs3_res[i] = rs3[i][CVA6Cfg.XLEN-1:0]; + end else begin assign rs3_res[i] = rs3[i][CVA6Cfg.FLen-1:0]; end - - assign rs1_has_raw[i] = !issue_instr_i[i].use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr( - issue_instr_i[i].op - )) ? rd_clobber_fpr[issue_instr_i[i].rs1] != NONE : - rd_clobber_gpr[issue_instr_i[i].rs1] != NONE); - - assign rs1_valid[i] = rs1_available[i] && (CVA6Cfg.FpPresent && is_rs1_fpr( - issue_instr_i[i].op - ) ? 1'b1 : ((rd_clobber_gpr[issue_instr_i[i].rs1] != CSR) || - (CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA))); - - assign rs2_has_raw[i] = ((CVA6Cfg.FpPresent && is_rs2_fpr( - issue_instr_i[i].op - )) ? rd_clobber_fpr[issue_instr_i[i].rs2] != NONE : - rd_clobber_gpr[issue_instr_i[i].rs2] != NONE); - - assign rs2_valid[i] = rs2_available[i] && (CVA6Cfg.FpPresent && is_rs2_fpr( - issue_instr_i[i].op - ) ? 1'b1 : ((rd_clobber_gpr[issue_instr_i[i].rs2] != CSR) || - (CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA))); - - assign rs3_has_raw[i] = ((CVA6Cfg.FpPresent && is_imm_fpr( - issue_instr_i[i].op - )) ? rd_clobber_fpr[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE : 0); - - assign rs3_valid[i] = rs3_available[i]; - assign rs3_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_i[i].op)); - end // --------------- @@ -902,40 +786,6 @@ module issue_read_operands end end - always_comb begin : gen_check_waw_dependencies - stall_waw = '1; - for (int unsigned i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin - if (issue_instr_valid_i[i] && !fu_busy[i]) begin - // ----------------------------------------- - // WAW - Write After Write Dependency Check - // ----------------------------------------- - // no other instruction has the same destination register -> issue the instruction - if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - issue_instr_i[i].op - )) ? (rd_clobber_fpr[issue_instr_i[i].rd] == NONE) : - (rd_clobber_gpr[issue_instr_i[i].rd] == NONE)) begin - stall_waw[i] = 1'b0; - end - // or check that the target destination register will be written in this cycle by the - // commit stage - for (int unsigned c = 0; c < CVA6Cfg.NrCommitPorts; c++) begin - if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr( - issue_instr_i[i].op - )) ? (we_fpr_i[c] && waddr_i[c] == issue_instr_i[i].rd) : - (we_gpr_i[c] && waddr_i[c] == issue_instr_i[i].rd)) begin - stall_waw[i] = 1'b0; - end - end - if (i > 0) begin - if ((issue_instr_i[i].rd == issue_instr_i[i-1].rd) && (issue_instr_i[i].rd != '0)) begin - stall_waw[i] = 1'b1; - end - end - end - end - end - - // We can issue an instruction if we do not detect that any other instruction is writing the same // destination register. // We also need to check if there is an unresolved branch in the scoreboard. @@ -946,7 +796,7 @@ module issue_read_operands // check that the instruction we got is valid // and that the functional unit we need is not busy if (issue_instr_valid_i[i] && !fu_busy[i]) begin - if (!stall_raw[i] && !stall_waw[i]) begin + if (!stall_raw[i]) begin issue_ack[i] = 1'b1; end if (issue_instr_i[i].ex.valid) begin diff --git a/core/raw_checker.sv b/core/raw_checker.sv new file mode 100644 index 000000000..2b1519735 --- /dev/null +++ b/core/raw_checker.sv @@ -0,0 +1,121 @@ +// Copyright 2024 Thales DIS France SAS +// +// Licensed under the Solderpad Hardware Licence, Version 0.51 (the "License"); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Junheng Zheng - Thales + +module raw_checker + import ariane_pkg::*; +#( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +) ( + // Subsystem Clock - SUBSYSTEM + input logic clk_i, + // Asynchronous reset active low - SUBSYSTEM + input logic rst_ni, + // Register source of the instruction to check RAW dependancies - SCOREBOARD + input logic [REG_ADDR_SIZE-1:0] rs_i, + // Type of register source (FPR or GPR) - SCOREBOARD + input logic rs_fpr_i, + // Registers of destination of the instructions already issued in the scoreboard - SCOREBOARD + input logic [CVA6Cfg.NR_SB_ENTRIES-1:0][REG_ADDR_SIZE-1:0] rd_i, + // Type of registers of destination (FPR or GPR) - SCOREBOARD + input logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rd_fpr_i, + // Instructions in the scoreboard are still issued - SCOREBOARD + input logic [CVA6Cfg.NR_SB_ENTRIES-1:0] still_issued_i, + // Issue pointer - SCOREBOARD + input logic [CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer_i, + + // Index in the scoreboard of the most recent RAW dependancy - SCOREBOARD + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] idx_o, + // Indicates if there is a RAW dependancy - SCOREBOARD + output logic valid_o +); + + logic [CVA6Cfg.NR_SB_ENTRIES-1:0] same_rd_as_rs; + + logic [CVA6Cfg.NR_SB_ENTRIES-1:0] same_rd_as_rs_before; + logic last_before_valid; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] last_before_idx; + + logic [CVA6Cfg.NR_SB_ENTRIES-1:0] same_rd_as_rs_after; + logic last_after_valid; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] last_after_idx; + + logic valid; + logic rs_is_gpr0; + + for (genvar i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin + assign same_rd_as_rs[i] = (rs_fpr_i == rd_fpr_i[i]) && (rs_i == rd_i[i]) && still_issued_i[i]; + assign same_rd_as_rs_before[i] = (i < issue_pointer_i) && same_rd_as_rs[i]; + assign same_rd_as_rs_after[i] = (i >= issue_pointer_i) && same_rd_as_rs[i]; + end + + //Last finders + // for instructions < instruction pointer + rr_arb_tree #( + .NumIn(CVA6Cfg.NR_SB_ENTRIES), + .DataWidth(1), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_last_finder_before ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ({$clog2(CVA6Cfg.NR_SB_ENTRIES){1'b1}}), // Highest index has highest prio. + .req_i (same_rd_as_rs_before), + .gnt_o (), + .data_i ('0), + .gnt_i (1'b1), + .req_o (last_before_valid), + .data_o (), + .idx_o (last_before_idx) + ); + + // for instructions >= instruction pointer + rr_arb_tree #( + .NumIn(CVA6Cfg.NR_SB_ENTRIES), + .DataWidth(1), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) i_last_finder_after ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ({$clog2(CVA6Cfg.NR_SB_ENTRIES){1'b1}}), // Highest index has highest prio. + .req_i (same_rd_as_rs_after), + .gnt_o (), + .data_i ('0), + .gnt_i (1'b1), + .req_o (last_after_valid), + .data_o (), + .idx_o (last_after_idx) + ); + + // take the minimum of the last indexes + rr_arb_tree #( + .NumIn(2), + .DataWidth(CVA6Cfg.TRANS_ID_BITS), + .ExtPrio(1'b1), + .AxiVldRdy(1'b1) + ) min_finder ( + .clk_i (clk_i), + .rst_ni (rst_ni), + .flush_i(1'b0), + .rr_i ('0), // Lowest index has highest prio. + .req_i ({last_after_valid,last_before_valid}), + .gnt_o (), + .data_i ({last_after_idx,last_before_idx}), + .gnt_i (1'b1), + .req_o (valid), + .data_o (idx_o), + .idx_o () + ); + + assign rs_is_gpr0 = (rs_i == '0) && !rs_fpr_i; + assign valid_o = valid && !rs_is_gpr0; + +endmodule