WAW hazards elimination (#2881)
Some checks failed
bender-up-to-date / bender-up-to-date (push) Has been cancelled
ci / build-riscv-tests (push) Has been cancelled
ci / execute-riscv64-tests (push) Has been cancelled
ci / execute-riscv32-tests (push) Has been cancelled

This PR introduces a new RAW hazard detection mechanism that eliminates WAW hazard stalls in the CVA6 issue stage.

It first checks for hazards in all scoreboard entries in parallel.
Then it splits the detected hazards into two groups: those located before the current issue pointer and those located at or after it.
It then finds the index of the last hazard before (resp. after) the issue pointer.
Finally, it gives precedence to a hazard before the issue pointer over the one after the issue pointer.

---------

Co-authored-by: Junheng Zheng <junheng.zheng@thalesgroup.com>
Co-authored-by: JeanRochCoulon <jean-roch.coulon@thalesgroup.com>
This commit is contained in:
Côme 2025-04-23 22:26:50 +02:00 committed by GitHub
parent eb3ff25f15
commit c784fd9047
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 248 additions and 274 deletions

View file

@ -1,2 +1,2 @@
cv32a65x:
gates: 184679
gates: 182076

View file

@ -19,12 +19,12 @@ iterations = None
# Keep it up-to-date with compiler version and core performance improvements
# Will fail if the number of cycles is different from this one
valid_cycles = {
"dhrystone_dual": 18935,
"dhrystone_single": 24127,
"coremark_dual": 1001191,
"coremark_single": 1300030,
"dhrystone_cv32a65x": 31976,
"dhrystone_cv32a60x": 39449,
"dhrystone_dual": 17159,
"dhrystone_single": 22407,
"coremark_dual": 981777,
"coremark_single": 1294524,
"dhrystone_cv32a65x": 30056,
"dhrystone_cv32a60x": 37474,
}
for arg in sys.argv[1:]:

View file

@ -113,6 +113,7 @@ sources:
- core/ariane_regfile_ff.sv
- core/ariane_regfile_fpga.sv
- core/scoreboard.sv
- core/raw_checker.sv
- core/store_buffer.sv
- core/amo_buffer.sv
- core/store_unit.sv

View file

@ -95,6 +95,7 @@ core/mmu_sv39x4/ptw_sv39x4.sv
core/ariane_regfile_ff.sv
core/re_name.sv
core/scoreboard.sv
core/raw_checker.sv
core/store_buffer.sv
core/amo_buffer.sv
core/store_unit.sv

View file

@ -132,6 +132,7 @@ ${CVA6_REPO_DIR}/core/ariane_regfile_ff.sv
${CVA6_REPO_DIR}/core/ariane_regfile_fpga.sv
// NOTE: scoreboard.sv modified for DSIM (unchanged for other simulators)
${CVA6_REPO_DIR}/core/scoreboard.sv
${CVA6_REPO_DIR}/core/raw_checker.sv
${CVA6_REPO_DIR}/core/store_buffer.sv
${CVA6_REPO_DIR}/core/amo_buffer.sv
${CVA6_REPO_DIR}/core/store_unit.sv

View file

@ -136,7 +136,7 @@ module issue_read_operands
logic none, load, store, alu, alu2, ctrl_flow, mult, csr, fpu, fpu_vec, cvxif, accel;
} fus_busy_t;
logic [CVA6Cfg.NrIssuePorts-1:0] stall_raw, stall_waw, stall_rs1, stall_rs2, stall_rs3;
logic [CVA6Cfg.NrIssuePorts-1:0] stall_raw, stall_rs1, stall_rs2, stall_rs3;
logic [CVA6Cfg.NrIssuePorts-1:0] fu_busy; // functional unit is busy
fus_busy_t [CVA6Cfg.NrIssuePorts-1:0] fus_busy; // which functional units are considered busy
logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack;
@ -163,37 +163,44 @@ module issue_read_operands
logic [CVA6Cfg.NrIssuePorts-1:0] branch_valid_n, branch_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] cvxif_valid_n, cvxif_valid_q;
logic [31:0] cvxif_off_instr_n, cvxif_off_instr_q;
logic cvxif_instruction_valid;
logic cvxif_instruction_valid;
//RAW detection
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.TRANS_ID_BITS-1:0] idx_hzd_rs1;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_raw_check;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_has_raw;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_fpr;
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.TRANS_ID_BITS-1:0] idx_hzd_rs2;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_raw_check;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_has_raw;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_fpr;
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.TRANS_ID_BITS-1:0] idx_hzd_rs3;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_raw_check;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_has_raw;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_fpr;
logic [CVA6Cfg.NR_SB_ENTRIES-1:0][ariane_pkg::REG_ADDR_SIZE-1:0] rd_list;
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rd_fpr;
//fwd logic
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_has_raw;
logic [CVA6Cfg.NrIssuePorts-1:0] rs2_has_raw;
logic [CVA6Cfg.NrIssuePorts-1:0] rs3_has_raw;
logic [CVA6Cfg.NR_SB_ENTRIES-1:0][ CVA6Cfg.XLEN-1:0] fwd_res;
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] fwd_res_valid;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3;
logic [CVA6Cfg.NrIssuePorts-1:0] rs3_fpr;
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rs1_is_not_csr;
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rs2_is_not_csr;
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid;
logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid;
logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid;
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs3;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_res;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs2_res;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3_res;
// clobber
fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr, rd_clobber_fpr;
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] gpr_clobber_vld;
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] fpr_clobber_vld;
ariane_pkg::fu_t [ CVA6Cfg.NR_SB_ENTRIES:0] clobber_fu;
//forward logic
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0]
rs1_fwd_req, rs2_fwd_req, rs3_fwd_req;
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_is_not_gpr0, rs2_is_not_gpr0, rs3_is_not_gpr0;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] rs_data;
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_available, rs2_available, rs3_available;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_valid;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_valid;
logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_valid;
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs1_res;
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs2_res;
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs3_res;
logic [CVA6Cfg.NrIssuePorts-1:0][31:0] tinst_n, tinst_q; // transformed instruction
@ -246,7 +253,7 @@ module issue_read_operands
// TODO check only for 1st instruction ??
// Allow a cvxif transaction if we WaW condition are ok.
assign cvxif_req_allowed = (issue_instr_i[0].fu == CVXIF) && !stall_waw[0];
assign cvxif_req_allowed = (issue_instr_i[0].fu == CVXIF);
assign cvxif_instruction_valid = !issue_instr_i[0].ex.valid && issue_instr_valid_i[0] && cvxif_req_allowed;
assign x_transaction_accepted_o = x_issue_valid_o && x_issue_ready_i && x_issue_resp_i.accept;
assign x_transaction_rejected = x_issue_valid_o && x_issue_ready_i && ~x_issue_resp_i.accept;
@ -408,226 +415,103 @@ module issue_read_operands
end
end
// -------------------
// RD clobber process
// -------------------
// rd_clobber output: output currently clobbered destination registers
always_comb begin : clobber_assign
gpr_clobber_vld = '0;
fpr_clobber_vld = '0;
// default (highest entry hast lowest prio in arbiter tree below)
clobber_fu[CVA6Cfg.NR_SB_ENTRIES] = ariane_pkg::NONE;
for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin
gpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1;
fpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1;
end
// check for all valid entries and set the clobber accordingly
for (int unsigned i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin
gpr_clobber_vld[fwd_i.sbe[i].rd][i] = fwd_i.still_issued[i] & ~(CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[i].op));
fpr_clobber_vld[fwd_i.sbe[i].rd][i] = fwd_i.still_issued[i] & (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[i].op));
clobber_fu[i] = fwd_i.sbe[i].fu;
end
// GPR[0] is always free
gpr_clobber_vld[0] = '0;
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
assign rs1_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(issue_instr_i[i].op));
assign rs2_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(issue_instr_i[i].op));
assign rs3_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_i[i].op));
end
for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers
// get fu that is going to clobber this register (there should be only one)
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + 1),
.DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_gpr_clobbers (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (gpr_clobber_vld[k]),
.gnt_o (),
.data_i (clobber_fu),
.gnt_i (1'b1),
.req_o (),
.data_o (rd_clobber_gpr[k]),
.idx_o ()
// ----------------------------------
// Renaming
// ----------------------------------
for (genvar i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin
assign rd_list[i] = fwd_i.sbe[i].rd;
assign rd_fpr[i] = CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(fwd_i.sbe[i].op);
end
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin : gen_raw_checks
raw_checker #(
.CVA6Cfg(CVA6Cfg)
) i_rs1_last_raw (
.clk_i(clk_i),
.rst_ni(rst_ni),
.rs_i(issue_instr_i[i].rs1),
.rs_fpr_i(rs1_fpr[i]),
.rd_i(rd_list),
.rd_fpr_i(rd_fpr),
.still_issued_i(fwd_i.still_issued),
.issue_pointer_i(fwd_i.issue_pointer),
.idx_o(idx_hzd_rs1[i]),
.valid_o(rs1_raw_check[i])
);
if (CVA6Cfg.FpPresent) begin
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + 1),
.DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_fpr_clobbers (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (fpr_clobber_vld[k]),
.gnt_o (),
.data_i (clobber_fu),
.gnt_i (1'b1),
.req_o (),
.data_o (rd_clobber_fpr[k]),
.idx_o ()
);
end else begin
assign rd_clobber_fpr[k] = NONE;
end
assign rs1_has_raw[i] = rs1_raw_check[i] && !issue_instr_i[i].use_zimm;
raw_checker #(
.CVA6Cfg(CVA6Cfg)
) i_rs2_last_raw (
.clk_i(clk_i),
.rst_ni(rst_ni),
.rs_i(issue_instr_i[i].rs2),
.rs_fpr_i(rs2_fpr[i]),
.rd_i(rd_list),
.rd_fpr_i(rd_fpr),
.still_issued_i(fwd_i.still_issued),
.issue_pointer_i(fwd_i.issue_pointer),
.idx_o(idx_hzd_rs2[i]),
.valid_o(rs2_raw_check[i])
);
assign rs2_has_raw[i] = rs2_raw_check[i];
raw_checker #(
.CVA6Cfg(CVA6Cfg)
) i_rs3_last_raw (
.clk_i(clk_i),
.rst_ni(rst_ni),
.rs_i(issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]),
.rs_fpr_i(rs3_fpr[i]),
.rd_i(rd_list),
.rd_fpr_i(rd_fpr),
.still_issued_i(fwd_i.still_issued),
.issue_pointer_i(fwd_i.issue_pointer),
.idx_o(idx_hzd_rs3[i]),
.valid_o(rs3_raw_check[i])
);
assign rs3_has_raw[i] = rs3_raw_check[i] && rs3_fpr[i];
end
// ----------------------------------
// Read Operands (a.k.a forwarding)
// ----------------------------------
// read operand interface: same logic as register file
always_comb begin
for (int unsigned i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin
fwd_res[i] = fwd_i.sbe[i].result;
fwd_res_valid[i] = fwd_i.sbe[i].valid;
end
for (int unsigned i = 0; i < CVA6Cfg.NrWbPorts; i++) begin
if (fwd_i.wb[i].valid && !fwd_i.wb[i].ex_valid) begin
fwd_res[fwd_i.wb[i].trans_id] = fwd_i.wb[i].data;
fwd_res_valid[fwd_i.wb[i].trans_id] = 1'b1;
end
end
end
// WB ports have higher prio than entries
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb
assign rs1_res[i] = fwd_res[idx_hzd_rs1[i]];
assign rs1_is_not_csr[i] = rs1_fpr[i] || (fwd_i.sbe[idx_hzd_rs1[i]].fu != ariane_pkg::CSR) || (CVA6Cfg.RVS && issue_instr_i[i].op == ariane_pkg::SFENCE_VMA);
assign rs1_valid[i] = fwd_res_valid[idx_hzd_rs1[i]] && rs1_is_not_csr[i];
assign rs1_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].rs1) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[fwd_i.wb[k].trans_id].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
issue_instr_i[i].op
)));
assign rs2_res[i] = fwd_res[idx_hzd_rs2[i]];
assign rs2_is_not_csr[i] = rs2_fpr[i] || (fwd_i.sbe[idx_hzd_rs2[i]].fu != ariane_pkg::CSR) || (CVA6Cfg.RVS && issue_instr_i[i].op == ariane_pkg::SFENCE_VMA);
assign rs2_valid[i] = fwd_res_valid[idx_hzd_rs2[i]] && rs2_is_not_csr[i];
assign rs2_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].rs2) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[fwd_i.wb[k].trans_id].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_i[i].op
)));
assign rs3[i] = fwd_res[idx_hzd_rs3[i]];
assign rs3_valid[i] = fwd_res_valid[idx_hzd_rs3[i]];
assign rs3_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[fwd_i.wb[k].trans_id].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_i[i].op
)));
assign rs_data[i][k] = fwd_i.wb[k].data;
end
for (genvar k = 0; unsigned'(k) < CVA6Cfg.NR_SB_ENTRIES; k++) begin : gen_rs_entries
assign rs1_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].rs1) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[k].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
issue_instr_i[i].op
)));
assign rs2_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].rs2) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[k].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_i[i].op
)));
assign rs3_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
fwd_i.sbe[k].op
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_i[i].op
)));
assign rs_data[i][k+CVA6Cfg.NrWbPorts] = fwd_i.sbe[k].result;
end
// use fixed prio here
// this implicitly gives higher prio to WB ports
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(CVA6Cfg.XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs1 (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs1_fwd_req[i]),
.gnt_o (),
.data_i (rs_data[i]),
.gnt_i (1'b1),
.req_o (rs1_available[i]),
.data_o (rs1_res[i]),
.idx_o ()
);
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(CVA6Cfg.XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs2 (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs2_fwd_req[i]),
.gnt_o (),
.data_i (rs_data[i]),
.gnt_i (1'b1),
.req_o (rs2_available[i]),
.data_o (rs2_res[i]),
.idx_o ()
);
rr_arb_tree #(
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(CVA6Cfg.XLEN),
.ExtPrio(1'b1),
.AxiVldRdy(1'b1)
) i_sel_rs3 (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i(1'b0),
.rr_i ('0),
.req_i (rs3_fwd_req[i]),
.gnt_o (),
.data_i (rs_data[i]),
.gnt_i (1'b1),
.req_o (rs3_available[i]),
.data_o (rs3[i]),
.idx_o ()
);
if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port
assign rs3_res[i] = rs3[i][riscv::XLEN-1:0];
end else begin : gen_fp_three_port
if (CVA6Cfg.NrRgprPorts == 3) begin
assign rs3_res[i] = rs3[i][CVA6Cfg.XLEN-1:0];
end else begin
assign rs3_res[i] = rs3[i][CVA6Cfg.FLen-1:0];
end
assign rs1_has_raw[i] = !issue_instr_i[i].use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr(
issue_instr_i[i].op
)) ? rd_clobber_fpr[issue_instr_i[i].rs1] != NONE :
rd_clobber_gpr[issue_instr_i[i].rs1] != NONE);
assign rs1_valid[i] = rs1_available[i] && (CVA6Cfg.FpPresent && is_rs1_fpr(
issue_instr_i[i].op
) ? 1'b1 : ((rd_clobber_gpr[issue_instr_i[i].rs1] != CSR) ||
(CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)));
assign rs2_has_raw[i] = ((CVA6Cfg.FpPresent && is_rs2_fpr(
issue_instr_i[i].op
)) ? rd_clobber_fpr[issue_instr_i[i].rs2] != NONE :
rd_clobber_gpr[issue_instr_i[i].rs2] != NONE);
assign rs2_valid[i] = rs2_available[i] && (CVA6Cfg.FpPresent && is_rs2_fpr(
issue_instr_i[i].op
) ? 1'b1 : ((rd_clobber_gpr[issue_instr_i[i].rs2] != CSR) ||
(CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)));
assign rs3_has_raw[i] = ((CVA6Cfg.FpPresent && is_imm_fpr(
issue_instr_i[i].op
)) ? rd_clobber_fpr[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE : 0);
assign rs3_valid[i] = rs3_available[i];
assign rs3_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_i[i].op));
end
// ---------------
@ -902,40 +786,6 @@ module issue_read_operands
end
end
always_comb begin : gen_check_waw_dependencies
stall_waw = '1;
for (int unsigned i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
if (issue_instr_valid_i[i] && !fu_busy[i]) begin
// -----------------------------------------
// WAW - Write After Write Dependency Check
// -----------------------------------------
// no other instruction has the same destination register -> issue the instruction
if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
issue_instr_i[i].op
)) ? (rd_clobber_fpr[issue_instr_i[i].rd] == NONE) :
(rd_clobber_gpr[issue_instr_i[i].rd] == NONE)) begin
stall_waw[i] = 1'b0;
end
// or check that the target destination register will be written in this cycle by the
// commit stage
for (int unsigned c = 0; c < CVA6Cfg.NrCommitPorts; c++) begin
if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
issue_instr_i[i].op
)) ? (we_fpr_i[c] && waddr_i[c] == issue_instr_i[i].rd) :
(we_gpr_i[c] && waddr_i[c] == issue_instr_i[i].rd)) begin
stall_waw[i] = 1'b0;
end
end
if (i > 0) begin
if ((issue_instr_i[i].rd == issue_instr_i[i-1].rd) && (issue_instr_i[i].rd != '0)) begin
stall_waw[i] = 1'b1;
end
end
end
end
end
// We can issue an instruction if we do not detect that any other instruction is writing the same
// destination register.
// We also need to check if there is an unresolved branch in the scoreboard.
@ -946,7 +796,7 @@ module issue_read_operands
// check that the instruction we got is valid
// and that the functional unit we need is not busy
if (issue_instr_valid_i[i] && !fu_busy[i]) begin
if (!stall_raw[i] && !stall_waw[i]) begin
if (!stall_raw[i]) begin
issue_ack[i] = 1'b1;
end
if (issue_instr_i[i].ex.valid) begin

121
core/raw_checker.sv Normal file
View file

@ -0,0 +1,121 @@
// Copyright 2024 Thales DIS France SAS
//
// Licensed under the Solderpad Hardware Licence, Version 0.51 (the "License");
// you may not use this file except in compliance with the License.
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
// You may obtain a copy of the License at https://solderpad.org/licenses/
//
// Original Author: Junheng Zheng - Thales
// Finds, among the scoreboard entries that are still issued, the one whose
// destination register matches a given source register, and reports its index.
//
// When several entries match, the selection order is:
//   1. the highest-indexed match strictly below the issue pointer, if any;
//   2. otherwise the highest-indexed match at or above the issue pointer.
// NOTE(review): this ordering presumably yields the most recently issued writer
// when the scoreboard is filled as a circular buffer - confirm against the
// scoreboard implementation.
//
// The module is purely combinational; clk_i / rst_ni are only forwarded to the
// rr_arb_tree instances used as priority finders.
module raw_checker
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    // Subsystem Clock - SUBSYSTEM
    input logic clk_i,
    // Asynchronous reset active low - SUBSYSTEM
    input logic rst_ni,
    // Source register of the instruction to check for RAW dependencies - SCOREBOARD
    input logic [REG_ADDR_SIZE-1:0] rs_i,
    // Type of the source register (1: FPR, 0: GPR) - SCOREBOARD
    input logic rs_fpr_i,
    // Destination registers of the instructions already issued in the scoreboard - SCOREBOARD
    input logic [CVA6Cfg.NR_SB_ENTRIES-1:0][REG_ADDR_SIZE-1:0] rd_i,
    // Type of the destination registers (1: FPR, 0: GPR) - SCOREBOARD
    input logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rd_fpr_i,
    // Instructions in the scoreboard are still issued - SCOREBOARD
    input logic [CVA6Cfg.NR_SB_ENTRIES-1:0] still_issued_i,
    // Issue pointer - SCOREBOARD
    input logic [CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer_i,
    // Index in the scoreboard of the selected RAW dependency - SCOREBOARD
    output logic [CVA6Cfg.TRANS_ID_BITS-1:0] idx_o,
    // Indicates if there is a RAW dependency - SCOREBOARD
    output logic valid_o
);

  // Width of the external priority input of the scoreboard-wide finders.
  localparam int unsigned PrioWidth = $clog2(CVA6Cfg.NR_SB_ENTRIES);

  // One flag per scoreboard entry: the entry writes the register read by rs_i.
  logic [CVA6Cfg.NR_SB_ENTRIES-1:0] hzd_all;
  // Same flags, restricted to entries strictly below the issue pointer ...
  logic [CVA6Cfg.NR_SB_ENTRIES-1:0] hzd_below_ptr;
  // ... and to entries at or above the issue pointer.
  logic [CVA6Cfg.NR_SB_ENTRIES-1:0] hzd_from_ptr;

  // Result of the priority search in each of the two groups.
  logic                             below_found;
  logic [CVA6Cfg.TRANS_ID_BITS-1:0] below_idx;
  logic                             from_found;
  logic [CVA6Cfg.TRANS_ID_BITS-1:0] from_idx;

  // ----------------------------------
  // Parallel hazard detection
  // ----------------------------------
  always_comb begin : hazard_flags
    for (int unsigned e = 0; e < CVA6Cfg.NR_SB_ENTRIES; e++) begin
      // Register file kind (FPR/GPR) and register address must both match,
      // and the producing instruction must still be issued.
      hzd_all[e]       = (rd_fpr_i[e] == rs_fpr_i) && (rd_i[e] == rs_i) && still_issued_i[e];
      hzd_below_ptr[e] = (e < issue_pointer_i) && hzd_all[e];
      hzd_from_ptr[e]  = !(e < issue_pointer_i) && hzd_all[e];
    end
  end

  // ----------------------------------
  // Last-match finders
  // ----------------------------------
  // Entries with index < issue pointer.
  rr_arb_tree #(
      .NumIn(CVA6Cfg.NR_SB_ENTRIES),
      .DataWidth(1),
      .ExtPrio(1'b1),
      .AxiVldRdy(1'b1)
  ) i_last_finder_before (
      .clk_i  (clk_i),
      .rst_ni (rst_ni),
      .flush_i(1'b0),
      .rr_i   ({PrioWidth{1'b1}}),  // Highest index has highest prio.
      .req_i  (hzd_below_ptr),
      .gnt_o  (),
      .data_i ('0),  // no payload needed, only the index is used
      .gnt_i  (1'b1),
      .req_o  (below_found),
      .data_o (),
      .idx_o  (below_idx)
  );

  // Entries with index >= issue pointer.
  rr_arb_tree #(
      .NumIn(CVA6Cfg.NR_SB_ENTRIES),
      .DataWidth(1),
      .ExtPrio(1'b1),
      .AxiVldRdy(1'b1)
  ) i_last_finder_after (
      .clk_i  (clk_i),
      .rst_ni (rst_ni),
      .flush_i(1'b0),
      .rr_i   ({PrioWidth{1'b1}}),  // Highest index has highest prio.
      .req_i  (hzd_from_ptr),
      .gnt_o  (),
      .data_i ('0),  // no payload needed, only the index is used
      .gnt_i  (1'b1),
      .req_o  (from_found),
      .data_o (),
      .idx_o  (from_idx)
  );

  // ----------------------------------
  // Final selection
  // ----------------------------------
  // Fixed priority between the two groups: a match below the issue pointer
  // wins over a match at or above it. When neither group has a match, idx_o
  // carries from_idx and valid_o is low.
  assign idx_o   = below_found ? below_idx : from_idx;

  // Reads of GPR x0 never report a dependency.
  assign valid_o = (below_found || from_found) && !((rs_i == '0) && !rs_fpr_i);

endmodule