mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-06-27 08:50:54 -04:00
WAW hazards elimination (#2881)
This PR introduces a new RAW hazard detection mechanism to eliminate WAW hazards in CVA6 issue stage. It first checks for hazards in all scoreboard entries in parallel. Then it filters found hazards before vs after the current issue pointer. It then finds the index of the last hazard before (resp. after) the issue pointer. Finally, it gives precedence to a hazard before the issue pointer over the one after the issue pointer. --------- Co-authored-by: Junheng Zheng <junheng.zheng@thalesgroup.com> Co-authored-by: JeanRochCoulon <jean-roch.coulon@thalesgroup.com>
This commit is contained in:
parent
eb3ff25f15
commit
c784fd9047
7 changed files with 248 additions and 274 deletions
|
@ -1,2 +1,2 @@
|
|||
cv32a65x:
|
||||
gates: 184679
|
||||
gates: 182076
|
||||
|
|
|
@ -19,12 +19,12 @@ iterations = None
|
|||
# Keep it up-to-date with compiler version and core performance improvements
|
||||
# Will fail if the number of cycles is different from this one
|
||||
valid_cycles = {
|
||||
"dhrystone_dual": 18935,
|
||||
"dhrystone_single": 24127,
|
||||
"coremark_dual": 1001191,
|
||||
"coremark_single": 1300030,
|
||||
"dhrystone_cv32a65x": 31976,
|
||||
"dhrystone_cv32a60x": 39449,
|
||||
"dhrystone_dual": 17159,
|
||||
"dhrystone_single": 22407,
|
||||
"coremark_dual": 981777,
|
||||
"coremark_single": 1294524,
|
||||
"dhrystone_cv32a65x": 30056,
|
||||
"dhrystone_cv32a60x": 37474,
|
||||
}
|
||||
|
||||
for arg in sys.argv[1:]:
|
||||
|
|
|
@ -113,6 +113,7 @@ sources:
|
|||
- core/ariane_regfile_ff.sv
|
||||
- core/ariane_regfile_fpga.sv
|
||||
- core/scoreboard.sv
|
||||
- core/raw_checker.sv
|
||||
- core/store_buffer.sv
|
||||
- core/amo_buffer.sv
|
||||
- core/store_unit.sv
|
||||
|
|
|
@ -95,6 +95,7 @@ core/mmu_sv39x4/ptw_sv39x4.sv
|
|||
core/ariane_regfile_ff.sv
|
||||
core/re_name.sv
|
||||
core/scoreboard.sv
|
||||
core/raw_checker.sv
|
||||
core/store_buffer.sv
|
||||
core/amo_buffer.sv
|
||||
core/store_unit.sv
|
||||
|
|
|
@ -132,6 +132,7 @@ ${CVA6_REPO_DIR}/core/ariane_regfile_ff.sv
|
|||
${CVA6_REPO_DIR}/core/ariane_regfile_fpga.sv
|
||||
// NOTE: scoreboard.sv modified for DSIM (unchanged for other simulators)
|
||||
${CVA6_REPO_DIR}/core/scoreboard.sv
|
||||
${CVA6_REPO_DIR}/core/raw_checker.sv
|
||||
${CVA6_REPO_DIR}/core/store_buffer.sv
|
||||
${CVA6_REPO_DIR}/core/amo_buffer.sv
|
||||
${CVA6_REPO_DIR}/core/store_unit.sv
|
||||
|
|
|
@ -136,7 +136,7 @@ module issue_read_operands
|
|||
logic none, load, store, alu, alu2, ctrl_flow, mult, csr, fpu, fpu_vec, cvxif, accel;
|
||||
} fus_busy_t;
|
||||
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] stall_raw, stall_waw, stall_rs1, stall_rs2, stall_rs3;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] stall_raw, stall_rs1, stall_rs2, stall_rs3;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] fu_busy; // functional unit is busy
|
||||
fus_busy_t [CVA6Cfg.NrIssuePorts-1:0] fus_busy; // which functional units are considered busy
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] issue_ack;
|
||||
|
@ -163,37 +163,44 @@ module issue_read_operands
|
|||
logic [CVA6Cfg.NrIssuePorts-1:0] branch_valid_n, branch_valid_q;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] cvxif_valid_n, cvxif_valid_q;
|
||||
logic [31:0] cvxif_off_instr_n, cvxif_off_instr_q;
|
||||
logic cvxif_instruction_valid;
|
||||
logic cvxif_instruction_valid;
|
||||
|
||||
//RAW detection
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.TRANS_ID_BITS-1:0] idx_hzd_rs1;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_raw_check;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_has_raw;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_fpr;
|
||||
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.TRANS_ID_BITS-1:0] idx_hzd_rs2;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_raw_check;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_has_raw;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_fpr;
|
||||
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.TRANS_ID_BITS-1:0] idx_hzd_rs3;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_raw_check;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_has_raw;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_fpr;
|
||||
|
||||
|
||||
logic [CVA6Cfg.NR_SB_ENTRIES-1:0][ariane_pkg::REG_ADDR_SIZE-1:0] rd_list;
|
||||
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rd_fpr;
|
||||
|
||||
//fwd logic
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_has_raw;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] rs2_has_raw;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] rs3_has_raw;
|
||||
logic [CVA6Cfg.NR_SB_ENTRIES-1:0][ CVA6Cfg.XLEN-1:0] fwd_res;
|
||||
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] fwd_res_valid;
|
||||
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] rs3_fpr;
|
||||
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rs1_is_not_csr;
|
||||
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rs2_is_not_csr;
|
||||
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_valid;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] rs2_valid;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] rs3_valid;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs3;
|
||||
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs1_res;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs2_res;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] rs3_res;
|
||||
|
||||
// clobber
|
||||
fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr, rd_clobber_fpr;
|
||||
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] gpr_clobber_vld;
|
||||
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][CVA6Cfg.NR_SB_ENTRIES:0] fpr_clobber_vld;
|
||||
ariane_pkg::fu_t [ CVA6Cfg.NR_SB_ENTRIES:0] clobber_fu;
|
||||
|
||||
//forward logic
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0]
|
||||
rs1_fwd_req, rs2_fwd_req, rs3_fwd_req;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_is_not_gpr0, rs2_is_not_gpr0, rs3_is_not_gpr0;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.NR_SB_ENTRIES+CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] rs_data;
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0] rs1_available, rs2_available, rs3_available;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs1_valid;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs2_valid;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0] rs3_valid;
|
||||
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs1_res;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs2_res;
|
||||
logic [ CVA6Cfg.NrIssuePorts-1:0][ CVA6Cfg.XLEN-1:0] rs3_res;
|
||||
|
||||
logic [CVA6Cfg.NrIssuePorts-1:0][31:0] tinst_n, tinst_q; // transformed instruction
|
||||
|
||||
|
@ -246,7 +253,7 @@ module issue_read_operands
|
|||
|
||||
// TODO check only for 1st instruction ??
|
||||
// Allow a cvxif transaction if we WaW condition are ok.
|
||||
assign cvxif_req_allowed = (issue_instr_i[0].fu == CVXIF) && !stall_waw[0];
|
||||
assign cvxif_req_allowed = (issue_instr_i[0].fu == CVXIF);
|
||||
assign cvxif_instruction_valid = !issue_instr_i[0].ex.valid && issue_instr_valid_i[0] && cvxif_req_allowed;
|
||||
assign x_transaction_accepted_o = x_issue_valid_o && x_issue_ready_i && x_issue_resp_i.accept;
|
||||
assign x_transaction_rejected = x_issue_valid_o && x_issue_ready_i && ~x_issue_resp_i.accept;
|
||||
|
@ -408,226 +415,103 @@ module issue_read_operands
|
|||
end
|
||||
end
|
||||
|
||||
// -------------------
|
||||
// RD clobber process
|
||||
// -------------------
|
||||
// rd_clobber output: output currently clobbered destination registers
|
||||
|
||||
always_comb begin : clobber_assign
|
||||
gpr_clobber_vld = '0;
|
||||
fpr_clobber_vld = '0;
|
||||
|
||||
// default (highest entry hast lowest prio in arbiter tree below)
|
||||
clobber_fu[CVA6Cfg.NR_SB_ENTRIES] = ariane_pkg::NONE;
|
||||
for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin
|
||||
gpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1;
|
||||
fpr_clobber_vld[i][CVA6Cfg.NR_SB_ENTRIES] = 1'b1;
|
||||
end
|
||||
|
||||
// check for all valid entries and set the clobber accordingly
|
||||
|
||||
for (int unsigned i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin
|
||||
gpr_clobber_vld[fwd_i.sbe[i].rd][i] = fwd_i.still_issued[i] & ~(CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
fwd_i.sbe[i].op));
|
||||
fpr_clobber_vld[fwd_i.sbe[i].rd][i] = fwd_i.still_issued[i] & (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
fwd_i.sbe[i].op));
|
||||
clobber_fu[i] = fwd_i.sbe[i].fu;
|
||||
end
|
||||
|
||||
// GPR[0] is always free
|
||||
gpr_clobber_vld[0] = '0;
|
||||
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
|
||||
assign rs1_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(issue_instr_i[i].op));
|
||||
assign rs2_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(issue_instr_i[i].op));
|
||||
assign rs3_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_i[i].op));
|
||||
end
|
||||
|
||||
for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers
|
||||
// get fu that is going to clobber this register (there should be only one)
|
||||
rr_arb_tree #(
|
||||
.NumIn(CVA6Cfg.NR_SB_ENTRIES + 1),
|
||||
.DataType(ariane_pkg::fu_t),
|
||||
.ExtPrio(1'b1),
|
||||
.AxiVldRdy(1'b1)
|
||||
) i_sel_gpr_clobbers (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i(1'b0),
|
||||
.rr_i ('0),
|
||||
.req_i (gpr_clobber_vld[k]),
|
||||
.gnt_o (),
|
||||
.data_i (clobber_fu),
|
||||
.gnt_i (1'b1),
|
||||
.req_o (),
|
||||
.data_o (rd_clobber_gpr[k]),
|
||||
.idx_o ()
|
||||
// ----------------------------------
|
||||
// Renaming
|
||||
// ----------------------------------
|
||||
for (genvar i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin
|
||||
assign rd_list[i] = fwd_i.sbe[i].rd;
|
||||
assign rd_fpr[i] = CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(fwd_i.sbe[i].op);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin : gen_raw_checks
|
||||
raw_checker #(
|
||||
.CVA6Cfg(CVA6Cfg)
|
||||
) i_rs1_last_raw (
|
||||
.clk_i(clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
.rs_i(issue_instr_i[i].rs1),
|
||||
.rs_fpr_i(rs1_fpr[i]),
|
||||
.rd_i(rd_list),
|
||||
.rd_fpr_i(rd_fpr),
|
||||
.still_issued_i(fwd_i.still_issued),
|
||||
.issue_pointer_i(fwd_i.issue_pointer),
|
||||
.idx_o(idx_hzd_rs1[i]),
|
||||
.valid_o(rs1_raw_check[i])
|
||||
);
|
||||
if (CVA6Cfg.FpPresent) begin
|
||||
rr_arb_tree #(
|
||||
.NumIn(CVA6Cfg.NR_SB_ENTRIES + 1),
|
||||
.DataType(ariane_pkg::fu_t),
|
||||
.ExtPrio(1'b1),
|
||||
.AxiVldRdy(1'b1)
|
||||
) i_sel_fpr_clobbers (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i(1'b0),
|
||||
.rr_i ('0),
|
||||
.req_i (fpr_clobber_vld[k]),
|
||||
.gnt_o (),
|
||||
.data_i (clobber_fu),
|
||||
.gnt_i (1'b1),
|
||||
.req_o (),
|
||||
.data_o (rd_clobber_fpr[k]),
|
||||
.idx_o ()
|
||||
);
|
||||
end else begin
|
||||
assign rd_clobber_fpr[k] = NONE;
|
||||
end
|
||||
assign rs1_has_raw[i] = rs1_raw_check[i] && !issue_instr_i[i].use_zimm;
|
||||
|
||||
raw_checker #(
|
||||
.CVA6Cfg(CVA6Cfg)
|
||||
) i_rs2_last_raw (
|
||||
.clk_i(clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
.rs_i(issue_instr_i[i].rs2),
|
||||
.rs_fpr_i(rs2_fpr[i]),
|
||||
.rd_i(rd_list),
|
||||
.rd_fpr_i(rd_fpr),
|
||||
.still_issued_i(fwd_i.still_issued),
|
||||
.issue_pointer_i(fwd_i.issue_pointer),
|
||||
.idx_o(idx_hzd_rs2[i]),
|
||||
.valid_o(rs2_raw_check[i])
|
||||
);
|
||||
assign rs2_has_raw[i] = rs2_raw_check[i];
|
||||
|
||||
raw_checker #(
|
||||
.CVA6Cfg(CVA6Cfg)
|
||||
) i_rs3_last_raw (
|
||||
.clk_i(clk_i),
|
||||
.rst_ni(rst_ni),
|
||||
.rs_i(issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]),
|
||||
.rs_fpr_i(rs3_fpr[i]),
|
||||
.rd_i(rd_list),
|
||||
.rd_fpr_i(rd_fpr),
|
||||
.still_issued_i(fwd_i.still_issued),
|
||||
.issue_pointer_i(fwd_i.issue_pointer),
|
||||
.idx_o(idx_hzd_rs3[i]),
|
||||
.valid_o(rs3_raw_check[i])
|
||||
);
|
||||
assign rs3_has_raw[i] = rs3_raw_check[i] && rs3_fpr[i];
|
||||
end
|
||||
|
||||
// ----------------------------------
|
||||
// Read Operands (a.k.a forwarding)
|
||||
// ----------------------------------
|
||||
// read operand interface: same logic as register file
|
||||
always_comb begin
|
||||
for (int unsigned i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin
|
||||
fwd_res[i] = fwd_i.sbe[i].result;
|
||||
fwd_res_valid[i] = fwd_i.sbe[i].valid;
|
||||
end
|
||||
for (int unsigned i = 0; i < CVA6Cfg.NrWbPorts; i++) begin
|
||||
if (fwd_i.wb[i].valid && !fwd_i.wb[i].ex_valid) begin
|
||||
fwd_res[fwd_i.wb[i].trans_id] = fwd_i.wb[i].data;
|
||||
fwd_res_valid[fwd_i.wb[i].trans_id] = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// WB ports have higher prio than entries
|
||||
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
|
||||
for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb
|
||||
assign rs1_res[i] = fwd_res[idx_hzd_rs1[i]];
|
||||
assign rs1_is_not_csr[i] = rs1_fpr[i] || (fwd_i.sbe[idx_hzd_rs1[i]].fu != ariane_pkg::CSR) || (CVA6Cfg.RVS && issue_instr_i[i].op == ariane_pkg::SFENCE_VMA);
|
||||
assign rs1_valid[i] = fwd_res_valid[idx_hzd_rs1[i]] && rs1_is_not_csr[i];
|
||||
|
||||
assign rs1_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].rs1) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
fwd_i.sbe[fwd_i.wb[k].trans_id].op
|
||||
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
|
||||
issue_instr_i[i].op
|
||||
)));
|
||||
assign rs2_res[i] = fwd_res[idx_hzd_rs2[i]];
|
||||
assign rs2_is_not_csr[i] = rs2_fpr[i] || (fwd_i.sbe[idx_hzd_rs2[i]].fu != ariane_pkg::CSR) || (CVA6Cfg.RVS && issue_instr_i[i].op == ariane_pkg::SFENCE_VMA);
|
||||
assign rs2_valid[i] = fwd_res_valid[idx_hzd_rs2[i]] && rs2_is_not_csr[i];
|
||||
|
||||
assign rs2_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].rs2) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
fwd_i.sbe[fwd_i.wb[k].trans_id].op
|
||||
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
|
||||
issue_instr_i[i].op
|
||||
)));
|
||||
assign rs3[i] = fwd_res[idx_hzd_rs3[i]];
|
||||
assign rs3_valid[i] = fwd_res_valid[idx_hzd_rs3[i]];
|
||||
|
||||
assign rs3_fwd_req[i][k] = (fwd_i.sbe[fwd_i.wb[k].trans_id].rd == issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]) & (fwd_i.still_issued[fwd_i.wb[k].trans_id]) & fwd_i.wb[k].valid & (~fwd_i.wb[k].ex_valid) & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
fwd_i.sbe[fwd_i.wb[k].trans_id].op
|
||||
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
|
||||
issue_instr_i[i].op
|
||||
)));
|
||||
|
||||
assign rs_data[i][k] = fwd_i.wb[k].data;
|
||||
end
|
||||
|
||||
for (genvar k = 0; unsigned'(k) < CVA6Cfg.NR_SB_ENTRIES; k++) begin : gen_rs_entries
|
||||
|
||||
assign rs1_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].rs1) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
fwd_i.sbe[k].op
|
||||
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
|
||||
issue_instr_i[i].op
|
||||
)));
|
||||
|
||||
assign rs2_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].rs2) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
fwd_i.sbe[k].op
|
||||
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
|
||||
issue_instr_i[i].op
|
||||
)));
|
||||
|
||||
assign rs3_fwd_req[i][k+CVA6Cfg.NrWbPorts] = (fwd_i.sbe[k].rd == issue_instr_i[i].result[ariane_pkg::REG_ADDR_SIZE-1:0]) & fwd_i.still_issued[k] & fwd_i.sbe[k].valid & ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
fwd_i.sbe[k].op
|
||||
)) == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
|
||||
issue_instr_i[i].op
|
||||
)));
|
||||
|
||||
assign rs_data[i][k+CVA6Cfg.NrWbPorts] = fwd_i.sbe[k].result;
|
||||
end
|
||||
|
||||
// use fixed prio here
|
||||
// this implicitly gives higher prio to WB ports
|
||||
rr_arb_tree #(
|
||||
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
|
||||
.DataWidth(CVA6Cfg.XLEN),
|
||||
.ExtPrio(1'b1),
|
||||
.AxiVldRdy(1'b1)
|
||||
) i_sel_rs1 (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i(1'b0),
|
||||
.rr_i ('0),
|
||||
.req_i (rs1_fwd_req[i]),
|
||||
.gnt_o (),
|
||||
.data_i (rs_data[i]),
|
||||
.gnt_i (1'b1),
|
||||
.req_o (rs1_available[i]),
|
||||
.data_o (rs1_res[i]),
|
||||
.idx_o ()
|
||||
);
|
||||
|
||||
rr_arb_tree #(
|
||||
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
|
||||
.DataWidth(CVA6Cfg.XLEN),
|
||||
.ExtPrio(1'b1),
|
||||
.AxiVldRdy(1'b1)
|
||||
) i_sel_rs2 (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i(1'b0),
|
||||
.rr_i ('0),
|
||||
.req_i (rs2_fwd_req[i]),
|
||||
.gnt_o (),
|
||||
.data_i (rs_data[i]),
|
||||
.gnt_i (1'b1),
|
||||
.req_o (rs2_available[i]),
|
||||
.data_o (rs2_res[i]),
|
||||
.idx_o ()
|
||||
);
|
||||
|
||||
|
||||
rr_arb_tree #(
|
||||
.NumIn(CVA6Cfg.NR_SB_ENTRIES + CVA6Cfg.NrWbPorts),
|
||||
.DataWidth(CVA6Cfg.XLEN),
|
||||
.ExtPrio(1'b1),
|
||||
.AxiVldRdy(1'b1)
|
||||
) i_sel_rs3 (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i(1'b0),
|
||||
.rr_i ('0),
|
||||
.req_i (rs3_fwd_req[i]),
|
||||
.gnt_o (),
|
||||
.data_i (rs_data[i]),
|
||||
.gnt_i (1'b1),
|
||||
.req_o (rs3_available[i]),
|
||||
.data_o (rs3[i]),
|
||||
.idx_o ()
|
||||
);
|
||||
|
||||
if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port
|
||||
assign rs3_res[i] = rs3[i][riscv::XLEN-1:0];
|
||||
end else begin : gen_fp_three_port
|
||||
if (CVA6Cfg.NrRgprPorts == 3) begin
|
||||
assign rs3_res[i] = rs3[i][CVA6Cfg.XLEN-1:0];
|
||||
end else begin
|
||||
assign rs3_res[i] = rs3[i][CVA6Cfg.FLen-1:0];
|
||||
end
|
||||
|
||||
assign rs1_has_raw[i] = !issue_instr_i[i].use_zimm && ((CVA6Cfg.FpPresent && is_rs1_fpr(
|
||||
issue_instr_i[i].op
|
||||
)) ? rd_clobber_fpr[issue_instr_i[i].rs1] != NONE :
|
||||
rd_clobber_gpr[issue_instr_i[i].rs1] != NONE);
|
||||
|
||||
assign rs1_valid[i] = rs1_available[i] && (CVA6Cfg.FpPresent && is_rs1_fpr(
|
||||
issue_instr_i[i].op
|
||||
) ? 1'b1 : ((rd_clobber_gpr[issue_instr_i[i].rs1] != CSR) ||
|
||||
(CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)));
|
||||
|
||||
assign rs2_has_raw[i] = ((CVA6Cfg.FpPresent && is_rs2_fpr(
|
||||
issue_instr_i[i].op
|
||||
)) ? rd_clobber_fpr[issue_instr_i[i].rs2] != NONE :
|
||||
rd_clobber_gpr[issue_instr_i[i].rs2] != NONE);
|
||||
|
||||
assign rs2_valid[i] = rs2_available[i] && (CVA6Cfg.FpPresent && is_rs2_fpr(
|
||||
issue_instr_i[i].op
|
||||
) ? 1'b1 : ((rd_clobber_gpr[issue_instr_i[i].rs2] != CSR) ||
|
||||
(CVA6Cfg.RVS && issue_instr_i[i].op == SFENCE_VMA)));
|
||||
|
||||
assign rs3_has_raw[i] = ((CVA6Cfg.FpPresent && is_imm_fpr(
|
||||
issue_instr_i[i].op
|
||||
)) ? rd_clobber_fpr[issue_instr_i[i].result[REG_ADDR_SIZE-1:0]] != NONE : 0);
|
||||
|
||||
assign rs3_valid[i] = rs3_available[i];
|
||||
assign rs3_fpr[i] = (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_i[i].op));
|
||||
|
||||
end
|
||||
|
||||
// ---------------
|
||||
|
@ -902,40 +786,6 @@ module issue_read_operands
|
|||
end
|
||||
end
|
||||
|
||||
always_comb begin : gen_check_waw_dependencies
|
||||
stall_waw = '1;
|
||||
for (int unsigned i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
|
||||
if (issue_instr_valid_i[i] && !fu_busy[i]) begin
|
||||
// -----------------------------------------
|
||||
// WAW - Write After Write Dependency Check
|
||||
// -----------------------------------------
|
||||
// no other instruction has the same destination register -> issue the instruction
|
||||
if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
issue_instr_i[i].op
|
||||
)) ? (rd_clobber_fpr[issue_instr_i[i].rd] == NONE) :
|
||||
(rd_clobber_gpr[issue_instr_i[i].rd] == NONE)) begin
|
||||
stall_waw[i] = 1'b0;
|
||||
end
|
||||
// or check that the target destination register will be written in this cycle by the
|
||||
// commit stage
|
||||
for (int unsigned c = 0; c < CVA6Cfg.NrCommitPorts; c++) begin
|
||||
if ((CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
|
||||
issue_instr_i[i].op
|
||||
)) ? (we_fpr_i[c] && waddr_i[c] == issue_instr_i[i].rd) :
|
||||
(we_gpr_i[c] && waddr_i[c] == issue_instr_i[i].rd)) begin
|
||||
stall_waw[i] = 1'b0;
|
||||
end
|
||||
end
|
||||
if (i > 0) begin
|
||||
if ((issue_instr_i[i].rd == issue_instr_i[i-1].rd) && (issue_instr_i[i].rd != '0)) begin
|
||||
stall_waw[i] = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// We can issue an instruction if we do not detect that any other instruction is writing the same
|
||||
// destination register.
|
||||
// We also need to check if there is an unresolved branch in the scoreboard.
|
||||
|
@ -946,7 +796,7 @@ module issue_read_operands
|
|||
// check that the instruction we got is valid
|
||||
// and that the functional unit we need is not busy
|
||||
if (issue_instr_valid_i[i] && !fu_busy[i]) begin
|
||||
if (!stall_raw[i] && !stall_waw[i]) begin
|
||||
if (!stall_raw[i]) begin
|
||||
issue_ack[i] = 1'b1;
|
||||
end
|
||||
if (issue_instr_i[i].ex.valid) begin
|
||||
|
|
121
core/raw_checker.sv
Normal file
121
core/raw_checker.sv
Normal file
|
@ -0,0 +1,121 @@
|
|||
// Copyright 2024 Thales DIS France SAS
|
||||
//
|
||||
// Licensed under the Solderpad Hardware Licence, Version 0.51 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
|
||||
// You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
//
|
||||
// Original Author: Junheng Zheng - Thales
|
||||
|
||||
module raw_checker
|
||||
import ariane_pkg::*;
|
||||
#(
|
||||
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
|
||||
) (
|
||||
// Subsystem Clock - SUBSYSTEM
|
||||
input logic clk_i,
|
||||
// Asynchronous reset active low - SUBSYSTEM
|
||||
input logic rst_ni,
|
||||
// Register source of the instruction to check RAW dependancies - SCOREBOARD
|
||||
input logic [REG_ADDR_SIZE-1:0] rs_i,
|
||||
// Type of register source (FPR or GPR) - SCOREBOARD
|
||||
input logic rs_fpr_i,
|
||||
// Registers of destination of the instructions already issued in the scoreboard - SCOREBOARD
|
||||
input logic [CVA6Cfg.NR_SB_ENTRIES-1:0][REG_ADDR_SIZE-1:0] rd_i,
|
||||
// Type of registers of destination (FPR or GPR) - SCOREBOARD
|
||||
input logic [CVA6Cfg.NR_SB_ENTRIES-1:0] rd_fpr_i,
|
||||
// Instructions in the scoreboard are still issued - SCOREBOARD
|
||||
input logic [CVA6Cfg.NR_SB_ENTRIES-1:0] still_issued_i,
|
||||
// Issue pointer - SCOREBOARD
|
||||
input logic [CVA6Cfg.TRANS_ID_BITS-1:0] issue_pointer_i,
|
||||
|
||||
// Index in the scoreboard of the most recent RAW dependancy - SCOREBOARD
|
||||
output logic [CVA6Cfg.TRANS_ID_BITS-1:0] idx_o,
|
||||
// Indicates if there is a RAW dependancy - SCOREBOARD
|
||||
output logic valid_o
|
||||
);
|
||||
|
||||
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] same_rd_as_rs;
|
||||
|
||||
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] same_rd_as_rs_before;
|
||||
logic last_before_valid;
|
||||
logic [CVA6Cfg.TRANS_ID_BITS-1:0] last_before_idx;
|
||||
|
||||
logic [CVA6Cfg.NR_SB_ENTRIES-1:0] same_rd_as_rs_after;
|
||||
logic last_after_valid;
|
||||
logic [CVA6Cfg.TRANS_ID_BITS-1:0] last_after_idx;
|
||||
|
||||
logic valid;
|
||||
logic rs_is_gpr0;
|
||||
|
||||
for (genvar i = 0; i < CVA6Cfg.NR_SB_ENTRIES; i++) begin
|
||||
assign same_rd_as_rs[i] = (rs_fpr_i == rd_fpr_i[i]) && (rs_i == rd_i[i]) && still_issued_i[i];
|
||||
assign same_rd_as_rs_before[i] = (i < issue_pointer_i) && same_rd_as_rs[i];
|
||||
assign same_rd_as_rs_after[i] = (i >= issue_pointer_i) && same_rd_as_rs[i];
|
||||
end
|
||||
|
||||
//Last finders
|
||||
// for instructions < instruction pointer
|
||||
rr_arb_tree #(
|
||||
.NumIn(CVA6Cfg.NR_SB_ENTRIES),
|
||||
.DataWidth(1),
|
||||
.ExtPrio(1'b1),
|
||||
.AxiVldRdy(1'b1)
|
||||
) i_last_finder_before (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i(1'b0),
|
||||
.rr_i ({$clog2(CVA6Cfg.NR_SB_ENTRIES){1'b1}}), // Highest index has highest prio.
|
||||
.req_i (same_rd_as_rs_before),
|
||||
.gnt_o (),
|
||||
.data_i ('0),
|
||||
.gnt_i (1'b1),
|
||||
.req_o (last_before_valid),
|
||||
.data_o (),
|
||||
.idx_o (last_before_idx)
|
||||
);
|
||||
|
||||
// for instructions >= instruction pointer
|
||||
rr_arb_tree #(
|
||||
.NumIn(CVA6Cfg.NR_SB_ENTRIES),
|
||||
.DataWidth(1),
|
||||
.ExtPrio(1'b1),
|
||||
.AxiVldRdy(1'b1)
|
||||
) i_last_finder_after (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i(1'b0),
|
||||
.rr_i ({$clog2(CVA6Cfg.NR_SB_ENTRIES){1'b1}}), // Highest index has highest prio.
|
||||
.req_i (same_rd_as_rs_after),
|
||||
.gnt_o (),
|
||||
.data_i ('0),
|
||||
.gnt_i (1'b1),
|
||||
.req_o (last_after_valid),
|
||||
.data_o (),
|
||||
.idx_o (last_after_idx)
|
||||
);
|
||||
|
||||
// take the minimum of the last indexes
|
||||
rr_arb_tree #(
|
||||
.NumIn(2),
|
||||
.DataWidth(CVA6Cfg.TRANS_ID_BITS),
|
||||
.ExtPrio(1'b1),
|
||||
.AxiVldRdy(1'b1)
|
||||
) min_finder (
|
||||
.clk_i (clk_i),
|
||||
.rst_ni (rst_ni),
|
||||
.flush_i(1'b0),
|
||||
.rr_i ('0), // Lowest index has highest prio.
|
||||
.req_i ({last_after_valid,last_before_valid}),
|
||||
.gnt_o (),
|
||||
.data_i ({last_after_idx,last_before_idx}),
|
||||
.gnt_i (1'b1),
|
||||
.req_o (valid),
|
||||
.data_o (idx_o),
|
||||
.idx_o ()
|
||||
);
|
||||
|
||||
assign rs_is_gpr0 = (rs_i == '0) && !rs_fpr_i;
|
||||
assign valid_o = valid && !rs_is_gpr0;
|
||||
|
||||
endmodule
|
Loading…
Add table
Add a link
Reference in a new issue