Move decode logic to respective units

Signed-off-by: Eric Matthews <ematthew@sfu.ca>
This commit is contained in:
Eric Matthews 2023-01-30 19:42:30 -05:00
parent ac362d0b5b
commit 6cf0d84c3e
14 changed files with 492 additions and 545 deletions

View file

@ -33,17 +33,28 @@ module alu_unit
input decode_packet_t decode_stage,
output unit_needed,
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
output logic uses_rd,
input issue_packet_t issue_stage,
input logic issue_stage_ready,
input logic [31:0] constant_alu,
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
input logic [31:0] rf [REGFILE_READ_PORTS],
unit_issue_interface.unit issue,
input alu_inputs_t alu_inputs,
unit_writeback_interface.unit wb
);
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
logic [31:0] alu_rs2_data;
logic [32:0] alu_data1;
logic [32:0] alu_data2;
logic imm_type;
alu_op_t alu_op;
alu_op_t alu_op_r;
logic subtract;
logic[XLEN:0] add_sub_result;
logic add_sub_carry_in;
logic[XLEN:0] adder_in1;
@ -55,46 +66,94 @@ module alu_unit
////////////////////////////////////////////////////
//Decode
assign instruction = decode_stage.instruction;
assign unit_needed = decode_stage.instruction inside {
JALR, JAL, LUI, AUIPC, ADDI, SLLI, SLTI, SLTIU, XORI, SRLI, SRAI, ORI, ANDI,
LUI, AUIPC, JAL, JALR,
ADDI, SLLI, SLTI, SLTIU, XORI, SRLI, SRAI, ORI, ANDI,
ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND
};
//Register usage reported to decode for the instruction currently in the decode stage
//  rd  : written by every instruction this unit accepts
//  rs1 : read by JALR and by all immediate and register-register arithmetic ops
//  rs2 : read by register-register arithmetic ops only
always_comb begin
    uses_rd = decode_stage.instruction inside {
        LUI, AUIPC,
        JAL, JALR,
        ADDI, SLTI, SLTIU, XORI, ORI, ANDI, SLLI, SRLI, SRAI,
        ADD, SUB, SLT, SLTU, XOR, OR, AND, SLL, SRL, SRA
    };

    //Any read port not explicitly listed below is reported as unused
    uses_rs = '0;
    uses_rs[RS2] = decode_stage.instruction inside {
        ADD, SUB, SLT, SLTU, XOR, OR, AND, SLL, SRL, SRA
    };
    uses_rs[RS1] = decode_stage.instruction inside {
        JALR,
        ADDI, SLTI, SLTIU, XORI, ORI, ANDI, SLLI, SRLI, SRAI,
        ADD, SUB, SLT, SLTU, XOR, OR, AND, SLL, SRL, SRA
    };
end
//Select which result path the decoded instruction uses.
//Priority: constant (by opcode), then set-less-than, then shift (both by fn3);
//everything else, including the logic ops, goes through the add/sub path.
always_comb begin
    if (instruction.upper_opcode inside {LUI_T, AUIPC_T, JAL_T, JALR_T})
        alu_op = ALU_CONSTANT;
    else if (instruction.fn3 inside {SLTU_fn3, SLT_fn3})
        alu_op = ALU_SLT;
    else if (instruction.fn3 inside {SLL_fn3, SRA_fn3})
        alu_op = ALU_SHIFT;
    else
        alu_op = ALU_ADD_SUB;
end
//Decode-time control signals registered for use in the issue stage.
//Captured only while issue_stage_ready is asserted.
//Note: the LUI/AUIPC/JAL/JALR result itself is not computed here; it arrives
//on the constant_alu input and is selected when alu_op_r is ALU_CONSTANT.
always_ff @(posedge clk) begin
    if (issue_stage_ready) begin
        //Comparisons are evaluated as a subtraction
        subtract <= decode_stage.instruction inside {SUB, SLTI, SLTIU, SLT, SLTU};
        alu_op_r <= alu_op;
        //Second operand comes from the immediate rather than rs2
        imm_type <= instruction.upper_opcode inside {ARITH_IMM_T};
    end
end
//logic and adder
//Both operands are widened to 33 bits. The added MSB is a copy of the operand's
//bit 31 when fn3[0] is clear and zero when fn3[0] is set, so the same adder
//serves the signed and unsigned (SLTU) comparisons.
assign alu_data1 = {(rf[RS1][31] & ~issue_stage.fn3[0]), rf[RS1]};//(fn3[0] is SLTU_fn3);
//Second operand: sign-extended instruction[31:20] when imm_type is set, otherwise rs2
assign alu_rs2_data = imm_type ? 32'(signed'(issue_stage.instruction[31:20])) : rf[RS2];
assign alu_data2 = {(alu_rs2_data[31] & ~issue_stage.fn3[0]), alu_rs2_data};
////////////////////////////////////////////////////
//Issue
//Logic ops put through the adder carry chain to reduce resources
always_comb begin
case (alu_inputs.logic_op)
ALU_LOGIC_XOR : adder_in1 = alu_inputs.in1 ^ alu_inputs.in2;
ALU_LOGIC_OR : adder_in1 = alu_inputs.in1 | alu_inputs.in2;
ALU_LOGIC_AND : adder_in1 = alu_inputs.in1 & alu_inputs.in2;
default : adder_in1 = alu_inputs.in1; //ADD/SUB/SLT/SLTU
case (issue_stage.fn3)
XOR_fn3 : adder_in1 = alu_data1 ^ alu_data2;
OR_fn3 : adder_in1 = alu_data1 | alu_data2;
AND_fn3 : adder_in1 = alu_data1 & alu_data2;
default : adder_in1 = alu_data1; //ADD/SUB/SLT/SLTU
endcase
case (alu_inputs.logic_op)
ALU_LOGIC_XOR,
ALU_LOGIC_OR,
ALU_LOGIC_AND : adder_in2 = 0;
default : adder_in2 = alu_inputs.in2 ^ {33{alu_inputs.subtract}};
case (issue_stage.fn3)
XOR_fn3,
OR_fn3,
AND_fn3 : adder_in2 = 0;
default : adder_in2 = alu_data2 ^ {33{subtract}};
endcase
end
//Add/Sub ops
assign {add_sub_result, add_sub_carry_in} = {adder_in1, 1'b1} + {adder_in2, alu_inputs.subtract};
assign {add_sub_result, add_sub_carry_in} = {adder_in1, 1'b1} + {adder_in2, subtract};
//Shift ops
barrel_shifter shifter (
.shifter_input(alu_inputs.shifter_in),
.shift_amount(alu_inputs.shift_amount),
.arith(alu_inputs.arith),
.lshift(alu_inputs.lshift),
.shifter_input(rf[RS1]),
.shift_amount(imm_type ? issue_rs_addr[RS2] : rf[RS2][4:0]),
.arith(rf[RS1][31] & issue_stage.instruction[30]),
.lshift(~issue_stage.fn3[2]),
.shifted_result(shift_result)
);
always_comb begin
case (alu_inputs.alu_op)
ALU_CONSTANT : result = alu_inputs.constant_adder;
case (alu_op_r)
ALU_CONSTANT : result = constant_alu;//LUI, AUIPC, JAL, JALR
ALU_ADD_SUB : result = add_sub_result[31:0];
ALU_SLT : result = {31'b0, add_sub_result[XLEN]};
ALU_SLT : result = {31'b0, add_sub_result[32]};
default : result = shift_result; //ALU_SHIFT
endcase
end

View file

@ -37,18 +37,21 @@ module branch_unit
input decode_packet_t decode_stage,
output logic unit_needed,
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
output logic uses_rd,
input issue_packet_t issue_stage,
input logic issue_stage_ready,
input logic [31:0] constant_alu,
input logic [31:0] rf [REGFILE_READ_PORTS],
unit_issue_interface.unit issue,
input branch_inputs_t branch_inputs,
output branch_results_t br_results,
output logic branch_flush,
exception_interface.unit exception
);
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
logic branch_issued_r;
logic result;
@ -66,15 +69,87 @@ module branch_unit
logic instruction_is_completing;
logic branch_complete;
logic jal_jalr_ex;
logic jal_or_jalr_ex;
logic [32:0] rs1;
logic [32:0] rs2;
////////////////////////////////////////////////////
//Implementation
////////////////////////////////////////////////////
//Decode
assign instruction = decode_stage.instruction;
assign unit_needed = decode_stage.instruction inside {
BEQ, BNE, BLT, BGE, BLTU, BGEU, JALR, JAL
};
//Register usage reported to decode:
//  conditional branches read rs1 and rs2, JALR reads rs1 only, JAL reads neither
always_comb begin
    uses_rd = 0;//JALR/JAL writeback handled by ALU

    //Any read port not explicitly listed below is reported as unused
    uses_rs = '0;
    uses_rs[RS2] = decode_stage.instruction inside {
        BEQ, BNE, BLT, BGE, BLTU, BGEU
    };
    uses_rs[RS1] = decode_stage.instruction inside {
        JALR,
        BEQ, BNE, BLT, BGE, BLTU, BGEU
    };
end
////////////////////////////////////////////////////
//RAS Support
//Classifies the decoded jump as a call and/or return for the return address
//stack, based on whether rs1/rd are link registers.
logic rs1_link;
logic rd_link;
logic rs1_eq_rd;
logic is_return;
logic is_call;
//Registers x1 and x5 are treated as link registers
assign rs1_link = instruction.rs1_addr inside {1,5};
assign rd_link = instruction.rd_addr inside {1,5};
assign rs1_eq_rd = (instruction.rs1_addr == instruction.rd_addr);
//Registered alongside the instruction as it moves into the issue stage
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
//Return: JALR whose rs1 is a link register and whose rd is either not a link
//register or is a different link register than rs1
is_return <= (instruction.upper_opcode inside {JALR_T}) & ((rs1_link & ~rd_link) | (rs1_link & rd_link & ~rs1_eq_rd));
//Call: JAL or JALR whose rd is a link register
is_call <= (instruction.upper_opcode inside {JAL_T, JALR_T}) & rd_link;
end
end
////////////////////////////////////////////////////
//PC Offset
//Immediate fields are reassembled from the decode-stage instruction word.
//jal_imm and br_imm exclude the implicit zero LSB; it is appended where
//pc_offset is formed.
logic[19:0] jal_imm;
logic[11:0] jalr_imm;
logic[11:0] br_imm;
logic [20:0] pc_offset;
logic [20:0] pc_offset_r;
//JAL: instruction bits {31, 19:12, 20, 30:21}
assign jal_imm = {decode_stage.instruction[31], decode_stage.instruction[19:12], decode_stage.instruction[20], decode_stage.instruction[30:21]};
//JALR: instruction bits 31:20
assign jalr_imm = decode_stage.instruction[31:20];
//Conditional branch: instruction bits {31, 7, 30:25, 11:8}
assign br_imm = {decode_stage.instruction[31], decode_stage.instruction[7], decode_stage.instruction[30:25], decode_stage.instruction[11:8]};
//Choose the offset by instruction bits [3:2] and sign-extend it to 21 bits:
//  2'b11 -> JAL immediate, 2'b01 -> JALR immediate, anything else -> branch immediate
always_comb begin
    if (decode_stage.instruction[3:2] == 2'b11)
        pc_offset = 21'(signed'({jal_imm, 1'b0}));
    else if (decode_stage.instruction[3:2] == 2'b01)
        pc_offset = 21'(signed'(jalr_imm));
    else
        pc_offset = 21'(signed'({br_imm, 1'b0}));
end

//Hold the selected offset for the issue stage
always_ff @(posedge clk) begin
    if (issue_stage_ready) begin
        pc_offset_r <= pc_offset;
    end
end
////////////////////////////////////////////////////
//Instruction-type flags registered for the issue stage.
//They are derived from instruction bits [3:2], the same bits the pc_offset
//selection uses (2'b11 JAL, 2'b01 JALR, otherwise conditional branch).
logic jal;
logic jalr;
logic jal_or_jalr;
logic br_use_signed;
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
jal <= decode_stage.instruction[3];
jalr <= (~decode_stage.instruction[3] & decode_stage.instruction[2]);
jal_or_jalr <= decode_stage.instruction[2];
//Cleared only for the unsigned comparisons (BLTU/BGEU)
br_use_signed <= !(instruction.fn3 inside {BLTU_fn3, BGEU_fn3});
end
end
////////////////////////////////////////////////////
//Issue
@ -88,36 +163,40 @@ module branch_unit
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) branch_issued_m (
.clk, .rst,
.set(issue.new_request),
.clr(branch_inputs.issue_pc_valid | exception.valid),
.clr(issue_stage.stage_valid | exception.valid),
.result(branch_issued_r)
);
//To determine if the branch was predicted correctly we need to wait until the
//subsequent instruction has reached the issue stage
assign instruction_is_completing = branch_issued_r & branch_inputs.issue_pc_valid;
assign instruction_is_completing = branch_issued_r & issue_stage.stage_valid;
//Sign extend
//Operands are widened to 33 bits: the added MSB copies bit 31 when
//br_use_signed is set and is zero otherwise (unsigned comparison).
assign rs1 = {(rf[RS1][31] & br_use_signed), rf[RS1]};
assign rs2 = {(rf[RS2][31] & br_use_signed), rf[RS2]};
////////////////////////////////////////////////////
//Branch/Jump target determination
//Branch comparison and final address calculation
//are performed in the issue stage
branch_comparator bc (
.less_than(branch_inputs.fn3[2]),
.a(branch_inputs.rs1),
.b(branch_inputs.rs2),
.xor_result(branch_inputs.fn3[0]),
.less_than(issue_stage.fn3[2]),
.a(rs1),
.b(rs2),
.xor_result(issue_stage.fn3[0]),
.result(result)
);
assign branch_taken = result | branch_inputs.jal_jalr;
assign branch_taken = result | jal_or_jalr;
assign jump_pc = (branch_inputs.jalr ? branch_inputs.rs1[31:0] : branch_inputs.issue_pc) + 32'(signed'(branch_inputs.pc_offset));
assign new_pc = branch_taken ? jump_pc : branch_inputs.pc_p4;
assign jump_pc = (jalr ? rs1[31:0] : issue_stage.pc) + 32'(signed'(pc_offset_r));
assign new_pc = branch_taken ? jump_pc : constant_alu;
always_ff @(posedge clk) begin
if (issue.new_request) begin
branch_taken_ex <= branch_taken;
new_pc_ex <= {new_pc[31:1], new_pc[0] & ~branch_inputs.jalr};
new_pc_ex <= {new_pc[31:1], new_pc[0] & ~jalr};
id_ex <= issue.id;
jal_jalr_ex <= branch_inputs.jal_jalr;
jal_or_jalr_ex <= jal_or_jalr;
end
end
@ -145,13 +224,13 @@ module branch_unit
////////////////////////////////////////////////////
//Predictor support
logic is_return;
logic is_call;
logic is_return_ex;
logic is_call_ex;
always_ff @(posedge clk) begin
if (issue.possible_issue) begin
is_return <= branch_inputs.is_return;
is_call <= branch_inputs.is_call;
pc_ex <= branch_inputs.issue_pc;
is_return_ex <= is_return;
is_call_ex <= is_call;
pc_ex <= issue_stage.pc;
end
end
@ -160,11 +239,11 @@ module branch_unit
assign br_results.pc = pc_ex;
assign br_results.target_pc = new_pc_ex;
assign br_results.branch_taken = branch_taken_ex;
assign br_results.is_branch = ~jal_jalr_ex;
assign br_results.is_return = is_return;
assign br_results.is_call = is_call;
assign br_results.is_branch = ~jal_or_jalr_ex;
assign br_results.is_return = is_return_ex;
assign br_results.is_call = is_call_ex;
assign branch_flush = instruction_is_completing && (branch_inputs.issue_pc[31:1] != new_pc_ex[31:1]);
assign branch_flush = instruction_is_completing & (issue_stage.pc[31:1] != new_pc_ex[31:1]);
////////////////////////////////////////////////////
//End of Implementation

View file

@ -38,14 +38,16 @@ module csr_unit
input decode_packet_t decode_stage,
output logic unit_needed,
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
output logic uses_rd,
input issue_packet_t issue_stage,
input logic issue_stage_ready,
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
input logic [31:0] rf [REGFILE_READ_PORTS],
//Unit Interfaces
unit_issue_interface.unit issue,
input csr_inputs_t csr_inputs,
unit_writeback_interface.unit wb,
//Privilege
@ -82,10 +84,19 @@ module csr_unit
input interrupt_t m_interrupt
);
//CSR request assembled from the issue-stage instruction
typedef struct packed{
csr_addr_t addr;//CSR address, taken from instruction[31:20]
logic[1:0] op;//operation select, taken from fn3[1:0]
logic reads;//cleared when the instruction is decoded as not reading the CSR
logic writes;//cleared when the instruction is decoded as not writing the CSR
logic [XLEN-1:0] data;//operand: rs1 data or the zero-extended 5-bit immediate
} csr_inputs_t;
logic busy;
logic commit;
logic commit_in_progress;
csr_inputs_t csr_inputs;
csr_inputs_t csr_inputs_r;
privilege_t privilege_level;
@ -114,12 +125,20 @@ module csr_unit
////////////////////////////////////////////////////
//Decode
assign unit_needed = decode_stage.instruction inside {
CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI
};
assign unit_needed = decode_stage.instruction inside {CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI};
//Register usage reported to decode:
//  rd  : reported for every CSR instruction
//  rs1 : read only by the register forms; the immediate forms are not listed
always_comb begin
    uses_rd = unit_needed;

    //Any read port not explicitly listed below is reported as unused
    uses_rs = '0;
    uses_rs[RS1] = decode_stage.instruction inside {
        CSRRW, CSRRS, CSRRC
    };
end
////////////////////////////////////////////////////
//Issue
//CSR request derived from the instruction in the issue stage
assign csr_inputs.addr = issue_stage.instruction[31:20];
assign csr_inputs.op = issue_stage.fn3[1:0];
//Immediate forms (fn3[2] set) use the zero-extended 5-bit rs1 field as the operand
assign csr_inputs.data = issue_stage.fn3[2] ? {27'b0, issue_rs_addr[RS1]} : rf[RS1];
//CSRRW/CSRRWI with rd == x0 does not read the CSR
assign csr_inputs.reads = ~((issue_stage.fn3[1:0] == CSR_RW) && (issue_stage.rd_addr == 0));
//CSRRS/CSRRC and their immediate forms do not write the CSR when the rs1/uimm
//field is zero. fn3[1] is set for both the set and clear encodings and clear
//for CSRRW/CSRRWI, which always write.
//(Previously only the clear encoding was checked, so a set op with a zero
//field, e.g. a plain CSR read, was still flagged as a write.)
assign csr_inputs.writes = ~(issue_stage.fn3[1] && (issue_rs_addr[RS1] == 0));
assign processing_csr = busy | issue.new_request;
assign issue.ready = ~busy;

View file

@ -33,6 +33,9 @@ module custom_unit
input decode_packet_t decode_stage,
output logic unit_needed,
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
output logic uses_rd,
input issue_packet_t issue_stage,
input logic issue_stage_ready,
input logic [31:0] rf [REGFILE_READ_PORTS],
@ -56,8 +59,13 @@ module custom_unit
//The following signals should be asserted when the decoded instruction
//is handled by this execution unit.
assign unit_needed = instruction.upper_opcode inside {CUSTOM_T};
assign unit_needed = decode_stage.instruction inside {CUSTOM};
//Register usage reported to decode: a CUSTOM instruction is reported as
//reading rs1 and rs2 and writing rd.
always_comb begin
    uses_rd = decode_stage.instruction inside {CUSTOM};

    //Any read port not explicitly listed below is reported as unused
    uses_rs = '0;
    uses_rs[RS2] = decode_stage.instruction inside {CUSTOM};
    uses_rs[RS1] = decode_stage.instruction inside {CUSTOM};
end
////////////////////////////////////////////////////
//Issue
assign issue.ready = ~wb.done;

View file

@ -116,14 +116,10 @@ module cva5
register_file_issue_interface #(.NUM_WB_GROUPS(CONFIG.NUM_WB_GROUPS)) rf_issue();
logic [NUM_UNITS-1:0] unit_needed;
logic [NUM_UNITS-1:0][REGFILE_READ_PORTS-1:0] unit_uses_rs;
logic [NUM_UNITS-1:0] unit_uses_rd;
alu_inputs_t alu_inputs;
load_store_inputs_t ls_inputs;
branch_inputs_t branch_inputs;
mul_inputs_t mul_inputs;
div_inputs_t div_inputs;
gc_inputs_t gc_inputs;
csr_inputs_t csr_inputs;
logic [31:0] constant_alu;
unit_issue_interface unit_issue [NUM_UNITS-1:0]();
@ -197,6 +193,8 @@ module cva5
//Decode Unit and Fetch Unit
logic issue_stage_ready;
phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS];
rs_addr_t issue_rs_addr [REGFILE_READ_PORTS];
logic illegal_instruction;
logic instruction_issued;
logic instruction_issued_with_rd;
@ -361,6 +359,8 @@ module cva5
.decode (decode),
.decode_advance (decode_advance),
.unit_needed (unit_needed),
.unit_uses_rs (unit_uses_rs),
.unit_uses_rd (unit_uses_rd),
.renamer (decode_rename_interface),
.decode_uses_rd (decode_uses_rd),
.decode_rd_addr (decode_rd_addr),
@ -372,14 +372,10 @@ module cva5
.instruction_issued_with_rd (instruction_issued_with_rd),
.issue (issue),
.issue_stage_ready (issue_stage_ready),
.issue_phys_rs_addr (issue_phys_rs_addr),
.issue_rs_addr (issue_rs_addr),
.rf (rf_issue),
.alu_inputs (alu_inputs),
.ls_inputs (ls_inputs),
.branch_inputs (branch_inputs),
.gc_inputs (gc_inputs),
.csr_inputs (csr_inputs),
.mul_inputs (mul_inputs),
.div_inputs (div_inputs),
.constant_alu (constant_alu),
.unit_issue (unit_issue),
.gc (gc),
.current_privilege (current_privilege),
@ -413,9 +409,11 @@ module cva5
.issue_stage (issue),
.issue_stage_ready (issue_stage_ready),
.unit_needed (unit_needed[UNIT_IDS.BR]),
.rf (rf_issue.data),
.uses_rs (unit_uses_rs[UNIT_IDS.BR]),
.uses_rd (unit_uses_rd[UNIT_IDS.BR]),
.rf (rf_issue.data),
.constant_alu (constant_alu),
.issue (unit_issue[UNIT_IDS.BR]),
.branch_inputs (branch_inputs),
.br_results (br_results),
.branch_flush (branch_flush),
.exception (exception[BR_EXCEPTION])
@ -429,8 +427,11 @@ module cva5
.issue_stage (issue),
.issue_stage_ready (issue_stage_ready),
.unit_needed (unit_needed[UNIT_IDS.ALU]),
.uses_rs (unit_uses_rs[UNIT_IDS.ALU]),
.uses_rd (unit_uses_rd[UNIT_IDS.ALU]),
.rf (rf_issue.data),
.alu_inputs (alu_inputs),
.constant_alu (constant_alu),
.issue_rs_addr (issue_rs_addr),
.issue (unit_issue[UNIT_IDS.ALU]),
.wb (unit_wb1[ALU_UNIT_WB1_ID])
);
@ -444,8 +445,12 @@ module cva5
.issue_stage (issue),
.issue_stage_ready (issue_stage_ready),
.unit_needed (unit_needed[UNIT_IDS.LS]),
.uses_rs (unit_uses_rs[UNIT_IDS.LS]),
.uses_rd (unit_uses_rd[UNIT_IDS.LS]),
.instruction_issued_with_rd (instruction_issued_with_rd),
.issue_rs_addr (issue_rs_addr),
.rs2_inuse (rf_issue.inuse[RS2]),
.rf (rf_issue.data),
.ls_inputs (ls_inputs),
.issue (unit_issue[UNIT_IDS.LS]),
.dcache_on (1'b1),
.clear_reservation (1'b0),
@ -503,9 +508,11 @@ module cva5
.decode_stage (decode),
.issue_stage (issue),
.issue_stage_ready (issue_stage_ready),
.issue_rs_addr (issue_rs_addr),
.unit_needed (unit_needed[UNIT_IDS.CSR]),
.uses_rs (unit_uses_rs[UNIT_IDS.CSR]),
.uses_rd (unit_uses_rd[UNIT_IDS.CSR]),
.rf (rf_issue.data),
.csr_inputs (csr_inputs),
.issue (unit_issue[UNIT_IDS.CSR]),
.wb (unit_wb3[CSR_UNIT_WB3_ID]),
.current_privilege(current_privilege),
@ -536,9 +543,11 @@ module cva5
.issue_stage (issue),
.issue_stage_ready (issue_stage_ready),
.unit_needed (unit_needed[UNIT_IDS.IEC]),
.uses_rs (unit_uses_rs[UNIT_IDS.IEC]),
.uses_rd (unit_uses_rd[UNIT_IDS.IEC]),
.constant_alu (constant_alu),
.rf (rf_issue.data),
.issue (unit_issue[UNIT_IDS.IEC]),
.gc_inputs (gc_inputs),
.branch_flush (branch_flush),
.exception (exception),
.exception_target_pc (exception_target_pc),
@ -566,8 +575,9 @@ module cva5
.issue_stage (issue),
.issue_stage_ready (issue_stage_ready),
.unit_needed (unit_needed[UNIT_IDS.MUL]),
.uses_rs (unit_uses_rs[UNIT_IDS.MUL]),
.uses_rd (unit_uses_rd[UNIT_IDS.MUL]),
.rf (rf_issue.data),
.mul_inputs (mul_inputs),
.issue (unit_issue[UNIT_IDS.MUL]),
.wb (unit_wb3[MUL_UNIT_WB3_ID])
);
@ -577,12 +587,16 @@ module cva5
div_unit div_unit_block (
.clk (clk),
.rst (rst),
.gc (gc),
.instruction_issued_with_rd (instruction_issued_with_rd),
.decode_stage (decode),
.issue_stage (issue),
.issue_stage_ready (issue_stage_ready),
.issue_phys_rs_addr (issue_phys_rs_addr),
.unit_needed (unit_needed[UNIT_IDS.DIV]),
.uses_rs (unit_uses_rs[UNIT_IDS.DIV]),
.uses_rd (unit_uses_rd[UNIT_IDS.DIV]),
.rf (rf_issue.data),
.div_inputs (div_inputs),
.issue (unit_issue[UNIT_IDS.DIV]),
.wb (unit_wb3[DIV_UNIT_WB3_ID])
);
@ -595,6 +609,8 @@ module cva5
.rst (rst),
.decode_stage (decode),
.unit_needed (unit_needed[UNIT_IDS.CUSTOM]),
.uses_rs (unit_uses_rs[UNIT_IDS.CUSTOM]),
.uses_rd (unit_uses_rd[UNIT_IDS.CUSTOM]),
.issue_stage (issue),
.issue_stage_ready (issue_stage_ready),
.rf (rf_issue.data),

View file

@ -41,13 +41,6 @@ package cva5_types;
ALU_SHIFT = 2'b11
} alu_op_t;
typedef enum logic [1:0] {
ALU_LOGIC_XOR = 2'b00,
ALU_LOGIC_OR = 2'b01,
ALU_LOGIC_AND = 2'b10,
ALU_LOGIC_ADD = 2'b11
} alu_logic_op_t;
typedef struct packed{
logic valid;
exception_code_t code;
@ -86,34 +79,6 @@ package cva5_types;
fetch_metadata_t fetch_metadata;
} issue_packet_t;
typedef struct packed{
logic [XLEN:0] in1;//contains sign padding bit for slt operation
logic [XLEN:0] in2;//contains sign padding bit for slt operation
logic [XLEN-1:0] shifter_in;
logic [31:0] constant_adder;
alu_op_t alu_op;
alu_logic_op_t logic_op;
logic [4:0] shift_amount;
logic subtract;
logic arith;//contains sign padding bit for arithmetic shift right operation
logic lshift;
} alu_inputs_t;
typedef struct packed {
logic [XLEN:0] rs1;
logic [XLEN:0] rs2;
logic [31:0] pc_p4;
logic [2:0] fn3;
logic [31:0] issue_pc;
logic issue_pc_valid;
logic jal;
logic jalr;
logic jal_jalr;
logic is_call;
logic is_return;
logic [20:0] pc_offset;
} branch_inputs_t;
typedef struct packed {
id_t id;
logic valid;
@ -138,48 +103,6 @@ package cva5_types;
logic [4:0] op;
} amo_details_t;
typedef struct packed{
logic [XLEN-1:0] rs1;
logic [XLEN-1:0] rs2;
logic [11:0] offset;
logic [2:0] fn3;
logic load;
logic store;
logic fence;
logic forwarded_store;
id_t store_forward_id;
//amo support
amo_details_t amo;
} load_store_inputs_t;
typedef struct packed{
logic [XLEN-1:0] rs1;
logic [XLEN-1:0] rs2;
logic [1:0] op;
} mul_inputs_t;
typedef struct packed{
logic [XLEN-1:0] rs1;
logic [XLEN-1:0] rs2;
logic [1:0] op;
logic reuse_result;
} div_inputs_t;
typedef struct packed{
csr_addr_t addr;
logic[1:0] op;
logic reads;
logic writes;
logic [XLEN-1:0] data;
} csr_inputs_t;
typedef struct packed{
logic [31:0] pc_p4;
logic is_ifence;
logic is_mret;
logic is_sret;
} gc_inputs_t;
typedef struct packed {
logic [31:0] addr;
logic load;

View file

@ -26,6 +26,7 @@ module decode_and_issue
import riscv_types::*;
import cva5_types::*;
import csr_types::*;
import opcodes::*;
# (
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG,
@ -47,6 +48,8 @@ module decode_and_issue
renamer_interface.decode renamer,
input logic [NUM_UNITS-1:0] unit_needed,
input logic [NUM_UNITS-1:0][REGFILE_READ_PORTS-1:0] unit_uses_rs,
input logic [NUM_UNITS-1:0] unit_uses_rd,
output logic decode_uses_rd,
output rs_addr_t decode_rd_addr,
@ -57,18 +60,14 @@ module decode_and_issue
output logic instruction_issued,
output logic instruction_issued_with_rd,
output issue_packet_t issue,
output rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
output phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS],
output logic issue_stage_ready,
//Register File
register_file_issue_interface.issue rf,
output alu_inputs_t alu_inputs,
output load_store_inputs_t ls_inputs,
output branch_inputs_t branch_inputs,
output gc_inputs_t gc_inputs,
output csr_inputs_t csr_inputs,
output mul_inputs_t mul_inputs,
output div_inputs_t div_inputs,
output logic [31:0] constant_alu,
unit_issue_interface.decode unit_issue [NUM_UNITS-1:0],
@ -78,38 +77,28 @@ module decode_and_issue
exception_interface.unit exception
);
logic [2:0] fn3;
logic [6:0] opcode;
logic [4:0] opcode_trim;
common_instruction_t decode_instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
logic uses_rs [REGFILE_READ_PORTS];
logic uses_rd;
rs_addr_t rs_addr [REGFILE_READ_PORTS];
rs_addr_t rd_addr;
logic is_csr;
logic is_fence;
logic is_ifence;
logic csr_imm_op;
logic environment_op;
rs_addr_t decode_rs_addr [REGFILE_READ_PORTS];
logic issue_valid;
logic operands_ready;
logic mult_div_op;
logic [NUM_UNITS-1:0] unit_needed_issue_stage;
logic [NUM_UNITS-1:0] unit_ready;
logic [NUM_UNITS-1:0] issue_ready;
logic [NUM_UNITS-1:0] issue_to;
rs_addr_t issue_rs_addr [REGFILE_READ_PORTS];
phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS];
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rs_wb_group [REGFILE_READ_PORTS];
logic issue_uses_rs [REGFILE_READ_PORTS];
logic pre_issue_exception_pending;
logic illegal_instruction_pattern;
logic illegal_instruction_pattern_r;
logic [REGFILE_READ_PORTS-1:0] rs_conflict;
@ -124,37 +113,34 @@ module decode_and_issue
assign decode_advance = decode.valid & issue_stage_ready;
//Instruction aliases
assign opcode = decode.instruction[6:0];
assign opcode_trim = opcode[6:2];
assign fn3 = decode.instruction[14:12];
assign rs_addr[RS1] = decode.instruction[19:15];
assign rs_addr[RS2] = decode.instruction[24:20];
assign rd_addr = decode.instruction[11:7];
assign is_csr = CONFIG.INCLUDE_CSRS & (opcode_trim == SYSTEM_T) & (fn3 != 0);
assign is_fence = (opcode_trim == FENCE_T) & ~fn3[0];
assign is_ifence = CONFIG.INCLUDE_IFENCE & (opcode_trim == FENCE_T) & fn3[0];
assign csr_imm_op = (opcode_trim == SYSTEM_T) & fn3[2];
assign environment_op = (opcode_trim == SYSTEM_T) & (fn3 == 0);
//View the raw decode-stage instruction word through the common field layout
assign decode_instruction = decode.instruction;
//Source register addresses per read port. Ports other than RS1/RS2 (if any)
//default to address zero.
always_comb begin
decode_rs_addr = '{default: '0};
decode_rs_addr[RS1] = decode_instruction.rs1_addr;
decode_rs_addr[RS2] = decode_instruction.rs2_addr;
end
////////////////////////////////////////////////////
//Register File Support
assign uses_rs[RS1] = opcode_trim inside {JALR_T, BRANCH_T, LOAD_T, STORE_T, ARITH_IMM_T, ARITH_T, AMO_T} | is_csr;
assign uses_rs[RS2] = opcode_trim inside {BRANCH_T, ARITH_T, AMO_T};//Stores are exempted due to store forwarding
assign uses_rd = opcode_trim inside {LUI_T, AUIPC_T, JAL_T, JALR_T, LOAD_T, ARITH_IMM_T, ARITH_T} | is_csr;
//Combine the register usage reported by the individual units:
//a read port (or rd) is in use if any unit reports using it.
always_comb begin
    uses_rs = '{default: 0};
    for (int port_idx = 0; port_idx < REGFILE_READ_PORTS; port_idx++) begin
        for (int unit_idx = 0; unit_idx < NUM_UNITS; unit_idx++) begin
            uses_rs[port_idx] |= unit_uses_rs[unit_idx][port_idx];
        end
    end

    uses_rd = |unit_uses_rd;
end
////////////////////////////////////////////////////
//Renamer Support
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] renamer_wb_group;
always_comb begin
renamer_wb_group = 2;
renamer_wb_group = (CONFIG.NUM_WB_GROUPS - 1);
if (unit_needed[UNIT_IDS.ALU])
renamer_wb_group = 0;
else if (unit_needed[UNIT_IDS.LS] )
renamer_wb_group = 1;
end
assign renamer.rd_addr = rd_addr;
assign renamer.rs_addr = rs_addr;
assign renamer.rd_addr = decode_instruction.rd_addr;
assign renamer.rs_addr = decode_rs_addr;
assign renamer.uses_rd = uses_rd;
assign renamer.rd_wb_group = renamer_wb_group;
@ -163,7 +149,7 @@ module decode_and_issue
////////////////////////////////////////////////////
//Decode ID Support
assign decode_uses_rd = uses_rd;
assign decode_rd_addr = rd_addr;
assign decode_rd_addr = decode_instruction.rd_addr;
assign decode_phys_rd_addr = renamer.phys_rd_addr;
assign decode_phys_rs_addr = renamer.phys_rs_addr;
assign decode_rs_wb_group = renamer.rs_wb_group;
@ -175,12 +161,12 @@ module decode_and_issue
issue.pc <= decode.pc;
issue.instruction <= decode.instruction;
issue.fetch_metadata <= decode.fetch_metadata;
issue.fn3 <= fn3;
issue.opcode <= opcode;
issue_rs_addr <= rs_addr;
issue.fn3 <= decode_instruction.fn3;
issue.opcode <= decode.instruction[6:0];
issue_rs_addr <= decode_rs_addr;
issue_phys_rs_addr <= renamer.phys_rs_addr;
issue_rs_wb_group <= renamer.rs_wb_group;
issue.rd_addr <= rd_addr;
issue.rd_addr <= decode_instruction.rd_addr;
issue.phys_rd_addr <= renamer.phys_rd_addr;
issue.is_multicycle <= ~unit_needed[UNIT_IDS.ALU];
issue.id <= decode.id;
@ -231,290 +217,14 @@ module decode_and_issue
assign rf.single_cycle_or_flush = (instruction_issued_with_rd & |issue.rd_addr & ~issue.is_multicycle) | (issue.stage_valid & issue.uses_rd & |issue.rd_addr & gc.fetch_flush);
////////////////////////////////////////////////////
//ALU unit inputs
logic [XLEN-1:0] alu_rs2_data;
logic alu_imm_type;
logic [31:0] constant_alu;
alu_op_t alu_op;
alu_op_t alu_op_r;
alu_logic_op_t alu_logic_op;
alu_logic_op_t alu_logic_op_r;
logic alu_subtract;
logic sub_instruction;
always_comb begin
case (opcode_trim) inside
LUI_T, AUIPC_T, JAL_T, JALR_T : alu_op = ALU_CONSTANT;
default :
case (fn3) inside
SLTU_fn3, SLT_fn3 : alu_op = ALU_SLT;
SLL_fn3, SRA_fn3 : alu_op = ALU_SHIFT;
default : alu_op = ALU_ADD_SUB;
endcase
endcase
end
always_comb begin
case (fn3)
XOR_fn3 : alu_logic_op = ALU_LOGIC_XOR;
OR_fn3 : alu_logic_op = ALU_LOGIC_OR;
AND_fn3 : alu_logic_op = ALU_LOGIC_AND;
default : alu_logic_op = ALU_LOGIC_ADD;//ADD/SUB/SLT/SLTU
endcase
end
assign sub_instruction = (fn3 == ADD_SUB_fn3) && decode.instruction[30] && opcode[5];//If ARITH instruction
//Constant ALU:
// provides LUI, AUIPC, JAL, JALR results for ALU
// provides PC+4 for BRANCH unit and ifence in GC unit
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
constant_alu <= ((opcode_trim inside {LUI_T}) ? '0 : decode.pc) + ((opcode_trim inside {LUI_T, AUIPC_T}) ? {decode.instruction[31:12], 12'b0} : 4);
alu_imm_type <= opcode_trim inside {ARITH_IMM_T};
alu_op_r <= alu_op;
alu_subtract <= (fn3 inside {SLTU_fn3, SLT_fn3}) || sub_instruction;
alu_logic_op_r <= alu_logic_op;
end
if (issue_stage_ready)
constant_alu <= ((decode_instruction.upper_opcode inside {LUI_T}) ? '0 : decode.pc) + ((decode_instruction.upper_opcode inside {LUI_T, AUIPC_T}) ? {decode.instruction[31:12], 12'b0} : 4);
end
//Shifter related
assign alu_inputs.lshift = ~issue.fn3[2];
assign alu_inputs.shift_amount = alu_imm_type ? issue_rs_addr[RS2] : rf.data[RS2][4:0];
assign alu_inputs.arith = rf.data[RS1][XLEN-1] & issue.instruction[30];//shift in bit
assign alu_inputs.shifter_in = rf.data[RS1];
//LUI, AUIPC, JAL, JALR
assign alu_inputs.constant_adder = constant_alu;
//logic and adder
assign alu_inputs.subtract = alu_subtract;
assign alu_inputs.logic_op = alu_logic_op_r;
assign alu_inputs.in1 = {(rf.data[RS1][XLEN-1] & ~issue.fn3[0]), rf.data[RS1]};//(fn3[0] is SLTU_fn3);
assign alu_rs2_data = alu_imm_type ? 32'(signed'(issue.instruction[31:20])) : rf.data[RS2];
assign alu_inputs.in2 = {(alu_rs2_data[XLEN-1] & ~issue.fn3[0]), alu_rs2_data};
assign alu_inputs.alu_op = alu_op_r;
////////////////////////////////////////////////////
//Load Store unit inputs
logic is_load;
logic is_store;
logic amo_op;
logic store_conditional;
logic load_reserve;
logic [4:0] amo_type;
assign amo_op = CONFIG.INCLUDE_AMO ? (opcode_trim == AMO_T) : 1'b0;
assign amo_type = decode.instruction[31:27];
assign store_conditional = (amo_type == AMO_SC_FN5);
assign load_reserve = (amo_type == AMO_LR_FN5);
generate if (CONFIG.INCLUDE_AMO) begin : gen_decode_ls_amo
assign ls_inputs.amo.is_lr = load_reserve;
assign ls_inputs.amo.is_sc = store_conditional;
assign ls_inputs.amo.is_amo = amo_op & ~(load_reserve | store_conditional);
assign ls_inputs.amo.op = amo_type;
end
else begin
assign ls_inputs.amo = '0;
end
endgenerate
assign is_load = (opcode_trim inside {LOAD_T, AMO_T}) && !(amo_op & store_conditional); //LR and AMO_ops perform a read operation as well
assign is_store = (opcode_trim == STORE_T) || (amo_op && store_conditional);//Used for LS unit and for ID tracking
logic [11:0] ls_offset;
logic is_load_r;
logic is_store_r;
logic is_fence_r;
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
ls_offset <= opcode[5] ? {decode.instruction[31:25], decode.instruction[11:7]} : decode.instruction[31:20];
is_load_r <= is_load;
is_store_r <= is_store;
is_fence_r <= is_fence;
end
end
(* ramstyle = "MLAB, no_rw_check" *) id_t rd_to_id_table [32];
always_ff @ (posedge clk) begin
if (instruction_issued_with_rd)
rd_to_id_table[issue.rd_addr] <= issue.id;
end
assign ls_inputs.offset = ls_offset;
assign ls_inputs.load = is_load_r;
assign ls_inputs.store = is_store_r;
assign ls_inputs.fence = is_fence_r;
assign ls_inputs.fn3 = amo_op ? LS_W_fn3 : issue.fn3;
assign ls_inputs.rs1 = rf.data[RS1];
assign ls_inputs.rs2 = rf.data[RS2];
assign ls_inputs.forwarded_store = rf.inuse[RS2];
assign ls_inputs.store_forward_id = rd_to_id_table[issue_rs_addr[RS2]];
////////////////////////////////////////////////////
//Branch unit inputs
////////////////////////////////////////////////////
//RAS Support
logic rs1_link;
logic rd_link;
logic rs1_eq_rd;
logic is_return;
logic is_call;
assign rs1_link = (rs_addr[RS1] inside {1,5});
assign rd_link = (rd_addr inside {1,5});
assign rs1_eq_rd = (rs_addr[RS1] == rd_addr);
logic br_use_signed;
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
is_return <= (opcode_trim == JALR_T) && ((rs1_link & ~rd_link) | (rs1_link & rd_link & ~rs1_eq_rd));
is_call <= (opcode_trim inside {JAL_T, JALR_T}) && rd_link;
br_use_signed <= !(fn3 inside {BLTU_fn3, BGEU_fn3});
end
end
logic[19:0] jal_imm;
logic[11:0] jalr_imm;
logic[11:0] br_imm;
logic [20:0] pc_offset;
logic [20:0] pc_offset_r;
assign jal_imm = {decode.instruction[31], decode.instruction[19:12], decode.instruction[20], decode.instruction[30:21]};
assign jalr_imm = decode.instruction[31:20];
assign br_imm = {decode.instruction[31], decode.instruction[7], decode.instruction[30:25], decode.instruction[11:8]};
always_comb begin
case (opcode[3:2])
2'b11 : pc_offset = 21'(signed'({jal_imm, 1'b0}));
2'b01 : pc_offset = 21'(signed'(jalr_imm));
default : pc_offset = 21'(signed'({br_imm, 1'b0}));
endcase
end
logic jalr;
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
pc_offset_r <= pc_offset;
jalr <= (~opcode[3] & opcode[2]);
end
end
assign branch_inputs.is_return = is_return;
assign branch_inputs.is_call = is_call;
assign branch_inputs.fn3 = issue.fn3;
assign branch_inputs.pc_offset = pc_offset_r;
assign branch_inputs.jal = issue.opcode[3];//(opcode == JAL);
assign branch_inputs.jalr = jalr;
assign branch_inputs.jal_jalr = issue.opcode[2];
assign branch_inputs.issue_pc = issue.pc;
assign branch_inputs.issue_pc_valid = issue.stage_valid;
assign branch_inputs.rs1 = {(rf.data[RS1][31] & br_use_signed), rf.data[RS1]};
assign branch_inputs.rs2 = {(rf.data[RS2][31] & br_use_signed), rf.data[RS2]};
assign branch_inputs.pc_p4 = constant_alu;
////////////////////////////////////////////////////
//Global Control unit inputs
//Decodes instruction[31:20] into a one-hot style match vector of system operations,
//gated by the relevant CONFIG options, and registers the flags passed to the GC unit.
logic is_ecall_r;
logic is_ebreak_r;
logic is_mret_r;
logic is_sret_r;
logic is_ifence_r;
//8 bits wide; only indices 0-5 are named by sys_op_index_t below
logic [7:0] sys_op_match;
typedef enum logic [2:0] {
ECALL_i = 0,
EBREAK_i = 1,
URET_i = 2,
SRET_i = 3,
MRET_i = 4,
SFENCE_i = 5
} sys_op_index_t;
//At most one bit is set per instruction. URET_i has an index but no case arm sets it here.
//A matching arm only sets its bit when the corresponding CONFIG option is enabled.
always_comb begin
sys_op_match = '0;
case (decode.instruction[31:20]) inside
ECALL_imm : sys_op_match[ECALL_i] = CONFIG.INCLUDE_M_MODE;
EBREAK_imm : sys_op_match[EBREAK_i] = CONFIG.INCLUDE_M_MODE;
SRET_imm : sys_op_match[SRET_i] = CONFIG.INCLUDE_S_MODE;
MRET_imm : sys_op_match[MRET_i] = CONFIG.INCLUDE_M_MODE;
SFENCE_imm : sys_op_match[SFENCE_i] = CONFIG.INCLUDE_S_MODE;
default : sys_op_match = '0;
endcase
end
//Flags are captured only when issue_stage_ready is set.
//is_ifence is not declared in this section; it comes from elsewhere in the module.
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
is_ecall_r <= sys_op_match[ECALL_i];
is_ebreak_r <= sys_op_match[EBREAK_i];
is_mret_r <= sys_op_match[MRET_i];
is_sret_r <= sys_op_match[SRET_i];
is_ifence_r <= is_ifence;
end
end
//Only the ifence/mret/sret flags are forwarded; is_ecall_r and is_ebreak_r are not part of gc_inputs here
assign gc_inputs.pc_p4 = constant_alu;
assign gc_inputs.is_ifence = is_ifence_r;
assign gc_inputs.is_mret = is_mret_r;
assign gc_inputs.is_sret = is_sret_r;
////////////////////////////////////////////////////
//CSR unit inputs
//Drives the CSR unit inputs from the issue-stage instruction; only generated when CONFIG.INCLUDE_CSRS is set.
generate if (CONFIG.INCLUDE_CSRS) begin : gen_decode_csr_inputs
assign csr_inputs.addr = issue.instruction[31:20];
assign csr_inputs.op = issue.fn3[1:0];
//fn3[2] selects the source: the zero-padded rs1 address field, otherwise the rs1 register value
assign csr_inputs.data = issue.fn3[2] ? {27'b0, issue_rs_addr[RS1]} : rf.data[RS1];
//reads is deasserted only for a CSR_RW op with rd_addr == 0
assign csr_inputs.reads = ~((issue.fn3[1:0] == CSR_RW) && (issue.rd_addr == 0));
//writes is deasserted only for a CSR_RC op with an rs1 address of 0
//NOTE(review): CSR_RS with an rs1 address of 0 is still flagged as a write here - confirm this is intended
assign csr_inputs.writes = ~((issue.fn3[1:0] == CSR_RC) && (issue_rs_addr[RS1] == 0));
end endgenerate
////////////////////////////////////////////////////
//Mul unit inputs
generate if (CONFIG.INCLUDE_MUL) begin : gen_decode_mul_inputs
assign mul_inputs.rs1 = rf.data[RS1];
assign mul_inputs.rs2 = rf.data[RS2];
assign mul_inputs.op = issue.fn3[1:0];
end endgenerate
////////////////////////////////////////////////////
//Div unit inputs
//Drives the divider inputs and tracks whether the previous division used the same
//physical source registers, so the divider can be told to reuse its last result.
//Only generated when CONFIG.INCLUDE_DIV is set.
generate if (CONFIG.INCLUDE_DIV) begin : gen_decode_div_inputs
phys_addr_t prev_div_rs_addr [2];
logic [1:0] div_rd_match;
logic prev_div_result_valid;
logic div_rs_overwrite;
logic div_op_reuse;
//Remember the physical source registers of the most recent instruction issued to the divider
always_ff @(posedge clk) begin
if (issue_to[UNIT_IDS.DIV])
prev_div_rs_addr <= issue_phys_rs_addr[RS1:RS2];
end
//Reuse requires a valid previous result and both physical source addresses to match
assign div_op_reuse = {prev_div_result_valid, prev_div_rs_addr[RS1], prev_div_rs_addr[RS2]} == {1'b1, issue_phys_rs_addr[RS1],issue_phys_rs_addr[RS2]};
//Clear if prev div inputs are overwritten by another instruction
assign div_rd_match[RS1] = (issue.phys_rd_addr == prev_div_rs_addr[RS1]);
assign div_rd_match[RS2] = (issue.phys_rd_addr == prev_div_rs_addr[RS2]);
assign div_rs_overwrite = |div_rd_match;
//Valid flag: set when a divide issues, cleared on an overwrite of either tracked source or on writeback suppression.
//SET_OVER_CLR(1) presumably gives set priority when both fire - confirm against set_clr_reg_with_rst
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) prev_div_result_valid_m (
.clk, .rst,
.set(instruction_issued & unit_needed_issue_stage[UNIT_IDS.DIV]),
.clr((instruction_issued & issue.uses_rd & div_rs_overwrite) | gc.writeback_supress), //No instructions will be issued while gc.writeback_supress is asserted
.result(prev_div_result_valid)
);
assign div_inputs.rs1 = rf.data[RS1];
assign div_inputs.rs2 = rf.data[RS2];
assign div_inputs.op = issue.fn3[1:0];
assign div_inputs.reuse_result = div_op_reuse;
end endgenerate
////////////////////////////////////////////////////
//Unit EX signals
generate for (i = 0; i < NUM_UNITS; i++) begin : gen_unit_issue_signals
@ -526,12 +236,9 @@ module decode_and_issue
////////////////////////////////////////////////////
//Illegal Instruction check
logic illegal_instruction_pattern_r;
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_decode_exceptions
illegal_instruction_checker # (.CONFIG(CONFIG))
illegal_op_check (
.instruction(decode.instruction), .illegal_instruction(illegal_instruction_pattern)
);
assign illegal_instruction_pattern = ~|unit_needed;
always_ff @(posedge clk) begin
if (rst)
illegal_instruction_pattern_r <= 0;
@ -558,7 +265,7 @@ module decode_and_issue
always_comb begin
case (current_privilege)
USER_PRIVILEGE : ecall_code = ECALL_U;
SUPERVISOR_PRIVILEGE : ecall_code = ECALL_S;
SUPERVISOR_PRIVILEGE : ecall_code = ECALL_S;
MACHINE_PRIVILEGE : ecall_code = ECALL_M;
default : ecall_code = ECALL_U;
endcase
@ -573,7 +280,7 @@ module decode_and_issue
if (rst)
pre_issue_exception_pending <= 0;
else if (issue_stage_ready)
pre_issue_exception_pending <= illegal_instruction_pattern | (opcode_trim inside {SYSTEM_T} & ~is_csr & (sys_op_match[ECALL_i] | sys_op_match[EBREAK_i])) | ~decode.fetch_metadata.ok;
pre_issue_exception_pending <= illegal_instruction_pattern | (~decode.fetch_metadata.ok) | decode.instruction inside {ECALL, EBREAK};
end
assign new_exception = issue.stage_valid & pre_issue_exception_pending & ~(gc.issue_hold | gc.fetch_flush);
@ -585,6 +292,12 @@ module decode_and_issue
exception.valid <= (exception.valid | new_exception) & ~exception.ack;
end
logic is_ecall_r;
always_ff @(posedge clk) begin
if (issue_stage_ready)
is_ecall_r <= (decode_instruction.upper_opcode == SYSTEM_T) & (decode.instruction[31:20] == ECALL_imm);
end
assign ecode =
illegal_instruction_pattern_r ? ILLEGAL_INST :
is_ecall_r ? ecall_code :

View file

@ -30,15 +30,20 @@ module div_unit
(
input logic clk,
input logic rst,
input gc_outputs_t gc,
input logic instruction_issued_with_rd,
input decode_packet_t decode_stage,
output logic unit_needed,
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
output logic uses_rd,
input issue_packet_t issue_stage,
input logic issue_stage_ready,
input phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS],
input logic [31:0] rf [REGFILE_READ_PORTS],
input div_inputs_t div_inputs,
unit_issue_interface.unit issue,
unit_writeback_interface.unit wb
);
@ -96,44 +101,74 @@ module div_unit
////////////////////////////////////////////////////
//Decode
assign unit_needed = decode_stage.instruction inside {
DIV, DIVU, REM, REMU
};
assign unit_needed = decode_stage.instruction inside {DIV, DIVU, REM, REMU};
//Register usage reported to decode: every instruction this unit accepts reads rs1 and rs2 and writes rd
always_comb begin
uses_rs = '0;
uses_rs[RS1] = unit_needed;
uses_rs[RS2] = unit_needed;
uses_rd = unit_needed;
end
////////////////////////////////////////////////////
//Issue
////////////////////////////////////////////////////
//Result reuse (for div/rem pairs)
//Tracks the physical source registers of the last accepted request so that a following
//request with the same sources can be flagged via div_op_reuse.
phys_addr_t prev_div_rs_addr [2];
logic [1:0] div_rd_match;
logic prev_div_result_valid;
logic div_rs_overwrite;
logic div_op_reuse;
//Capture the source registers of each new request
always_ff @(posedge clk) begin
if (issue.new_request)
prev_div_rs_addr <= issue_phys_rs_addr[RS1:RS2];
end
//Reuse requires a valid previous result and both physical source addresses to match
assign div_op_reuse = {prev_div_result_valid, prev_div_rs_addr[RS1], prev_div_rs_addr[RS2]} == {1'b1, issue_phys_rs_addr[RS1],issue_phys_rs_addr[RS2]};
//Clear if prev div inputs are overwritten by another instruction
assign div_rd_match[RS1] = (issue_stage.phys_rd_addr == prev_div_rs_addr[RS1]);
assign div_rd_match[RS2] = (issue_stage.phys_rd_addr == prev_div_rs_addr[RS2]);
assign div_rs_overwrite = |div_rd_match;
//Valid flag: set on each new request, cleared on an overwrite of either tracked source or on writeback suppression.
//SET_OVER_CLR(1) presumably gives set priority when both fire - confirm against set_clr_reg_with_rst
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) prev_div_result_valid_m (
.clk, .rst,
.set(issue.new_request),
.clr((instruction_issued_with_rd & div_rs_overwrite) | gc.writeback_supress), //No instructions will be issued while gc.writeback_supress is asserted
.result(prev_div_result_valid)
);
////////////////////////////////////////////////////
//Input and output sign determination
assign signed_divop = ~div_inputs.op[0];
assign signed_divop = ~ issue_stage.fn3[0];
assign negate_dividend = signed_divop & div_inputs.rs1[31];
assign negate_divisor = signed_divop & div_inputs.rs2[31];
assign negate_dividend = signed_divop & rf[RS1][31];
assign negate_divisor = signed_divop & rf[RS2][31];
assign negate_quotient = signed_divop & (div_inputs.rs1[31] ^ div_inputs.rs2[31]);
assign negate_remainder = signed_divop & (div_inputs.rs1[31]);
assign negate_quotient = signed_divop & (rf[RS1][31] ^ rf[RS2][31]);
assign negate_remainder = signed_divop & (rf[RS1][31]);
////////////////////////////////////////////////////
//Input Processing
assign unsigned_dividend = negate_if (div_inputs.rs1, negate_dividend);
assign unsigned_divisor = negate_if (div_inputs.rs2, negate_divisor);
assign unsigned_dividend = negate_if (rf[RS1], negate_dividend);
assign unsigned_divisor = negate_if (rf[RS2], negate_divisor);
//Note: If this becomes the critical path, we can use the one's complemented input instead.
//It will potentially overestimate (only when the input is a negative power-of-two), and
//the divisor width will need to be increased by one to safely handle the case where the divisor CLZ is overestimated
clz dividend_clz_block (.clz_input(unsigned_dividend), .clz(dividend_CLZ));
clz divisor_clz_block (.clz_input(unsigned_divisor), .clz(divisor_CLZ));
assign divisor_is_zero = (&divisor_CLZ) & ~div_inputs.rs2[0];
assign divisor_is_zero = (&divisor_CLZ) & ~rf[RS2][0];
assign issue_fifo_inputs.unsigned_dividend = unsigned_dividend;
assign issue_fifo_inputs.unsigned_divisor = unsigned_divisor;
assign issue_fifo_inputs.dividend_CLZ = divisor_is_zero ? '0 : dividend_CLZ;
assign issue_fifo_inputs.divisor_CLZ = divisor_CLZ;
assign issue_fifo_inputs.attr.remainder_op = div_inputs.op[1];
assign issue_fifo_inputs.attr.negate_result = div_inputs.op[1] ? negate_remainder : (negate_quotient & ~divisor_is_zero);
assign issue_fifo_inputs.attr.remainder_op = issue_stage.fn3[1];
assign issue_fifo_inputs.attr.negate_result = issue_stage.fn3[1] ? negate_remainder : (negate_quotient & ~divisor_is_zero);
assign issue_fifo_inputs.attr.divisor_is_zero = divisor_is_zero;
assign issue_fifo_inputs.attr.reuse_result = div_inputs.reuse_result;
assign issue_fifo_inputs.attr.reuse_result = div_op_reuse;
assign issue_fifo_inputs.attr.id = issue.id;
assign issue_fifo_inputs.attr.phys_addr = issue.phys_addr;

View file

@ -39,13 +39,15 @@ module gc_unit
//Decode
input decode_packet_t decode_stage,
output logic unit_needed,
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
output logic uses_rd,
input issue_packet_t issue_stage,
input logic issue_stage_ready,
input logic [31:0] constant_alu,
input logic [31:0] rf [REGFILE_READ_PORTS],
unit_issue_interface.unit issue,
input gc_inputs_t gc_inputs,
//Branch miss predict
input logic branch_flush,
@ -118,6 +120,7 @@ module gc_unit
//LS exceptions (misaligned, TLB and MMU) (issue stage)
//fetch flush, take exception. If execute or later exception occurs first, exception is overridden
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
typedef enum {RST_STATE, PRE_CLEAR_STATE, INIT_CLEAR_STATE, IDLE_STATE, TLB_CLEAR_STATE, POST_ISSUE_DRAIN, PRE_ISSUE_FLUSH, POST_ISSUE_DISCARD} gc_state;
gc_state state;
@ -126,7 +129,6 @@ module gc_unit
logic init_clear_done;
logic tlb_clear_done;
gc_inputs_t gc_inputs_r;
logic post_issue_idle;
logic ifence_in_progress;
logic ret_in_progress;
@ -143,14 +145,48 @@ module gc_unit
logic gc_pc_override;
logic [31:0] gc_pc;
typedef struct packed{
logic [31:0] pc_p4;
logic is_ifence;
logic is_mret;
logic is_sret;
} gc_inputs_t;
gc_inputs_t gc_inputs;
gc_inputs_t gc_inputs_r;
////////////////////////////////////////////////////
//Implementation
////////////////////////////////////////////////////
//Decode
assign unit_needed = decode_stage.instruction inside {
ECALL, EBREAK, SRET, MRET, FENCE_I, SFENCE_VMA
};
logic is_ifence;
logic is_mret;
logic is_sret;
assign instruction = decode_stage.instruction;
assign unit_needed =
(CONFIG.INCLUDE_M_MODE & decode_stage.instruction inside {ECALL, EBREAK, MRET}) |
(CONFIG.INCLUDE_S_MODE & decode_stage.instruction inside {SRET, SFENCE_VMA}) |
(CONFIG.INCLUDE_IFENCE & decode_stage.instruction inside {FENCE_I});
//Register usage reported to decode: only SFENCE_VMA (when S mode is included) reads rs1;
//no instruction handled by this unit writes rd
always_comb begin
uses_rs = '0;
uses_rs[RS1] = CONFIG.INCLUDE_S_MODE & decode_stage.instruction inside {SFENCE_VMA};
uses_rd = 0;
end
//Registers the operation flags for the instruction moving from decode to issue.
//Each flag is gated by its CONFIG option and only updates when issue_stage_ready is set.
//Non-blocking assignments are used so that any logic sampling these flags (directly or
//through gc_inputs) on the same clock edge sees the previous value instead of racing
//with this update in simulation.
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
is_ifence <= (instruction.upper_opcode == FENCE_T) & CONFIG.INCLUDE_IFENCE;
is_mret <= (instruction.upper_opcode == SYSTEM_T) & (decode_stage.instruction[31:20] == MRET_imm) & CONFIG.INCLUDE_M_MODE;
is_sret <= (instruction.upper_opcode == SYSTEM_T) & (decode_stage.instruction[31:20] == SRET_imm) & CONFIG.INCLUDE_S_MODE;
end
end
assign gc_inputs.pc_p4 = constant_alu;
assign gc_inputs.is_ifence = is_ifence;
assign gc_inputs.is_mret = is_mret;
assign gc_inputs.is_sret = is_sret;
////////////////////////////////////////////////////
//Issue

View file

@ -38,12 +38,16 @@ module load_store_unit
input decode_packet_t decode_stage,
output logic unit_needed,
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
output logic uses_rd,
input issue_packet_t issue_stage,
input logic issue_stage_ready,
input logic instruction_issued_with_rd,
input logic rs2_inuse,
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
input logic [31:0] rf [REGFILE_READ_PORTS],
input load_store_inputs_t ls_inputs,
unit_issue_interface.unit issue,
input logic dcache_on,
@ -135,6 +139,8 @@ module load_store_unit
} load_attributes_t;
load_attributes_t mem_attr, wb_attr;
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
logic [3:0] be;
//FIFOs
fifo_interface #(.DATA_WIDTH($bits(load_attributes_t))) load_attributes();
@ -145,23 +151,67 @@ module load_store_unit
////////////////////////////////////////////////////
//Decode
assign unit_needed = decode_stage.instruction inside {
LB, LH, LW, LBU, LHU, SB, SH, SW, FENCE
};
assign instruction = decode_stage.instruction;
assign unit_needed = decode_stage.instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW, FENCE};
//Register usage reported to decode: loads and stores read rs1, only loads write rd.
//rs2 is deliberately reported as unused, including for stores (see the inline note below).
always_comb begin
uses_rs = '0;
uses_rs[RS1] = decode_stage.instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW};
uses_rs[RS2] = 0;//Store forwarding support //decode_stage.instruction inside {SB, SH, SW};
uses_rd = decode_stage.instruction inside {LB, LH, LW, LBU, LHU};
end
//Decode-stage classification of the memory operation (load/store/fence/AMO) and its
//address offset, registered for use once the instruction reaches the issue stage.
amo_details_t amo;
amo_details_t amo_r;
logic is_load;
logic is_store;
logic is_load_r;
logic is_store_r;
logic is_fence_r;
logic [2:0] fn3_r;
logic [11:0] ls_offset_r;
//All AMO fields are forced to zero when CONFIG.INCLUDE_AMO is not set
assign amo.is_amo = CONFIG.INCLUDE_AMO & (instruction.upper_opcode == AMO_T);
assign amo.op = CONFIG.INCLUDE_AMO ? decode_stage.instruction[31:27] : '0;
assign amo.is_lr = CONFIG.INCLUDE_AMO & (amo.op == AMO_LR_FN5);
assign amo.is_sc = CONFIG.INCLUDE_AMO & (amo.op == AMO_SC_FN5);
//An AMO SC counts as a store only; every other LOAD_T/AMO_T opcode counts as a load
assign is_load = (instruction.upper_opcode inside {LOAD_T, AMO_T}) & !(amo.is_amo & amo.is_sc); //LR and AMO_ops perform a read operation as well
assign is_store = (instruction.upper_opcode == STORE_T) | (amo.is_amo & amo.is_sc);//Used for LS unit and for ID tracking
//Captured only when issue_stage_ready is set
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
//instruction[5] selects which bits form the 12-bit offset: {[31:25], [11:7]} when set, otherwise [31:20]
ls_offset_r <= decode_stage.instruction[5] ? {decode_stage.instruction[31:25], decode_stage.instruction[11:7]} : decode_stage.instruction[31:20];
is_load_r <= is_load;
is_store_r <= is_store;
is_fence_r <= (instruction.upper_opcode == FENCE_T);
amo_r <= amo;
//AMO operations use LS_W_fn3 in place of the instruction's own fn3
fn3_r <= amo.is_amo ? LS_W_fn3 : instruction.fn3;
end
end
//Table mapping each destination register address (32 entries) to the id of the last issued
//instruction that wrote it. It is read with the issue-stage rs2 address to get store_forward_id.
//The ramstyle attribute is a synthesis hint for how the memory is implemented.
(* ramstyle = "MLAB, no_rw_check" *) id_t rd_to_id_table [32];
id_t store_forward_id;
//Updated whenever an instruction with a destination register is issued
always_ff @ (posedge clk) begin
if (instruction_issued_with_rd)
rd_to_id_table[issue_stage.rd_addr] <= issue_stage.id;
end
assign store_forward_id = rd_to_id_table[issue_rs_addr[RS2]];
////////////////////////////////////////////////////
//Alignment Exception
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_ls_exceptions
logic new_exception;
always_comb begin
case(ls_inputs.fn3)
case(fn3_r)
LS_H_fn3, L_HU_fn3 : unaligned_addr = virtual_address[0];
LS_W_fn3 : unaligned_addr = |virtual_address[1:0];
default : unaligned_addr = 0;
endcase
end
assign new_exception = unaligned_addr & issue.new_request & ~ls_inputs.fence;
assign new_exception = unaligned_addr & issue.new_request & ~is_fence_r;
always_ff @(posedge clk) begin
if (rst)
exception.valid <= 0;
@ -171,7 +221,7 @@ module load_store_unit
always_ff @(posedge clk) begin
if (new_exception & ~exception.valid) begin
exception.code <= ls_inputs.store ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED;
exception.code <= is_store_r ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED;
exception.tval <= virtual_address;
exception.id <= issue.id;
end
@ -195,12 +245,12 @@ module load_store_unit
////////////////////////////////////////////////////
//TLB interface
assign virtual_address = ls_inputs.rs1 + 32'(signed'(ls_inputs.offset));
assign virtual_address = rf[RS1] + 32'(signed'(ls_offset_r));
assign tlb.virtual_address = virtual_address;
assign tlb.new_request = tlb_on & issue.new_request;
assign tlb.execute = 0;
assign tlb.rnw = ls_inputs.load & ~ls_inputs.store;
assign tlb.rnw = is_load_r & ~is_store_r;
////////////////////////////////////////////////////
//Byte enable generation
@ -210,7 +260,7 @@ module load_store_unit
// SB: specific byte
always_comb begin
be = 0;
case(ls_inputs.fn3[1:0])
case(fn3_r[1:0])
LS_B_fn3[1:0] : be[virtual_address[1:0]] = 1;
LS_H_fn3[1:0] : begin
be[virtual_address[1:0]] = 1;
@ -224,19 +274,19 @@ module load_store_unit
//Load Store Queue
assign lsq.data_in = '{
addr : tlb_on ? tlb.physical_address : virtual_address,
fn3 : ls_inputs.fn3,
fn3 : fn3_r,
be : be,
data : ls_inputs.rs2,
load : ls_inputs.load,
store : ls_inputs.store,
data : rf[RS2],
load : is_load_r,
store : is_store_r,
id : issue.id,
phys_addr : issue.phys_addr,
forwarded_store : ls_inputs.forwarded_store,
id_needed : ls_inputs.store_forward_id
forwarded_store : rs2_inuse,
id_needed : store_forward_id
};
assign lsq.potential_push = issue.possible_issue;
assign lsq.push = issue.new_request & ~unaligned_addr & (~tlb_on | tlb.done) & ~ls_inputs.fence;
assign lsq.push = issue.new_request & ~unaligned_addr & (~tlb_on | tlb.done) & ~is_fence_r;
load_store_queue # (.CONFIG(CONFIG)) lsq_block (
.clk (clk),
@ -282,7 +332,7 @@ module load_store_unit
if (rst)
fence_hold <= 0;
else
fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & ls_inputs.fence);
fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & is_fence_r);
end
////////////////////////////////////////////////////
@ -407,7 +457,7 @@ module load_store_unit
.sc_complete (sc_complete),
.sc_success (sc_success),
.clear_reservation (clear_reservation),
.amo (ls_inputs.amo),
.amo (amo_r),
.uncacheable_load (uncacheable_load),
.uncacheable_store (uncacheable_store),
.is_load (sel_load),

View file

@ -33,12 +33,13 @@ module mul_unit
input decode_packet_t decode_stage,
output logic unit_needed,
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
output logic uses_rd,
input issue_packet_t issue_stage,
input logic issue_stage_ready,
input logic [31:0] rf [REGFILE_READ_PORTS],
input mul_inputs_t mul_inputs,
unit_issue_interface.unit issue,
unit_writeback_interface.unit wb
);
@ -61,18 +62,21 @@ module mul_unit
////////////////////////////////////////////////////
//Decode
assign unit_needed = decode_stage.instruction inside {
MUL, MULH, MULHSU, MULHU
};
assign unit_needed = decode_stage.instruction inside {MUL, MULH, MULHSU, MULHU};
//Register usage reported to decode: every instruction this unit accepts reads rs1 and rs2 and writes rd
always_comb begin
uses_rs = '0;
uses_rs[RS1] = unit_needed;
uses_rs[RS2] = unit_needed;
uses_rd = unit_needed;
end
////////////////////////////////////////////////////
//Issue
assign rs1_is_signed = mul_inputs.op[1:0] inside {MULH_fn3[1:0], MULHSU_fn3[1:0]};//MUL doesn't matter
assign rs2_is_signed = mul_inputs.op[1:0] inside {MUL_fn3[1:0], MULH_fn3[1:0]};//MUL doesn't matter
assign rs1_is_signed = issue_stage.fn3[1:0] inside {MULH_fn3[1:0], MULHSU_fn3[1:0]};//MUL doesn't matter
assign rs2_is_signed = issue_stage.fn3[1:0] inside {MUL_fn3[1:0], MULH_fn3[1:0]};//MUL doesn't matter
assign rs1_ext = signed'({mul_inputs.rs1[31] & rs1_is_signed, mul_inputs.rs1});
assign rs2_ext = signed'({mul_inputs.rs2[31] & rs2_is_signed, mul_inputs.rs2});
assign rs1_ext = signed'({rf[RS1][31] & rs1_is_signed, rf[RS1]});
assign rs2_ext = signed'({rf[RS2][31] & rs2_is_signed, rf[RS2]});
//Pipeline advancement control signals
assign issue.ready = stage1_advance;
@ -93,7 +97,7 @@ module mul_unit
//Attribute Pipeline
always_ff @ (posedge clk) begin
if (stage1_advance) begin
mulh[0] <= (mul_inputs.op[1:0] != MUL_fn3[1:0]);
mulh[0] <= (issue_stage.fn3[1:0] != MUL_fn3[1:0]);
id[0] <= issue.id;
phys_addr[0] <= issue.phys_addr;
end

View file

@ -101,4 +101,7 @@ package opcodes;
localparam [31:0] SFENCE_VMA = 32'b0001001??????????000000001110011;
localparam [31:0] WFI = 32'b00010000010100000000000001110011;
localparam [31:0] CUSTOM = 32'b?????????????????????????1111011;
endpackage

View file

@ -312,6 +312,7 @@ module cva5_sim
`define RENAME_P cpu.renamer_block
`define METADATA_P cpu.id_block
`define LS_P cpu.load_store_unit_block
`define DIV_P cpu.gen_div.div_unit_block
`define LSQ_P cpu.load_store_unit_block.lsq_block
`define DCACHE_P cpu.load_store_unit_block.gen_ls_dcache.data_cache
@ -376,10 +377,10 @@ module cva5_sim
stats[FETCH_IC_ARB_STALL_STAT] = iarb_stall;
//Branch predictor
stats[FETCH_BP_BR_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return & ~`BRANCH_P.branch_flush;
stats[FETCH_BP_BR_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return & `BRANCH_P.branch_flush;
stats[FETCH_BP_RAS_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return & ~`BRANCH_P.branch_flush;
stats[FETCH_BP_RAS_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return & `BRANCH_P.branch_flush;
stats[FETCH_BP_BR_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return_ex & ~`BRANCH_P.branch_flush;
stats[FETCH_BP_BR_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return_ex & `BRANCH_P.branch_flush;
stats[FETCH_BP_RAS_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return_ex & ~`BRANCH_P.branch_flush;
stats[FETCH_BP_RAS_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return_ex & `BRANCH_P.branch_flush;
//Issue stalls
base_no_instruction_stall = ~`ISSUE_P.issue.stage_valid | cpu.gc.fetch_flush;
@ -403,8 +404,8 @@ module cva5_sim
//Misc Issue stats
stats[ISSUE_OPERAND_STALL_FOR_BRANCH_STAT] = stats[ISSUE_OPERANDS_NOT_READY_STAT] & `ISSUE_P.unit_needed_issue_stage[`ISSUE_P.UNIT_IDS.BR];
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.LS] & `ISSUE_P.is_store_r & `ISSUE_P.ls_inputs.forwarded_store;
stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.DIV] & `ISSUE_P.gen_decode_div_inputs.div_op_reuse;
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.LS] & `LS_P.is_store_r & `LS_P.rs2_inuse;
stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.DIV] & `DIV_P.div_op_reuse;
//Issue Stall Source
for (int i = 0; i < REGFILE_READ_PORTS; i++) begin

View file

@ -448,6 +448,7 @@ module cva5_sim
`define RENAME_P cpu.renamer_block
`define METADATA_P cpu.id_block
`define LS_P cpu.load_store_unit_block
`define DIV_P cpu.gen_div.div_unit_block
`define LSQ_P cpu.load_store_unit_block.lsq_block
`define DCACHE_P cpu.load_store_unit_block.gen_ls_dcache.data_cache
@ -512,10 +513,10 @@ module cva5_sim
stats[FETCH_IC_ARB_STALL_STAT] = iarb_stall;
//Branch predictor
stats[FETCH_BP_BR_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return & ~`BRANCH_P.branch_flush;
stats[FETCH_BP_BR_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return & `BRANCH_P.branch_flush;
stats[FETCH_BP_RAS_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return & ~`BRANCH_P.branch_flush;
stats[FETCH_BP_RAS_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return & `BRANCH_P.branch_flush;
stats[FETCH_BP_BR_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return_ex & ~`BRANCH_P.branch_flush;
stats[FETCH_BP_BR_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return_ex & `BRANCH_P.branch_flush;
stats[FETCH_BP_RAS_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return_ex & ~`BRANCH_P.branch_flush;
stats[FETCH_BP_RAS_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return_ex & `BRANCH_P.branch_flush;
//Issue stalls
base_no_instruction_stall = ~`ISSUE_P.issue.stage_valid | cpu.gc.fetch_flush;
@ -539,8 +540,8 @@ module cva5_sim
//Misc Issue stats
stats[ISSUE_OPERAND_STALL_FOR_BRANCH_STAT] = stats[ISSUE_OPERANDS_NOT_READY_STAT] & `ISSUE_P.unit_needed_issue_stage[`ISSUE_P.UNIT_IDS.BR];
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.LS] & `ISSUE_P.is_store_r & `ISSUE_P.ls_inputs.forwarded_store;
stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.DIV] & `ISSUE_P.gen_decode_div_inputs.div_op_reuse;
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.LS] & `LS_P.is_store_r & `LS_P.rs2_inuse;
stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.DIV] & `DIV_P.div_op_reuse;
//Issue Stall Source
for (int i = 0; i < REGFILE_READ_PORTS; i++) begin