mirror of
https://github.com/openhwgroup/cva5.git
synced 2025-04-20 03:57:18 -04:00
Move decode logic to respective units
Signed-off-by: Eric Matthews <ematthew@sfu.ca>
This commit is contained in:
parent
ac362d0b5b
commit
6cf0d84c3e
14 changed files with 492 additions and 545 deletions
101
core/alu_unit.sv
101
core/alu_unit.sv
|
@ -33,17 +33,28 @@ module alu_unit
|
|||
|
||||
input decode_packet_t decode_stage,
|
||||
output unit_needed,
|
||||
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input logic [31:0] constant_alu,
|
||||
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
unit_issue_interface.unit issue,
|
||||
input alu_inputs_t alu_inputs,
|
||||
unit_writeback_interface.unit wb
|
||||
);
|
||||
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
|
||||
|
||||
logic [31:0] alu_rs2_data;
|
||||
logic [32:0] alu_data1;
|
||||
logic [32:0] alu_data2;
|
||||
logic imm_type;
|
||||
alu_op_t alu_op;
|
||||
alu_op_t alu_op_r;
|
||||
logic subtract;
|
||||
|
||||
logic[XLEN:0] add_sub_result;
|
||||
logic add_sub_carry_in;
|
||||
logic[XLEN:0] adder_in1;
|
||||
|
@ -55,46 +66,94 @@ module alu_unit
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
assign instruction = decode_stage.instruction;
|
||||
|
||||
assign unit_needed = decode_stage.instruction inside {
|
||||
JALR, JAL, LUI, AUIPC, ADDI, SLLI, SLTI, SLTIU, XORI, SRLI, SRAI, ORI, ANDI,
|
||||
LUI, AUIPC, JAL, JALR,
|
||||
ADDI, SLLI, SLTI, SLTIU, XORI, SRLI, SRAI, ORI, ANDI,
|
||||
ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND
|
||||
};
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = decode_stage.instruction inside {
|
||||
JALR,
|
||||
ADDI, SLLI, SLTI, SLTIU, XORI, SRLI, SRAI, ORI, ANDI,
|
||||
ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND
|
||||
};
|
||||
uses_rs[RS2] = decode_stage.instruction inside {
|
||||
ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND
|
||||
};
|
||||
uses_rd = decode_stage.instruction inside {
|
||||
LUI, AUIPC, JAL, JALR,
|
||||
ADDI, SLLI, SLTI, SLTIU, XORI, SRLI, SRAI, ORI, ANDI,
|
||||
ADD, SUB, SLL, SLT, SLTU, XOR, SRL, SRA, OR, AND
|
||||
};
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
case (instruction.upper_opcode) inside
|
||||
LUI_T, AUIPC_T, JAL_T, JALR_T : alu_op = ALU_CONSTANT;
|
||||
default :
|
||||
case (instruction.fn3) inside
|
||||
SLTU_fn3, SLT_fn3 : alu_op = ALU_SLT;
|
||||
SLL_fn3, SRA_fn3 : alu_op = ALU_SHIFT;
|
||||
default : alu_op = ALU_ADD_SUB;
|
||||
endcase
|
||||
endcase
|
||||
end
|
||||
|
||||
|
||||
//Constant ALU (computed in decode_and_issue; received by this unit on the constant_alu input):
|
||||
// provides LUI, AUIPC, JAL, JALR results for ALU
|
||||
// provides PC+4 for BRANCH unit and ifence in GC unit
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
imm_type <= instruction.upper_opcode inside {ARITH_IMM_T};
|
||||
alu_op_r <= alu_op;
|
||||
subtract <= decode_stage.instruction inside {SUB, SLTI, SLTIU, SLT, SLTU};
|
||||
end
|
||||
end
|
||||
|
||||
//logic and adder
|
||||
assign alu_data1 = {(rf[RS1][31] & ~issue_stage.fn3[0]), rf[RS1]};//(fn3[0] is SLTU_fn3);
|
||||
assign alu_rs2_data = imm_type ? 32'(signed'(issue_stage.instruction[31:20])) : rf[RS2];
|
||||
assign alu_data2 = {(alu_rs2_data[31] & ~issue_stage.fn3[0]), alu_rs2_data};
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
//Logic ops put through the adder carry chain to reduce resources
|
||||
always_comb begin
|
||||
case (alu_inputs.logic_op)
|
||||
ALU_LOGIC_XOR : adder_in1 = alu_inputs.in1 ^ alu_inputs.in2;
|
||||
ALU_LOGIC_OR : adder_in1 = alu_inputs.in1 | alu_inputs.in2;
|
||||
ALU_LOGIC_AND : adder_in1 = alu_inputs.in1 & alu_inputs.in2;
|
||||
default : adder_in1 = alu_inputs.in1; //ADD/SUB/SLT/SLTU
|
||||
case (issue_stage.fn3)
|
||||
XOR_fn3 : adder_in1 = alu_data1 ^ alu_data2;
|
||||
OR_fn3 : adder_in1 = alu_data1 | alu_data2;
|
||||
AND_fn3 : adder_in1 = alu_data1 & alu_data2;
|
||||
default : adder_in1 = alu_data1; //ADD/SUB/SLT/SLTU
|
||||
endcase
|
||||
case (alu_inputs.logic_op)
|
||||
ALU_LOGIC_XOR,
|
||||
ALU_LOGIC_OR,
|
||||
ALU_LOGIC_AND : adder_in2 = 0;
|
||||
default : adder_in2 = alu_inputs.in2 ^ {33{alu_inputs.subtract}};
|
||||
case (issue_stage.fn3)
|
||||
XOR_fn3,
|
||||
OR_fn3,
|
||||
AND_fn3 : adder_in2 = 0;
|
||||
default : adder_in2 = alu_data2 ^ {33{subtract}};
|
||||
endcase
|
||||
end
|
||||
|
||||
//Add/Sub ops
|
||||
assign {add_sub_result, add_sub_carry_in} = {adder_in1, 1'b1} + {adder_in2, alu_inputs.subtract};
|
||||
assign {add_sub_result, add_sub_carry_in} = {adder_in1, 1'b1} + {adder_in2, subtract};
|
||||
|
||||
//Shift ops
|
||||
barrel_shifter shifter (
|
||||
.shifter_input(alu_inputs.shifter_in),
|
||||
.shift_amount(alu_inputs.shift_amount),
|
||||
.arith(alu_inputs.arith),
|
||||
.lshift(alu_inputs.lshift),
|
||||
.shifter_input(rf[RS1]),
|
||||
.shift_amount(imm_type ? issue_rs_addr[RS2] : rf[RS2][4:0]),
|
||||
.arith(rf[RS1][31] & issue_stage.instruction[30]),
|
||||
.lshift(~issue_stage.fn3[2]),
|
||||
.shifted_result(shift_result)
|
||||
);
|
||||
|
||||
always_comb begin
|
||||
case (alu_inputs.alu_op)
|
||||
ALU_CONSTANT : result = alu_inputs.constant_adder;
|
||||
case (alu_op_r)
|
||||
ALU_CONSTANT : result = constant_alu;//LUI, AUIPC, JAL, JALR
|
||||
ALU_ADD_SUB : result = add_sub_result[31:0];
|
||||
ALU_SLT : result = {31'b0, add_sub_result[XLEN]};
|
||||
ALU_SLT : result = {31'b0, add_sub_result[32]};
|
||||
default : result = shift_result; //ALU_SHIFT
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -37,18 +37,21 @@ module branch_unit
|
|||
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input logic [31:0] constant_alu,
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
unit_issue_interface.unit issue,
|
||||
input branch_inputs_t branch_inputs,
|
||||
output branch_results_t br_results,
|
||||
output logic branch_flush,
|
||||
|
||||
exception_interface.unit exception
|
||||
);
|
||||
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
|
||||
|
||||
logic branch_issued_r;
|
||||
logic result;
|
||||
|
@ -66,15 +69,87 @@ module branch_unit
|
|||
logic instruction_is_completing;
|
||||
|
||||
logic branch_complete;
|
||||
logic jal_jalr_ex;
|
||||
logic jal_or_jalr_ex;
|
||||
|
||||
logic [32:0] rs1;
|
||||
logic [32:0] rs2;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
assign instruction = decode_stage.instruction;
|
||||
|
||||
assign unit_needed = decode_stage.instruction inside {
|
||||
BEQ, BNE, BLT, BGE, BLTU, BGEU, JALR, JAL
|
||||
};
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = decode_stage.instruction inside {
|
||||
BEQ, BNE, BLT, BGE, BLTU, BGEU, JALR
|
||||
};
|
||||
uses_rs[RS2] = decode_stage.instruction inside {
|
||||
BEQ, BNE, BLT, BGE, BLTU, BGEU
|
||||
};
|
||||
uses_rd = 0;//JALR/JAL writeback handled by ALU
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//RAS Support
|
||||
logic rs1_link;
|
||||
logic rd_link;
|
||||
logic rs1_eq_rd;
|
||||
logic is_return;
|
||||
logic is_call;
|
||||
|
||||
assign rs1_link = instruction.rs1_addr inside {1,5};
|
||||
assign rd_link = instruction.rd_addr inside {1,5};
|
||||
assign rs1_eq_rd = (instruction.rs1_addr == instruction.rd_addr);
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
is_return <= (instruction.upper_opcode inside {JALR_T}) & ((rs1_link & ~rd_link) | (rs1_link & rd_link & ~rs1_eq_rd));
|
||||
is_call <= (instruction.upper_opcode inside {JAL_T, JALR_T}) & rd_link;
|
||||
end
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//PC Offset
|
||||
logic[19:0] jal_imm;
|
||||
logic[11:0] jalr_imm;
|
||||
logic[11:0] br_imm;
|
||||
|
||||
logic [20:0] pc_offset;
|
||||
logic [20:0] pc_offset_r;
|
||||
assign jal_imm = {decode_stage.instruction[31], decode_stage.instruction[19:12], decode_stage.instruction[20], decode_stage.instruction[30:21]};
|
||||
assign jalr_imm = decode_stage.instruction[31:20];
|
||||
assign br_imm = {decode_stage.instruction[31], decode_stage.instruction[7], decode_stage.instruction[30:25], decode_stage.instruction[11:8]};
|
||||
|
||||
always_comb begin
|
||||
case (decode_stage.instruction[3:2])
|
||||
2'b11 : pc_offset = 21'(signed'({jal_imm, 1'b0}));
|
||||
2'b01 : pc_offset = 21'(signed'(jalr_imm));
|
||||
default : pc_offset = 21'(signed'({br_imm, 1'b0}));
|
||||
endcase
|
||||
end
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready)
|
||||
pc_offset_r <= pc_offset;
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
logic jal;
|
||||
logic jalr;
|
||||
logic jal_or_jalr;
|
||||
logic br_use_signed;
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
jal <= decode_stage.instruction[3];
|
||||
jalr <= (~decode_stage.instruction[3] & decode_stage.instruction[2]);
|
||||
jal_or_jalr <= decode_stage.instruction[2];
|
||||
br_use_signed <= !(instruction.fn3 inside {BLTU_fn3, BGEU_fn3});
|
||||
end
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
|
@ -88,36 +163,40 @@ module branch_unit
|
|||
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) branch_issued_m (
|
||||
.clk, .rst,
|
||||
.set(issue.new_request),
|
||||
.clr(branch_inputs.issue_pc_valid | exception.valid),
|
||||
.clr(issue_stage.stage_valid | exception.valid),
|
||||
.result(branch_issued_r)
|
||||
);
|
||||
|
||||
//To determine if the branch was predicted correctly we need to wait until the
|
||||
//subsequent instruction has reached the issue stage
|
||||
assign instruction_is_completing = branch_issued_r & branch_inputs.issue_pc_valid;
|
||||
assign instruction_is_completing = branch_issued_r & issue_stage.stage_valid;
|
||||
|
||||
//Sign extend
|
||||
assign rs1 = {(rf[RS1][31] & br_use_signed), rf[RS1]};
|
||||
assign rs2 = {(rf[RS2][31] & br_use_signed), rf[RS2]};
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Branch/Jump target determination
|
||||
//Branch comparison and final address calculation
|
||||
//are performed in the issue stage
|
||||
branch_comparator bc (
|
||||
.less_than(branch_inputs.fn3[2]),
|
||||
.a(branch_inputs.rs1),
|
||||
.b(branch_inputs.rs2),
|
||||
.xor_result(branch_inputs.fn3[0]),
|
||||
.less_than(issue_stage.fn3[2]),
|
||||
.a(rs1),
|
||||
.b(rs2),
|
||||
.xor_result(issue_stage.fn3[0]),
|
||||
.result(result)
|
||||
);
|
||||
assign branch_taken = result | branch_inputs.jal_jalr;
|
||||
assign branch_taken = result | jal_or_jalr;
|
||||
|
||||
assign jump_pc = (branch_inputs.jalr ? branch_inputs.rs1[31:0] : branch_inputs.issue_pc) + 32'(signed'(branch_inputs.pc_offset));
|
||||
assign new_pc = branch_taken ? jump_pc : branch_inputs.pc_p4;
|
||||
assign jump_pc = (jalr ? rs1[31:0] : issue_stage.pc) + 32'(signed'(pc_offset_r));
|
||||
assign new_pc = branch_taken ? jump_pc : constant_alu;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue.new_request) begin
|
||||
branch_taken_ex <= branch_taken;
|
||||
new_pc_ex <= {new_pc[31:1], new_pc[0] & ~branch_inputs.jalr};
|
||||
new_pc_ex <= {new_pc[31:1], new_pc[0] & ~jalr};
|
||||
id_ex <= issue.id;
|
||||
jal_jalr_ex <= branch_inputs.jal_jalr;
|
||||
jal_or_jalr_ex <= jal_or_jalr;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -145,13 +224,13 @@ module branch_unit
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//Predictor support
|
||||
logic is_return;
|
||||
logic is_call;
|
||||
logic is_return_ex;
|
||||
logic is_call_ex;
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue.possible_issue) begin
|
||||
is_return <= branch_inputs.is_return;
|
||||
is_call <= branch_inputs.is_call;
|
||||
pc_ex <= branch_inputs.issue_pc;
|
||||
is_return_ex <= is_return;
|
||||
is_call_ex <= is_call;
|
||||
pc_ex <= issue_stage.pc;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -160,11 +239,11 @@ module branch_unit
|
|||
assign br_results.pc = pc_ex;
|
||||
assign br_results.target_pc = new_pc_ex;
|
||||
assign br_results.branch_taken = branch_taken_ex;
|
||||
assign br_results.is_branch = ~jal_jalr_ex;
|
||||
assign br_results.is_return = is_return;
|
||||
assign br_results.is_call = is_call;
|
||||
assign br_results.is_branch = ~jal_or_jalr_ex;
|
||||
assign br_results.is_return = is_return_ex;
|
||||
assign br_results.is_call = is_call_ex;
|
||||
|
||||
assign branch_flush = instruction_is_completing && (branch_inputs.issue_pc[31:1] != new_pc_ex[31:1]);
|
||||
assign branch_flush = instruction_is_completing & (issue_stage.pc[31:1] != new_pc_ex[31:1]);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//End of Implementation
|
||||
|
|
|
@ -38,14 +38,16 @@ module csr_unit
|
|||
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
//Unit Interfaces
|
||||
unit_issue_interface.unit issue,
|
||||
input csr_inputs_t csr_inputs,
|
||||
unit_writeback_interface.unit wb,
|
||||
|
||||
//Privilege
|
||||
|
@ -82,10 +84,19 @@ module csr_unit
|
|||
input interrupt_t m_interrupt
|
||||
);
|
||||
|
||||
typedef struct packed{
|
||||
csr_addr_t addr;
|
||||
logic[1:0] op;
|
||||
logic reads;
|
||||
logic writes;
|
||||
logic [XLEN-1:0] data;
|
||||
} csr_inputs_t;
|
||||
|
||||
logic busy;
|
||||
logic commit;
|
||||
logic commit_in_progress;
|
||||
|
||||
csr_inputs_t csr_inputs;
|
||||
csr_inputs_t csr_inputs_r;
|
||||
|
||||
privilege_t privilege_level;
|
||||
|
@ -114,12 +125,20 @@ module csr_unit
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
assign unit_needed = decode_stage.instruction inside {
|
||||
CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI
|
||||
};
|
||||
|
||||
assign unit_needed = decode_stage.instruction inside {CSRRW, CSRRS, CSRRC, CSRRWI, CSRRSI, CSRRCI};
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = decode_stage.instruction inside {CSRRW, CSRRS, CSRRC};
|
||||
uses_rd = unit_needed;
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
assign csr_inputs.addr = issue_stage.instruction[31:20];
|
||||
assign csr_inputs.op = issue_stage.fn3[1:0];
|
||||
assign csr_inputs.data = issue_stage.fn3[2] ? {27'b0, issue_rs_addr[RS1]} : rf[RS1];
|
||||
assign csr_inputs.reads = ~((issue_stage.fn3[1:0] == CSR_RW) && (issue_stage.rd_addr == 0));
|
||||
assign csr_inputs.writes = ~((issue_stage.fn3[1:0] == CSR_RC) && (issue_rs_addr[RS1] == 0));
|
||||
|
||||
assign processing_csr = busy | issue.new_request;
|
||||
|
||||
assign issue.ready = ~busy;
|
||||
|
|
|
@ -33,6 +33,9 @@ module custom_unit
|
|||
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
@ -56,8 +59,13 @@ module custom_unit
|
|||
|
||||
//The following signals should be asserted when the decoded instruction
|
||||
//is handled by this execution unit.
|
||||
assign unit_needed = instruction.upper_opcode inside {CUSTOM_T};
|
||||
|
||||
assign unit_needed = decode_stage.instruction inside {CUSTOM};
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = decode_stage.instruction inside {CUSTOM};
|
||||
uses_rs[RS2] = decode_stage.instruction inside {CUSTOM};
|
||||
uses_rd = decode_stage.instruction inside {CUSTOM};
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
assign issue.ready = ~wb.done;
|
||||
|
|
60
core/cva5.sv
60
core/cva5.sv
|
@ -116,14 +116,10 @@ module cva5
|
|||
register_file_issue_interface #(.NUM_WB_GROUPS(CONFIG.NUM_WB_GROUPS)) rf_issue();
|
||||
|
||||
logic [NUM_UNITS-1:0] unit_needed;
|
||||
logic [NUM_UNITS-1:0][REGFILE_READ_PORTS-1:0] unit_uses_rs;
|
||||
logic [NUM_UNITS-1:0] unit_uses_rd;
|
||||
|
||||
alu_inputs_t alu_inputs;
|
||||
load_store_inputs_t ls_inputs;
|
||||
branch_inputs_t branch_inputs;
|
||||
mul_inputs_t mul_inputs;
|
||||
div_inputs_t div_inputs;
|
||||
gc_inputs_t gc_inputs;
|
||||
csr_inputs_t csr_inputs;
|
||||
logic [31:0] constant_alu;
|
||||
|
||||
unit_issue_interface unit_issue [NUM_UNITS-1:0]();
|
||||
|
||||
|
@ -197,6 +193,8 @@ module cva5
|
|||
|
||||
//Decode Unit and Fetch Unit
|
||||
logic issue_stage_ready;
|
||||
phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS];
|
||||
rs_addr_t issue_rs_addr [REGFILE_READ_PORTS];
|
||||
logic illegal_instruction;
|
||||
logic instruction_issued;
|
||||
logic instruction_issued_with_rd;
|
||||
|
@ -361,6 +359,8 @@ module cva5
|
|||
.decode (decode),
|
||||
.decode_advance (decode_advance),
|
||||
.unit_needed (unit_needed),
|
||||
.unit_uses_rs (unit_uses_rs),
|
||||
.unit_uses_rd (unit_uses_rd),
|
||||
.renamer (decode_rename_interface),
|
||||
.decode_uses_rd (decode_uses_rd),
|
||||
.decode_rd_addr (decode_rd_addr),
|
||||
|
@ -372,14 +372,10 @@ module cva5
|
|||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.issue (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.issue_phys_rs_addr (issue_phys_rs_addr),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.rf (rf_issue),
|
||||
.alu_inputs (alu_inputs),
|
||||
.ls_inputs (ls_inputs),
|
||||
.branch_inputs (branch_inputs),
|
||||
.gc_inputs (gc_inputs),
|
||||
.csr_inputs (csr_inputs),
|
||||
.mul_inputs (mul_inputs),
|
||||
.div_inputs (div_inputs),
|
||||
.constant_alu (constant_alu),
|
||||
.unit_issue (unit_issue),
|
||||
.gc (gc),
|
||||
.current_privilege (current_privilege),
|
||||
|
@ -413,9 +409,11 @@ module cva5
|
|||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.unit_needed (unit_needed[UNIT_IDS.BR]),
|
||||
.rf (rf_issue.data),
|
||||
.uses_rs (unit_uses_rs[UNIT_IDS.BR]),
|
||||
.uses_rd (unit_uses_rd[UNIT_IDS.BR]),
|
||||
.rf (rf_issue.data),
|
||||
.constant_alu (constant_alu),
|
||||
.issue (unit_issue[UNIT_IDS.BR]),
|
||||
.branch_inputs (branch_inputs),
|
||||
.br_results (br_results),
|
||||
.branch_flush (branch_flush),
|
||||
.exception (exception[BR_EXCEPTION])
|
||||
|
@ -429,8 +427,11 @@ module cva5
|
|||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.unit_needed (unit_needed[UNIT_IDS.ALU]),
|
||||
.uses_rs (unit_uses_rs[UNIT_IDS.ALU]),
|
||||
.uses_rd (unit_uses_rd[UNIT_IDS.ALU]),
|
||||
.rf (rf_issue.data),
|
||||
.alu_inputs (alu_inputs),
|
||||
.constant_alu (constant_alu),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.issue (unit_issue[UNIT_IDS.ALU]),
|
||||
.wb (unit_wb1[ALU_UNIT_WB1_ID])
|
||||
);
|
||||
|
@ -444,8 +445,12 @@ module cva5
|
|||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.unit_needed (unit_needed[UNIT_IDS.LS]),
|
||||
.uses_rs (unit_uses_rs[UNIT_IDS.LS]),
|
||||
.uses_rd (unit_uses_rd[UNIT_IDS.LS]),
|
||||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.rs2_inuse (rf_issue.inuse[RS2]),
|
||||
.rf (rf_issue.data),
|
||||
.ls_inputs (ls_inputs),
|
||||
.issue (unit_issue[UNIT_IDS.LS]),
|
||||
.dcache_on (1'b1),
|
||||
.clear_reservation (1'b0),
|
||||
|
@ -503,9 +508,11 @@ module cva5
|
|||
.decode_stage (decode),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.unit_needed (unit_needed[UNIT_IDS.CSR]),
|
||||
.uses_rs (unit_uses_rs[UNIT_IDS.CSR]),
|
||||
.uses_rd (unit_uses_rd[UNIT_IDS.CSR]),
|
||||
.rf (rf_issue.data),
|
||||
.csr_inputs (csr_inputs),
|
||||
.issue (unit_issue[UNIT_IDS.CSR]),
|
||||
.wb (unit_wb3[CSR_UNIT_WB3_ID]),
|
||||
.current_privilege(current_privilege),
|
||||
|
@ -536,9 +543,11 @@ module cva5
|
|||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.unit_needed (unit_needed[UNIT_IDS.IEC]),
|
||||
.uses_rs (unit_uses_rs[UNIT_IDS.IEC]),
|
||||
.uses_rd (unit_uses_rd[UNIT_IDS.IEC]),
|
||||
.constant_alu (constant_alu),
|
||||
.rf (rf_issue.data),
|
||||
.issue (unit_issue[UNIT_IDS.IEC]),
|
||||
.gc_inputs (gc_inputs),
|
||||
.branch_flush (branch_flush),
|
||||
.exception (exception),
|
||||
.exception_target_pc (exception_target_pc),
|
||||
|
@ -566,8 +575,9 @@ module cva5
|
|||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.unit_needed (unit_needed[UNIT_IDS.MUL]),
|
||||
.uses_rs (unit_uses_rs[UNIT_IDS.MUL]),
|
||||
.uses_rd (unit_uses_rd[UNIT_IDS.MUL]),
|
||||
.rf (rf_issue.data),
|
||||
.mul_inputs (mul_inputs),
|
||||
.issue (unit_issue[UNIT_IDS.MUL]),
|
||||
.wb (unit_wb3[MUL_UNIT_WB3_ID])
|
||||
);
|
||||
|
@ -577,12 +587,16 @@ module cva5
|
|||
div_unit div_unit_block (
|
||||
.clk (clk),
|
||||
.rst (rst),
|
||||
.gc (gc),
|
||||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.decode_stage (decode),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.issue_phys_rs_addr (issue_phys_rs_addr),
|
||||
.unit_needed (unit_needed[UNIT_IDS.DIV]),
|
||||
.uses_rs (unit_uses_rs[UNIT_IDS.DIV]),
|
||||
.uses_rd (unit_uses_rd[UNIT_IDS.DIV]),
|
||||
.rf (rf_issue.data),
|
||||
.div_inputs (div_inputs),
|
||||
.issue (unit_issue[UNIT_IDS.DIV]),
|
||||
.wb (unit_wb3[DIV_UNIT_WB3_ID])
|
||||
);
|
||||
|
@ -595,6 +609,8 @@ module cva5
|
|||
.rst (rst),
|
||||
.decode_stage (decode),
|
||||
.unit_needed (unit_needed[UNIT_IDS.CUSTOM]),
|
||||
.uses_rs (unit_uses_rs[UNIT_IDS.CUSTOM]),
|
||||
.uses_rd (unit_uses_rd[UNIT_IDS.CUSTOM]),
|
||||
.issue_stage (issue),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.rf (rf_issue.data),
|
||||
|
|
|
@ -41,13 +41,6 @@ package cva5_types;
|
|||
ALU_SHIFT = 2'b11
|
||||
} alu_op_t;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
ALU_LOGIC_XOR = 2'b00,
|
||||
ALU_LOGIC_OR = 2'b01,
|
||||
ALU_LOGIC_AND = 2'b10,
|
||||
ALU_LOGIC_ADD = 2'b11
|
||||
} alu_logic_op_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic valid;
|
||||
exception_code_t code;
|
||||
|
@ -86,34 +79,6 @@ package cva5_types;
|
|||
fetch_metadata_t fetch_metadata;
|
||||
} issue_packet_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic [XLEN:0] in1;//contains sign padding bit for slt operation
|
||||
logic [XLEN:0] in2;//contains sign padding bit for slt operation
|
||||
logic [XLEN-1:0] shifter_in;
|
||||
logic [31:0] constant_adder;
|
||||
alu_op_t alu_op;
|
||||
alu_logic_op_t logic_op;
|
||||
logic [4:0] shift_amount;
|
||||
logic subtract;
|
||||
logic arith;//contains sign padding bit for arithmetic shift right operation
|
||||
logic lshift;
|
||||
} alu_inputs_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [XLEN:0] rs1;
|
||||
logic [XLEN:0] rs2;
|
||||
logic [31:0] pc_p4;
|
||||
logic [2:0] fn3;
|
||||
logic [31:0] issue_pc;
|
||||
logic issue_pc_valid;
|
||||
logic jal;
|
||||
logic jalr;
|
||||
logic jal_jalr;
|
||||
logic is_call;
|
||||
logic is_return;
|
||||
logic [20:0] pc_offset;
|
||||
} branch_inputs_t;
|
||||
|
||||
typedef struct packed {
|
||||
id_t id;
|
||||
logic valid;
|
||||
|
@ -138,48 +103,6 @@ package cva5_types;
|
|||
logic [4:0] op;
|
||||
} amo_details_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic [XLEN-1:0] rs1;
|
||||
logic [XLEN-1:0] rs2;
|
||||
logic [11:0] offset;
|
||||
logic [2:0] fn3;
|
||||
logic load;
|
||||
logic store;
|
||||
logic fence;
|
||||
logic forwarded_store;
|
||||
id_t store_forward_id;
|
||||
//amo support
|
||||
amo_details_t amo;
|
||||
} load_store_inputs_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic [XLEN-1:0] rs1;
|
||||
logic [XLEN-1:0] rs2;
|
||||
logic [1:0] op;
|
||||
} mul_inputs_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic [XLEN-1:0] rs1;
|
||||
logic [XLEN-1:0] rs2;
|
||||
logic [1:0] op;
|
||||
logic reuse_result;
|
||||
} div_inputs_t;
|
||||
|
||||
typedef struct packed{
|
||||
csr_addr_t addr;
|
||||
logic[1:0] op;
|
||||
logic reads;
|
||||
logic writes;
|
||||
logic [XLEN-1:0] data;
|
||||
} csr_inputs_t;
|
||||
|
||||
typedef struct packed{
|
||||
logic [31:0] pc_p4;
|
||||
logic is_ifence;
|
||||
logic is_mret;
|
||||
logic is_sret;
|
||||
} gc_inputs_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] addr;
|
||||
logic load;
|
||||
|
|
|
@ -26,6 +26,7 @@ module decode_and_issue
|
|||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
import csr_types::*;
|
||||
import opcodes::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG,
|
||||
|
@ -47,6 +48,8 @@ module decode_and_issue
|
|||
renamer_interface.decode renamer,
|
||||
|
||||
input logic [NUM_UNITS-1:0] unit_needed,
|
||||
input logic [NUM_UNITS-1:0][REGFILE_READ_PORTS-1:0] unit_uses_rs,
|
||||
input logic [NUM_UNITS-1:0] unit_uses_rd,
|
||||
|
||||
output logic decode_uses_rd,
|
||||
output rs_addr_t decode_rd_addr,
|
||||
|
@ -57,18 +60,14 @@ module decode_and_issue
|
|||
output logic instruction_issued,
|
||||
output logic instruction_issued_with_rd,
|
||||
output issue_packet_t issue,
|
||||
output rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
|
||||
output phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS],
|
||||
output logic issue_stage_ready,
|
||||
|
||||
//Register File
|
||||
register_file_issue_interface.issue rf,
|
||||
|
||||
output alu_inputs_t alu_inputs,
|
||||
output load_store_inputs_t ls_inputs,
|
||||
output branch_inputs_t branch_inputs,
|
||||
output gc_inputs_t gc_inputs,
|
||||
output csr_inputs_t csr_inputs,
|
||||
output mul_inputs_t mul_inputs,
|
||||
output div_inputs_t div_inputs,
|
||||
output logic [31:0] constant_alu,
|
||||
|
||||
unit_issue_interface.decode unit_issue [NUM_UNITS-1:0],
|
||||
|
||||
|
@ -78,38 +77,28 @@ module decode_and_issue
|
|||
exception_interface.unit exception
|
||||
);
|
||||
|
||||
logic [2:0] fn3;
|
||||
logic [6:0] opcode;
|
||||
logic [4:0] opcode_trim;
|
||||
|
||||
common_instruction_t decode_instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
|
||||
|
||||
logic uses_rs [REGFILE_READ_PORTS];
|
||||
logic uses_rd;
|
||||
|
||||
rs_addr_t rs_addr [REGFILE_READ_PORTS];
|
||||
rs_addr_t rd_addr;
|
||||
|
||||
logic is_csr;
|
||||
logic is_fence;
|
||||
logic is_ifence;
|
||||
logic csr_imm_op;
|
||||
logic environment_op;
|
||||
rs_addr_t decode_rs_addr [REGFILE_READ_PORTS];
|
||||
|
||||
logic issue_valid;
|
||||
logic operands_ready;
|
||||
logic mult_div_op;
|
||||
|
||||
logic [NUM_UNITS-1:0] unit_needed_issue_stage;
|
||||
logic [NUM_UNITS-1:0] unit_ready;
|
||||
logic [NUM_UNITS-1:0] issue_ready;
|
||||
logic [NUM_UNITS-1:0] issue_to;
|
||||
|
||||
rs_addr_t issue_rs_addr [REGFILE_READ_PORTS];
|
||||
phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS];
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rs_wb_group [REGFILE_READ_PORTS];
|
||||
logic issue_uses_rs [REGFILE_READ_PORTS];
|
||||
|
||||
logic pre_issue_exception_pending;
|
||||
logic illegal_instruction_pattern;
|
||||
logic illegal_instruction_pattern_r;
|
||||
|
||||
logic [REGFILE_READ_PORTS-1:0] rs_conflict;
|
||||
|
||||
|
@ -124,37 +113,34 @@ module decode_and_issue
|
|||
assign decode_advance = decode.valid & issue_stage_ready;
|
||||
|
||||
//Instruction aliases
|
||||
assign opcode = decode.instruction[6:0];
|
||||
assign opcode_trim = opcode[6:2];
|
||||
assign fn3 = decode.instruction[14:12];
|
||||
assign rs_addr[RS1] = decode.instruction[19:15];
|
||||
assign rs_addr[RS2] = decode.instruction[24:20];
|
||||
assign rd_addr = decode.instruction[11:7];
|
||||
|
||||
assign is_csr = CONFIG.INCLUDE_CSRS & (opcode_trim == SYSTEM_T) & (fn3 != 0);
|
||||
assign is_fence = (opcode_trim == FENCE_T) & ~fn3[0];
|
||||
assign is_ifence = CONFIG.INCLUDE_IFENCE & (opcode_trim == FENCE_T) & fn3[0];
|
||||
assign csr_imm_op = (opcode_trim == SYSTEM_T) & fn3[2];
|
||||
assign environment_op = (opcode_trim == SYSTEM_T) & (fn3 == 0);
|
||||
|
||||
assign decode_instruction = decode.instruction;
|
||||
always_comb begin
|
||||
decode_rs_addr = '{default: '0};
|
||||
decode_rs_addr[RS1] = decode_instruction.rs1_addr;
|
||||
decode_rs_addr[RS2] = decode_instruction.rs2_addr;
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Register File Support
|
||||
assign uses_rs[RS1] = opcode_trim inside {JALR_T, BRANCH_T, LOAD_T, STORE_T, ARITH_IMM_T, ARITH_T, AMO_T} | is_csr;
|
||||
assign uses_rs[RS2] = opcode_trim inside {BRANCH_T, ARITH_T, AMO_T};//Stores are exempted due to store forwarding
|
||||
assign uses_rd = opcode_trim inside {LUI_T, AUIPC_T, JAL_T, JALR_T, LOAD_T, ARITH_IMM_T, ARITH_T} | is_csr;
|
||||
|
||||
always_comb begin
|
||||
uses_rd = |unit_uses_rd;
|
||||
uses_rs = '{default: 0};
|
||||
for (int i = 0; i < NUM_UNITS; i++)
|
||||
for (int j = 0; j < REGFILE_READ_PORTS; j++)
|
||||
uses_rs[j] |= unit_uses_rs[i][j];
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Renamer Support
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] renamer_wb_group;
|
||||
always_comb begin
|
||||
renamer_wb_group = 2;
|
||||
renamer_wb_group = (CONFIG.NUM_WB_GROUPS - 1);
|
||||
if (unit_needed[UNIT_IDS.ALU])
|
||||
renamer_wb_group = 0;
|
||||
else if (unit_needed[UNIT_IDS.LS] )
|
||||
renamer_wb_group = 1;
|
||||
end
|
||||
assign renamer.rd_addr = rd_addr;
|
||||
assign renamer.rs_addr = rs_addr;
|
||||
assign renamer.rd_addr = decode_instruction.rd_addr;
|
||||
assign renamer.rs_addr = decode_rs_addr;
|
||||
assign renamer.uses_rd = uses_rd;
|
||||
|
||||
assign renamer.rd_wb_group = renamer_wb_group;
|
||||
|
@ -163,7 +149,7 @@ module decode_and_issue
|
|||
////////////////////////////////////////////////////
|
||||
//Decode ID Support
|
||||
assign decode_uses_rd = uses_rd;
|
||||
assign decode_rd_addr = rd_addr;
|
||||
assign decode_rd_addr = decode_instruction.rd_addr;
|
||||
assign decode_phys_rd_addr = renamer.phys_rd_addr;
|
||||
assign decode_phys_rs_addr = renamer.phys_rs_addr;
|
||||
assign decode_rs_wb_group = renamer.rs_wb_group;
|
||||
|
@ -175,12 +161,12 @@ module decode_and_issue
|
|||
issue.pc <= decode.pc;
|
||||
issue.instruction <= decode.instruction;
|
||||
issue.fetch_metadata <= decode.fetch_metadata;
|
||||
issue.fn3 <= fn3;
|
||||
issue.opcode <= opcode;
|
||||
issue_rs_addr <= rs_addr;
|
||||
issue.fn3 <= decode_instruction.fn3;
|
||||
issue.opcode <= decode.instruction[6:0];
|
||||
issue_rs_addr <= decode_rs_addr;
|
||||
issue_phys_rs_addr <= renamer.phys_rs_addr;
|
||||
issue_rs_wb_group <= renamer.rs_wb_group;
|
||||
issue.rd_addr <= rd_addr;
|
||||
issue.rd_addr <= decode_instruction.rd_addr;
|
||||
issue.phys_rd_addr <= renamer.phys_rd_addr;
|
||||
issue.is_multicycle <= ~unit_needed[UNIT_IDS.ALU];
|
||||
issue.id <= decode.id;
|
||||
|
@ -231,290 +217,14 @@ module decode_and_issue
|
|||
|
||||
assign rf.single_cycle_or_flush = (instruction_issued_with_rd & |issue.rd_addr & ~issue.is_multicycle) | (issue.stage_valid & issue.uses_rd & |issue.rd_addr & gc.fetch_flush);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//ALU unit inputs
|
||||
logic [XLEN-1:0] alu_rs2_data;
|
||||
logic alu_imm_type;
|
||||
logic [31:0] constant_alu;
|
||||
alu_op_t alu_op;
|
||||
alu_op_t alu_op_r;
|
||||
alu_logic_op_t alu_logic_op;
|
||||
alu_logic_op_t alu_logic_op_r;
|
||||
logic alu_subtract;
|
||||
logic sub_instruction;
|
||||
|
||||
always_comb begin
|
||||
case (opcode_trim) inside
|
||||
LUI_T, AUIPC_T, JAL_T, JALR_T : alu_op = ALU_CONSTANT;
|
||||
default :
|
||||
case (fn3) inside
|
||||
SLTU_fn3, SLT_fn3 : alu_op = ALU_SLT;
|
||||
SLL_fn3, SRA_fn3 : alu_op = ALU_SHIFT;
|
||||
default : alu_op = ALU_ADD_SUB;
|
||||
endcase
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
case (fn3)
|
||||
XOR_fn3 : alu_logic_op = ALU_LOGIC_XOR;
|
||||
OR_fn3 : alu_logic_op = ALU_LOGIC_OR;
|
||||
AND_fn3 : alu_logic_op = ALU_LOGIC_AND;
|
||||
default : alu_logic_op = ALU_LOGIC_ADD;//ADD/SUB/SLT/SLTU
|
||||
endcase
|
||||
end
|
||||
|
||||
assign sub_instruction = (fn3 == ADD_SUB_fn3) && decode.instruction[30] && opcode[5];//If ARITH instruction
|
||||
|
||||
//Constant ALU:
|
||||
// provides LUI, AUIPC, JAL, JALR results for ALU
|
||||
// provides PC+4 for BRANCH unit and ifence in GC unit
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
constant_alu <= ((opcode_trim inside {LUI_T}) ? '0 : decode.pc) + ((opcode_trim inside {LUI_T, AUIPC_T}) ? {decode.instruction[31:12], 12'b0} : 4);
|
||||
alu_imm_type <= opcode_trim inside {ARITH_IMM_T};
|
||||
alu_op_r <= alu_op;
|
||||
alu_subtract <= (fn3 inside {SLTU_fn3, SLT_fn3}) || sub_instruction;
|
||||
alu_logic_op_r <= alu_logic_op;
|
||||
end
|
||||
if (issue_stage_ready)
|
||||
constant_alu <= ((decode_instruction.upper_opcode inside {LUI_T}) ? '0 : decode.pc) + ((decode_instruction.upper_opcode inside {LUI_T, AUIPC_T}) ? {decode.instruction[31:12], 12'b0} : 4);
|
||||
end
|
||||
|
||||
//Shifter related
|
||||
assign alu_inputs.lshift = ~issue.fn3[2];
|
||||
assign alu_inputs.shift_amount = alu_imm_type ? issue_rs_addr[RS2] : rf.data[RS2][4:0];
|
||||
assign alu_inputs.arith = rf.data[RS1][XLEN-1] & issue.instruction[30];//shift in bit
|
||||
assign alu_inputs.shifter_in = rf.data[RS1];
|
||||
|
||||
//LUI, AUIPC, JAL, JALR
|
||||
assign alu_inputs.constant_adder = constant_alu;
|
||||
|
||||
//logic and adder
|
||||
assign alu_inputs.subtract = alu_subtract;
|
||||
assign alu_inputs.logic_op = alu_logic_op_r;
|
||||
assign alu_inputs.in1 = {(rf.data[RS1][XLEN-1] & ~issue.fn3[0]), rf.data[RS1]};//(fn3[0] is SLTU_fn3);
|
||||
assign alu_rs2_data = alu_imm_type ? 32'(signed'(issue.instruction[31:20])) : rf.data[RS2];
|
||||
assign alu_inputs.in2 = {(alu_rs2_data[XLEN-1] & ~issue.fn3[0]), alu_rs2_data};
|
||||
|
||||
assign alu_inputs.alu_op = alu_op_r;
|
||||
////////////////////////////////////////////////////
|
||||
//Load Store unit inputs
|
||||
logic is_load;
|
||||
logic is_store;
|
||||
logic amo_op;
|
||||
logic store_conditional;
|
||||
logic load_reserve;
|
||||
logic [4:0] amo_type;
|
||||
|
||||
assign amo_op = CONFIG.INCLUDE_AMO ? (opcode_trim == AMO_T) : 1'b0;
|
||||
assign amo_type = decode.instruction[31:27];
|
||||
assign store_conditional = (amo_type == AMO_SC_FN5);
|
||||
assign load_reserve = (amo_type == AMO_LR_FN5);
|
||||
|
||||
generate if (CONFIG.INCLUDE_AMO) begin : gen_decode_ls_amo
|
||||
assign ls_inputs.amo.is_lr = load_reserve;
|
||||
assign ls_inputs.amo.is_sc = store_conditional;
|
||||
assign ls_inputs.amo.is_amo = amo_op & ~(load_reserve | store_conditional);
|
||||
assign ls_inputs.amo.op = amo_type;
|
||||
end
|
||||
else begin
|
||||
assign ls_inputs.amo = '0;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
assign is_load = (opcode_trim inside {LOAD_T, AMO_T}) && !(amo_op & store_conditional); //LR and AMO_ops perform a read operation as well
|
||||
assign is_store = (opcode_trim == STORE_T) || (amo_op && store_conditional);//Used for LS unit and for ID tracking
|
||||
|
||||
logic [11:0] ls_offset;
|
||||
logic is_load_r;
|
||||
logic is_store_r;
|
||||
logic is_fence_r;
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
ls_offset <= opcode[5] ? {decode.instruction[31:25], decode.instruction[11:7]} : decode.instruction[31:20];
|
||||
is_load_r <= is_load;
|
||||
is_store_r <= is_store;
|
||||
is_fence_r <= is_fence;
|
||||
end
|
||||
end
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check" *) id_t rd_to_id_table [32];
|
||||
always_ff @ (posedge clk) begin
|
||||
if (instruction_issued_with_rd)
|
||||
rd_to_id_table[issue.rd_addr] <= issue.id;
|
||||
end
|
||||
|
||||
assign ls_inputs.offset = ls_offset;
|
||||
assign ls_inputs.load = is_load_r;
|
||||
assign ls_inputs.store = is_store_r;
|
||||
assign ls_inputs.fence = is_fence_r;
|
||||
assign ls_inputs.fn3 = amo_op ? LS_W_fn3 : issue.fn3;
|
||||
assign ls_inputs.rs1 = rf.data[RS1];
|
||||
assign ls_inputs.rs2 = rf.data[RS2];
|
||||
assign ls_inputs.forwarded_store = rf.inuse[RS2];
|
||||
assign ls_inputs.store_forward_id = rd_to_id_table[issue_rs_addr[RS2]];
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Branch unit inputs
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//RAS Support
|
||||
logic rs1_link;
|
||||
logic rd_link;
|
||||
logic rs1_eq_rd;
|
||||
logic is_return;
|
||||
logic is_call;
|
||||
assign rs1_link = (rs_addr[RS1] inside {1,5});
|
||||
assign rd_link = (rd_addr inside {1,5});
|
||||
assign rs1_eq_rd = (rs_addr[RS1] == rd_addr);
|
||||
|
||||
logic br_use_signed;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
is_return <= (opcode_trim == JALR_T) && ((rs1_link & ~rd_link) | (rs1_link & rd_link & ~rs1_eq_rd));
|
||||
is_call <= (opcode_trim inside {JAL_T, JALR_T}) && rd_link;
|
||||
br_use_signed <= !(fn3 inside {BLTU_fn3, BGEU_fn3});
|
||||
end
|
||||
end
|
||||
|
||||
logic[19:0] jal_imm;
|
||||
logic[11:0] jalr_imm;
|
||||
logic[11:0] br_imm;
|
||||
|
||||
logic [20:0] pc_offset;
|
||||
logic [20:0] pc_offset_r;
|
||||
assign jal_imm = {decode.instruction[31], decode.instruction[19:12], decode.instruction[20], decode.instruction[30:21]};
|
||||
assign jalr_imm = decode.instruction[31:20];
|
||||
assign br_imm = {decode.instruction[31], decode.instruction[7], decode.instruction[30:25], decode.instruction[11:8]};
|
||||
|
||||
|
||||
always_comb begin
|
||||
case (opcode[3:2])
|
||||
2'b11 : pc_offset = 21'(signed'({jal_imm, 1'b0}));
|
||||
2'b01 : pc_offset = 21'(signed'(jalr_imm));
|
||||
default : pc_offset = 21'(signed'({br_imm, 1'b0}));
|
||||
endcase
|
||||
end
|
||||
|
||||
logic jalr;
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
pc_offset_r <= pc_offset;
|
||||
jalr <= (~opcode[3] & opcode[2]);
|
||||
end
|
||||
end
|
||||
|
||||
assign branch_inputs.is_return = is_return;
|
||||
assign branch_inputs.is_call = is_call;
|
||||
assign branch_inputs.fn3 = issue.fn3;
|
||||
assign branch_inputs.pc_offset = pc_offset_r;
|
||||
assign branch_inputs.jal = issue.opcode[3];//(opcode == JAL);
|
||||
assign branch_inputs.jalr = jalr;
|
||||
assign branch_inputs.jal_jalr = issue.opcode[2];
|
||||
|
||||
assign branch_inputs.issue_pc = issue.pc;
|
||||
assign branch_inputs.issue_pc_valid = issue.stage_valid;
|
||||
assign branch_inputs.rs1 = {(rf.data[RS1][31] & br_use_signed), rf.data[RS1]};
|
||||
assign branch_inputs.rs2 = {(rf.data[RS2][31] & br_use_signed), rf.data[RS2]};
|
||||
assign branch_inputs.pc_p4 = constant_alu;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Global Control unit inputs
|
||||
logic is_ecall_r;
|
||||
logic is_ebreak_r;
|
||||
logic is_mret_r;
|
||||
logic is_sret_r;
|
||||
logic is_ifence_r;
|
||||
|
||||
logic [7:0] sys_op_match;
|
||||
typedef enum logic [2:0] {
|
||||
ECALL_i = 0,
|
||||
EBREAK_i = 1,
|
||||
URET_i = 2,
|
||||
SRET_i = 3,
|
||||
MRET_i = 4,
|
||||
SFENCE_i = 5
|
||||
} sys_op_index_t;
|
||||
|
||||
always_comb begin
|
||||
sys_op_match = '0;
|
||||
case (decode.instruction[31:20]) inside
|
||||
ECALL_imm : sys_op_match[ECALL_i] = CONFIG.INCLUDE_M_MODE;
|
||||
EBREAK_imm : sys_op_match[EBREAK_i] = CONFIG.INCLUDE_M_MODE;
|
||||
SRET_imm : sys_op_match[SRET_i] = CONFIG.INCLUDE_S_MODE;
|
||||
MRET_imm : sys_op_match[MRET_i] = CONFIG.INCLUDE_M_MODE;
|
||||
SFENCE_imm : sys_op_match[SFENCE_i] = CONFIG.INCLUDE_S_MODE;
|
||||
default : sys_op_match = '0;
|
||||
endcase
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
is_ecall_r <= sys_op_match[ECALL_i];
|
||||
is_ebreak_r <= sys_op_match[EBREAK_i];
|
||||
is_mret_r <= sys_op_match[MRET_i];
|
||||
is_sret_r <= sys_op_match[SRET_i];
|
||||
is_ifence_r <= is_ifence;
|
||||
end
|
||||
end
|
||||
|
||||
assign gc_inputs.pc_p4 = constant_alu;
|
||||
assign gc_inputs.is_ifence = is_ifence_r;
|
||||
assign gc_inputs.is_mret = is_mret_r;
|
||||
assign gc_inputs.is_sret = is_sret_r;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//CSR unit inputs
|
||||
generate if (CONFIG.INCLUDE_CSRS) begin : gen_decode_csr_inputs
|
||||
assign csr_inputs.addr = issue.instruction[31:20];
|
||||
assign csr_inputs.op = issue.fn3[1:0];
|
||||
assign csr_inputs.data = issue.fn3[2] ? {27'b0, issue_rs_addr[RS1]} : rf.data[RS1];
|
||||
assign csr_inputs.reads = ~((issue.fn3[1:0] == CSR_RW) && (issue.rd_addr == 0));
|
||||
assign csr_inputs.writes = ~((issue.fn3[1:0] == CSR_RC) && (issue_rs_addr[RS1] == 0));
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Mul unit inputs
|
||||
generate if (CONFIG.INCLUDE_MUL) begin : gen_decode_mul_inputs
|
||||
assign mul_inputs.rs1 = rf.data[RS1];
|
||||
assign mul_inputs.rs2 = rf.data[RS2];
|
||||
assign mul_inputs.op = issue.fn3[1:0];
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Div unit inputs
|
||||
generate if (CONFIG.INCLUDE_DIV) begin : gen_decode_div_inputs
|
||||
phys_addr_t prev_div_rs_addr [2];
|
||||
logic [1:0] div_rd_match;
|
||||
logic prev_div_result_valid;
|
||||
logic div_rs_overwrite;
|
||||
logic div_op_reuse;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_to[UNIT_IDS.DIV])
|
||||
prev_div_rs_addr <= issue_phys_rs_addr[RS1:RS2];
|
||||
end
|
||||
|
||||
assign div_op_reuse = {prev_div_result_valid, prev_div_rs_addr[RS1], prev_div_rs_addr[RS2]} == {1'b1, issue_phys_rs_addr[RS1],issue_phys_rs_addr[RS2]};
|
||||
|
||||
//Clear if prev div inputs are overwritten by another instruction
|
||||
assign div_rd_match[RS1] = (issue.phys_rd_addr == prev_div_rs_addr[RS1]);
|
||||
assign div_rd_match[RS2] = (issue.phys_rd_addr == prev_div_rs_addr[RS2]);
|
||||
assign div_rs_overwrite = |div_rd_match;
|
||||
|
||||
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) prev_div_result_valid_m (
|
||||
.clk, .rst,
|
||||
.set(instruction_issued & unit_needed_issue_stage[UNIT_IDS.DIV]),
|
||||
.clr((instruction_issued & issue.uses_rd & div_rs_overwrite) | gc.writeback_supress), //No instructions will be issued while gc.writeback_supress is asserted
|
||||
.result(prev_div_result_valid)
|
||||
);
|
||||
|
||||
assign div_inputs.rs1 = rf.data[RS1];
|
||||
assign div_inputs.rs2 = rf.data[RS2];
|
||||
assign div_inputs.op = issue.fn3[1:0];
|
||||
assign div_inputs.reuse_result = div_op_reuse;
|
||||
end endgenerate
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Unit EX signals
|
||||
generate for (i = 0; i < NUM_UNITS; i++) begin : gen_unit_issue_signals
|
||||
|
@ -526,12 +236,9 @@ module decode_and_issue
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//Illegal Instruction check
|
||||
logic illegal_instruction_pattern_r;
|
||||
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_decode_exceptions
|
||||
illegal_instruction_checker # (.CONFIG(CONFIG))
|
||||
illegal_op_check (
|
||||
.instruction(decode.instruction), .illegal_instruction(illegal_instruction_pattern)
|
||||
);
|
||||
|
||||
assign illegal_instruction_pattern = ~|unit_needed;
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
illegal_instruction_pattern_r <= 0;
|
||||
|
@ -558,7 +265,7 @@ module decode_and_issue
|
|||
always_comb begin
|
||||
case (current_privilege)
|
||||
USER_PRIVILEGE : ecall_code = ECALL_U;
|
||||
SUPERVISOR_PRIVILEGE : ecall_code = ECALL_S;
|
||||
SUPERVISOR_PRIVILEGE : ecall_code = ECALL_S;
|
||||
MACHINE_PRIVILEGE : ecall_code = ECALL_M;
|
||||
default : ecall_code = ECALL_U;
|
||||
endcase
|
||||
|
@ -573,7 +280,7 @@ module decode_and_issue
|
|||
if (rst)
|
||||
pre_issue_exception_pending <= 0;
|
||||
else if (issue_stage_ready)
|
||||
pre_issue_exception_pending <= illegal_instruction_pattern | (opcode_trim inside {SYSTEM_T} & ~is_csr & (sys_op_match[ECALL_i] | sys_op_match[EBREAK_i])) | ~decode.fetch_metadata.ok;
|
||||
pre_issue_exception_pending <= illegal_instruction_pattern | (~decode.fetch_metadata.ok) | decode.instruction inside {ECALL, EBREAK};
|
||||
end
|
||||
|
||||
assign new_exception = issue.stage_valid & pre_issue_exception_pending & ~(gc.issue_hold | gc.fetch_flush);
|
||||
|
@ -585,6 +292,12 @@ module decode_and_issue
|
|||
exception.valid <= (exception.valid | new_exception) & ~exception.ack;
|
||||
end
|
||||
|
||||
logic is_ecall_r;
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready)
|
||||
is_ecall_r <= (decode_instruction.upper_opcode == SYSTEM_T) & (decode.instruction[31:20] == ECALL_imm);
|
||||
end
|
||||
|
||||
assign ecode =
|
||||
illegal_instruction_pattern_r ? ILLEGAL_INST :
|
||||
is_ecall_r ? ecall_code :
|
||||
|
|
|
@ -30,15 +30,20 @@ module div_unit
|
|||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
input gc_outputs_t gc,
|
||||
|
||||
input logic instruction_issued_with_rd,
|
||||
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS],
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
input div_inputs_t div_inputs,
|
||||
unit_issue_interface.unit issue,
|
||||
unit_writeback_interface.unit wb
|
||||
);
|
||||
|
@ -96,44 +101,74 @@ module div_unit
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
assign unit_needed = decode_stage.instruction inside {
|
||||
DIV, DIVU, REM, REMU
|
||||
};
|
||||
|
||||
assign unit_needed = decode_stage.instruction inside {DIV, DIVU, REM, REMU};
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = unit_needed;
|
||||
uses_rs[RS2] = unit_needed;
|
||||
uses_rd = unit_needed;
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Result reuse (for div/rem pairs)
|
||||
phys_addr_t prev_div_rs_addr [2];
|
||||
logic [1:0] div_rd_match;
|
||||
logic prev_div_result_valid;
|
||||
logic div_rs_overwrite;
|
||||
logic div_op_reuse;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue.new_request)
|
||||
prev_div_rs_addr <= issue_phys_rs_addr[RS1:RS2];
|
||||
end
|
||||
|
||||
assign div_op_reuse = {prev_div_result_valid, prev_div_rs_addr[RS1], prev_div_rs_addr[RS2]} == {1'b1, issue_phys_rs_addr[RS1],issue_phys_rs_addr[RS2]};
|
||||
|
||||
//Clear if prev div inputs are overwritten by another instruction
|
||||
assign div_rd_match[RS1] = (issue_stage.phys_rd_addr == prev_div_rs_addr[RS1]);
|
||||
assign div_rd_match[RS2] = (issue_stage.phys_rd_addr == prev_div_rs_addr[RS2]);
|
||||
assign div_rs_overwrite = |div_rd_match;
|
||||
|
||||
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) prev_div_result_valid_m (
|
||||
.clk, .rst,
|
||||
.set(issue.new_request),
|
||||
.clr((instruction_issued_with_rd & div_rs_overwrite) | gc.writeback_supress), //No instructions will be issued while gc.writeback_supress is asserted
|
||||
.result(prev_div_result_valid)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Input and output sign determination
|
||||
assign signed_divop = ~div_inputs.op[0];
|
||||
assign signed_divop = ~ issue_stage.fn3[0];
|
||||
|
||||
assign negate_dividend = signed_divop & div_inputs.rs1[31];
|
||||
assign negate_divisor = signed_divop & div_inputs.rs2[31];
|
||||
assign negate_dividend = signed_divop & rf[RS1][31];
|
||||
assign negate_divisor = signed_divop & rf[RS2][31];
|
||||
|
||||
assign negate_quotient = signed_divop & (div_inputs.rs1[31] ^ div_inputs.rs2[31]);
|
||||
assign negate_remainder = signed_divop & (div_inputs.rs1[31]);
|
||||
assign negate_quotient = signed_divop & (rf[RS1][31] ^ rf[RS2][31]);
|
||||
assign negate_remainder = signed_divop & (rf[RS1][31]);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Input Processing
|
||||
assign unsigned_dividend = negate_if (div_inputs.rs1, negate_dividend);
|
||||
assign unsigned_divisor = negate_if (div_inputs.rs2, negate_divisor);
|
||||
assign unsigned_dividend = negate_if (rf[RS1], negate_dividend);
|
||||
assign unsigned_divisor = negate_if (rf[RS2], negate_divisor);
|
||||
|
||||
//Note: If this becomes the critical path, we can use the one's complemented input instead.
|
||||
//It will potentially overestimate (only when the input is a negative power-of-two), and
|
||||
//the divisor width will need to be increased by one to safely handle the case where the divisor CLZ is overestimated
|
||||
clz dividend_clz_block (.clz_input(unsigned_dividend), .clz(dividend_CLZ));
|
||||
clz divisor_clz_block (.clz_input(unsigned_divisor), .clz(divisor_CLZ));
|
||||
assign divisor_is_zero = (&divisor_CLZ) & ~div_inputs.rs2[0];
|
||||
assign divisor_is_zero = (&divisor_CLZ) & ~rf[RS2][0];
|
||||
|
||||
assign issue_fifo_inputs.unsigned_dividend = unsigned_dividend;
|
||||
assign issue_fifo_inputs.unsigned_divisor = unsigned_divisor;
|
||||
assign issue_fifo_inputs.dividend_CLZ = divisor_is_zero ? '0 : dividend_CLZ;
|
||||
assign issue_fifo_inputs.divisor_CLZ = divisor_CLZ;
|
||||
|
||||
assign issue_fifo_inputs.attr.remainder_op = div_inputs.op[1];
|
||||
assign issue_fifo_inputs.attr.negate_result = div_inputs.op[1] ? negate_remainder : (negate_quotient & ~divisor_is_zero);
|
||||
assign issue_fifo_inputs.attr.remainder_op = issue_stage.fn3[1];
|
||||
assign issue_fifo_inputs.attr.negate_result = issue_stage.fn3[1] ? negate_remainder : (negate_quotient & ~divisor_is_zero);
|
||||
assign issue_fifo_inputs.attr.divisor_is_zero = divisor_is_zero;
|
||||
assign issue_fifo_inputs.attr.reuse_result = div_inputs.reuse_result;
|
||||
assign issue_fifo_inputs.attr.reuse_result = div_op_reuse;
|
||||
assign issue_fifo_inputs.attr.id = issue.id;
|
||||
assign issue_fifo_inputs.attr.phys_addr = issue.phys_addr;
|
||||
|
||||
|
|
|
@ -39,13 +39,15 @@ module gc_unit
|
|||
//Decode
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input logic [31:0] constant_alu,
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
unit_issue_interface.unit issue,
|
||||
input gc_inputs_t gc_inputs,
|
||||
|
||||
//Branch miss predict
|
||||
input logic branch_flush,
|
||||
|
@ -118,6 +120,7 @@ module gc_unit
|
|||
|
||||
//LS exceptions (misaligned, TLB and MMU) (issue stage)
|
||||
//fetch flush, take exception. If execute or later exception occurs first, exception is overridden
|
||||
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
|
||||
|
||||
typedef enum {RST_STATE, PRE_CLEAR_STATE, INIT_CLEAR_STATE, IDLE_STATE, TLB_CLEAR_STATE, POST_ISSUE_DRAIN, PRE_ISSUE_FLUSH, POST_ISSUE_DISCARD} gc_state;
|
||||
gc_state state;
|
||||
|
@ -126,7 +129,6 @@ module gc_unit
|
|||
logic init_clear_done;
|
||||
logic tlb_clear_done;
|
||||
|
||||
gc_inputs_t gc_inputs_r;
|
||||
logic post_issue_idle;
|
||||
logic ifence_in_progress;
|
||||
logic ret_in_progress;
|
||||
|
@ -143,14 +145,48 @@ module gc_unit
|
|||
logic gc_pc_override;
|
||||
logic [31:0] gc_pc;
|
||||
|
||||
typedef struct packed{
|
||||
logic [31:0] pc_p4;
|
||||
logic is_ifence;
|
||||
logic is_mret;
|
||||
logic is_sret;
|
||||
} gc_inputs_t;
|
||||
|
||||
gc_inputs_t gc_inputs;
|
||||
gc_inputs_t gc_inputs_r;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
assign unit_needed = decode_stage.instruction inside {
|
||||
ECALL, EBREAK, SRET, MRET, FENCE_I, SFENCE_VMA
|
||||
};
|
||||
logic is_ifence;
|
||||
logic is_mret;
|
||||
logic is_sret;
|
||||
|
||||
assign instruction = decode_stage.instruction;
|
||||
|
||||
assign unit_needed =
|
||||
(CONFIG.INCLUDE_M_MODE & decode_stage.instruction inside {ECALL, EBREAK, MRET}) |
|
||||
(CONFIG.INCLUDE_S_MODE & decode_stage.instruction inside {SRET, SFENCE_VMA}) |
|
||||
(CONFIG.INCLUDE_IFENCE & decode_stage.instruction inside {FENCE_I});
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = CONFIG.INCLUDE_S_MODE & decode_stage.instruction inside {SFENCE_VMA};
|
||||
uses_rd = 0;
|
||||
end
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
is_ifence = (instruction.upper_opcode == FENCE_T) & CONFIG.INCLUDE_IFENCE;
|
||||
is_mret = (instruction.upper_opcode == SYSTEM_T) & (decode_stage.instruction[31:20] == MRET_imm) & CONFIG.INCLUDE_M_MODE;
|
||||
is_sret = (instruction.upper_opcode == SYSTEM_T) & (decode_stage.instruction[31:20] == SRET_imm) & CONFIG.INCLUDE_S_MODE;
|
||||
end
|
||||
end
|
||||
|
||||
assign gc_inputs.pc_p4 = constant_alu;
|
||||
assign gc_inputs.is_ifence = is_ifence;
|
||||
assign gc_inputs.is_mret = is_mret;
|
||||
assign gc_inputs.is_sret = is_sret;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
|
|
|
@ -38,12 +38,16 @@ module load_store_unit
|
|||
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input logic instruction_issued_with_rd,
|
||||
input logic rs2_inuse,
|
||||
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
input load_store_inputs_t ls_inputs,
|
||||
unit_issue_interface.unit issue,
|
||||
|
||||
input logic dcache_on,
|
||||
|
@ -135,6 +139,8 @@ module load_store_unit
|
|||
} load_attributes_t;
|
||||
load_attributes_t mem_attr, wb_attr;
|
||||
|
||||
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
|
||||
|
||||
logic [3:0] be;
|
||||
//FIFOs
|
||||
fifo_interface #(.DATA_WIDTH($bits(load_attributes_t))) load_attributes();
|
||||
|
@ -145,23 +151,67 @@ module load_store_unit
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
assign unit_needed = decode_stage.instruction inside {
|
||||
LB, LH, LW, LBU, LHU, SB, SH, SW, FENCE
|
||||
};
|
||||
assign instruction = decode_stage.instruction;
|
||||
|
||||
assign unit_needed = decode_stage.instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW, FENCE};
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = decode_stage.instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW};
|
||||
uses_rs[RS2] = 0;//Store forwarding support //decode_stage.instruction inside {SB, SH, SW};
|
||||
uses_rd = decode_stage.instruction inside {LB, LH, LW, LBU, LHU};
|
||||
end
|
||||
|
||||
amo_details_t amo;
|
||||
amo_details_t amo_r;
|
||||
logic is_load;
|
||||
logic is_store;
|
||||
logic is_load_r;
|
||||
logic is_store_r;
|
||||
logic is_fence_r;
|
||||
logic [2:0] fn3_r;
|
||||
logic [11:0] ls_offset_r;
|
||||
|
||||
assign amo.is_amo = CONFIG.INCLUDE_AMO & (instruction.upper_opcode == AMO_T);
|
||||
assign amo.op = CONFIG.INCLUDE_AMO ? decode_stage.instruction[31:27] : '0;
|
||||
assign amo.is_lr = CONFIG.INCLUDE_AMO & (amo.op == AMO_LR_FN5);
|
||||
assign amo.is_sc = CONFIG.INCLUDE_AMO & (amo.op == AMO_SC_FN5);
|
||||
|
||||
assign is_load = (instruction.upper_opcode inside {LOAD_T, AMO_T}) & !(amo.is_amo & amo.is_sc); //LR and AMO_ops perform a read operation as well
|
||||
assign is_store = (instruction.upper_opcode == STORE_T) | (amo.is_amo & amo.is_sc);//Used for LS unit and for ID tracking
|
||||
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
ls_offset_r <= decode_stage.instruction[5] ? {decode_stage.instruction[31:25], decode_stage.instruction[11:7]} : decode_stage.instruction[31:20];
|
||||
is_load_r <= is_load;
|
||||
is_store_r <= is_store;
|
||||
is_fence_r <= (instruction.upper_opcode == FENCE_T);
|
||||
amo_r <= amo;
|
||||
fn3_r <= amo.is_amo ? LS_W_fn3 : instruction.fn3;
|
||||
end
|
||||
end
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check" *) id_t rd_to_id_table [32];
|
||||
id_t store_forward_id;
|
||||
always_ff @ (posedge clk) begin
|
||||
if (instruction_issued_with_rd)
|
||||
rd_to_id_table[issue_stage.rd_addr] <= issue_stage.id;
|
||||
end
|
||||
assign store_forward_id = rd_to_id_table[issue_rs_addr[RS2]];
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Alignment Exception
|
||||
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_ls_exceptions
|
||||
logic new_exception;
|
||||
always_comb begin
|
||||
case(ls_inputs.fn3)
|
||||
case(fn3_r)
|
||||
LS_H_fn3, L_HU_fn3 : unaligned_addr = virtual_address[0];
|
||||
LS_W_fn3 : unaligned_addr = |virtual_address[1:0];
|
||||
default : unaligned_addr = 0;
|
||||
endcase
|
||||
end
|
||||
|
||||
assign new_exception = unaligned_addr & issue.new_request & ~ls_inputs.fence;
|
||||
assign new_exception = unaligned_addr & issue.new_request & ~is_fence_r;
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
exception.valid <= 0;
|
||||
|
@ -171,7 +221,7 @@ module load_store_unit
|
|||
|
||||
always_ff @(posedge clk) begin
|
||||
if (new_exception & ~exception.valid) begin
|
||||
exception.code <= ls_inputs.store ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED;
|
||||
exception.code <= is_store_r ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED;
|
||||
exception.tval <= virtual_address;
|
||||
exception.id <= issue.id;
|
||||
end
|
||||
|
@ -195,12 +245,12 @@ module load_store_unit
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//TLB interface
|
||||
assign virtual_address = ls_inputs.rs1 + 32'(signed'(ls_inputs.offset));
|
||||
assign virtual_address = rf[RS1] + 32'(signed'(ls_offset_r));
|
||||
|
||||
assign tlb.virtual_address = virtual_address;
|
||||
assign tlb.new_request = tlb_on & issue.new_request;
|
||||
assign tlb.execute = 0;
|
||||
assign tlb.rnw = ls_inputs.load & ~ls_inputs.store;
|
||||
assign tlb.rnw = is_load_r & ~is_store_r;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Byte enable generation
|
||||
|
@ -210,7 +260,7 @@ module load_store_unit
|
|||
// SB: specific byte
|
||||
always_comb begin
|
||||
be = 0;
|
||||
case(ls_inputs.fn3[1:0])
|
||||
case(fn3_r[1:0])
|
||||
LS_B_fn3[1:0] : be[virtual_address[1:0]] = 1;
|
||||
LS_H_fn3[1:0] : begin
|
||||
be[virtual_address[1:0]] = 1;
|
||||
|
@ -224,19 +274,19 @@ module load_store_unit
|
|||
//Load Store Queue
|
||||
assign lsq.data_in = '{
|
||||
addr : tlb_on ? tlb.physical_address : virtual_address,
|
||||
fn3 : ls_inputs.fn3,
|
||||
fn3 : fn3_r,
|
||||
be : be,
|
||||
data : ls_inputs.rs2,
|
||||
load : ls_inputs.load,
|
||||
store : ls_inputs.store,
|
||||
data : rf[RS2],
|
||||
load : is_load_r,
|
||||
store : is_store_r,
|
||||
id : issue.id,
|
||||
phys_addr : issue.phys_addr,
|
||||
forwarded_store : ls_inputs.forwarded_store,
|
||||
id_needed : ls_inputs.store_forward_id
|
||||
forwarded_store : rs2_inuse,
|
||||
id_needed : store_forward_id
|
||||
};
|
||||
|
||||
assign lsq.potential_push = issue.possible_issue;
|
||||
assign lsq.push = issue.new_request & ~unaligned_addr & (~tlb_on | tlb.done) & ~ls_inputs.fence;
|
||||
assign lsq.push = issue.new_request & ~unaligned_addr & (~tlb_on | tlb.done) & ~is_fence_r;
|
||||
|
||||
load_store_queue # (.CONFIG(CONFIG)) lsq_block (
|
||||
.clk (clk),
|
||||
|
@ -282,7 +332,7 @@ module load_store_unit
|
|||
if (rst)
|
||||
fence_hold <= 0;
|
||||
else
|
||||
fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & ls_inputs.fence);
|
||||
fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & is_fence_r);
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
|
@ -407,7 +457,7 @@ module load_store_unit
|
|||
.sc_complete (sc_complete),
|
||||
.sc_success (sc_success),
|
||||
.clear_reservation (clear_reservation),
|
||||
.amo (ls_inputs.amo),
|
||||
.amo (amo_r),
|
||||
.uncacheable_load (uncacheable_load),
|
||||
.uncacheable_store (uncacheable_store),
|
||||
.is_load (sel_load),
|
||||
|
|
|
@ -33,12 +33,13 @@ module mul_unit
|
|||
|
||||
input decode_packet_t decode_stage,
|
||||
output logic unit_needed,
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
input mul_inputs_t mul_inputs,
|
||||
unit_issue_interface.unit issue,
|
||||
unit_writeback_interface.unit wb
|
||||
);
|
||||
|
@ -61,18 +62,21 @@ module mul_unit
|
|||
|
||||
////////////////////////////////////////////////////
|
||||
//Decode
|
||||
assign unit_needed = decode_stage.instruction inside {
|
||||
MUL, MULH, MULHSU, MULHU
|
||||
};
|
||||
|
||||
assign unit_needed = decode_stage.instruction inside {MUL, MULH, MULHSU, MULHU};
|
||||
always_comb begin
|
||||
uses_rs = '0;
|
||||
uses_rs[RS1] = unit_needed;
|
||||
uses_rs[RS2] = unit_needed;
|
||||
uses_rd = unit_needed;
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
|
||||
assign rs1_is_signed = mul_inputs.op[1:0] inside {MULH_fn3[1:0], MULHSU_fn3[1:0]};//MUL doesn't matter
|
||||
assign rs2_is_signed = mul_inputs.op[1:0] inside {MUL_fn3[1:0], MULH_fn3[1:0]};//MUL doesn't matter
|
||||
assign rs1_is_signed = issue_stage.fn3[1:0] inside {MULH_fn3[1:0], MULHSU_fn3[1:0]};//MUL doesn't matter
|
||||
assign rs2_is_signed = issue_stage.fn3[1:0] inside {MUL_fn3[1:0], MULH_fn3[1:0]};//MUL doesn't matter
|
||||
|
||||
assign rs1_ext = signed'({mul_inputs.rs1[31] & rs1_is_signed, mul_inputs.rs1});
|
||||
assign rs2_ext = signed'({mul_inputs.rs2[31] & rs2_is_signed, mul_inputs.rs2});
|
||||
assign rs1_ext = signed'({rf[RS1][31] & rs1_is_signed, rf[RS1]});
|
||||
assign rs2_ext = signed'({rf[RS2][31] & rs2_is_signed, rf[RS2]});
|
||||
|
||||
//Pipeline advancement control signals
|
||||
assign issue.ready = stage1_advance;
|
||||
|
@ -93,7 +97,7 @@ module mul_unit
|
|||
//Attribute Pipeline
|
||||
always_ff @ (posedge clk) begin
|
||||
if (stage1_advance) begin
|
||||
mulh[0] <= (mul_inputs.op[1:0] != MUL_fn3[1:0]);
|
||||
mulh[0] <= (issue_stage.fn3[1:0] != MUL_fn3[1:0]);
|
||||
id[0] <= issue.id;
|
||||
phys_addr[0] <= issue.phys_addr;
|
||||
end
|
||||
|
|
|
@ -101,4 +101,7 @@ package opcodes;
|
|||
localparam [31:0] SFENCE_VMA = 32'b0001001??????????000000001110011;
|
||||
localparam [31:0] WFI = 32'b00010000010100000000000001110011;
|
||||
|
||||
|
||||
localparam [31:0] CUSTOM = 32'b?????????????????????????1111011;
|
||||
|
||||
endpackage
|
|
@ -312,6 +312,7 @@ module cva5_sim
|
|||
`define RENAME_P cpu.renamer_block
|
||||
`define METADATA_P cpu.id_block
|
||||
`define LS_P cpu.load_store_unit_block
|
||||
`define DIV_P cpu.gen_div.div_unit_block
|
||||
`define LSQ_P cpu.load_store_unit_block.lsq_block
|
||||
`define DCACHE_P cpu.load_store_unit_block.gen_ls_dcache.data_cache
|
||||
|
||||
|
@ -376,10 +377,10 @@ module cva5_sim
|
|||
stats[FETCH_IC_ARB_STALL_STAT] = iarb_stall;
|
||||
|
||||
//Branch predictor
|
||||
stats[FETCH_BP_BR_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return & ~`BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_BR_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return & `BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_RAS_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return & ~`BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_RAS_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return & `BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_BR_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return_ex & ~`BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_BR_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return_ex & `BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_RAS_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return_ex & ~`BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_RAS_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return_ex & `BRANCH_P.branch_flush;
|
||||
|
||||
//Issue stalls
|
||||
base_no_instruction_stall = ~`ISSUE_P.issue.stage_valid | cpu.gc.fetch_flush;
|
||||
|
@ -403,8 +404,8 @@ module cva5_sim
|
|||
|
||||
//Misc Issue stats
|
||||
stats[ISSUE_OPERAND_STALL_FOR_BRANCH_STAT] = stats[ISSUE_OPERANDS_NOT_READY_STAT] & `ISSUE_P.unit_needed_issue_stage[`ISSUE_P.UNIT_IDS.BR];
|
||||
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.LS] & `ISSUE_P.is_store_r & `ISSUE_P.ls_inputs.forwarded_store;
|
||||
stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.DIV] & `ISSUE_P.gen_decode_div_inputs.div_op_reuse;
|
||||
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.LS] & `LS_P.is_store_r & `LS_P.rs2_inuse;
|
||||
stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.DIV] & `DIV_P.div_op_reuse;
|
||||
|
||||
//Issue Stall Source
|
||||
for (int i = 0; i < REGFILE_READ_PORTS; i++) begin
|
||||
|
|
|
@ -448,6 +448,7 @@ module cva5_sim
|
|||
`define RENAME_P cpu.renamer_block
|
||||
`define METADATA_P cpu.id_block
|
||||
`define LS_P cpu.load_store_unit_block
|
||||
`define DIV_P cpu.gen_div.div_unit_block
|
||||
`define LSQ_P cpu.load_store_unit_block.lsq_block
|
||||
`define DCACHE_P cpu.load_store_unit_block.gen_ls_dcache.data_cache
|
||||
|
||||
|
@ -512,10 +513,10 @@ module cva5_sim
|
|||
stats[FETCH_IC_ARB_STALL_STAT] = iarb_stall;
|
||||
|
||||
//Branch predictor
|
||||
stats[FETCH_BP_BR_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return & ~`BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_BR_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return & `BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_RAS_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return & ~`BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_RAS_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return & `BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_BR_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return_ex & ~`BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_BR_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & ~`BRANCH_P.is_return_ex & `BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_RAS_CORRECT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return_ex & ~`BRANCH_P.branch_flush;
|
||||
stats[FETCH_BP_RAS_MISPREDICT_STAT] = `BRANCH_P.instruction_is_completing & `BRANCH_P.is_return_ex & `BRANCH_P.branch_flush;
|
||||
|
||||
//Issue stalls
|
||||
base_no_instruction_stall = ~`ISSUE_P.issue.stage_valid | cpu.gc.fetch_flush;
|
||||
|
@ -539,8 +540,8 @@ module cva5_sim
|
|||
|
||||
//Misc Issue stats
|
||||
stats[ISSUE_OPERAND_STALL_FOR_BRANCH_STAT] = stats[ISSUE_OPERANDS_NOT_READY_STAT] & `ISSUE_P.unit_needed_issue_stage[`ISSUE_P.UNIT_IDS.BR];
|
||||
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.LS] & `ISSUE_P.is_store_r & `ISSUE_P.ls_inputs.forwarded_store;
|
||||
stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.DIV] & `ISSUE_P.gen_decode_div_inputs.div_op_reuse;
|
||||
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.LS] & `LS_P.is_store_r & `LS_P.rs2_inuse;
|
||||
stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[`ISSUE_P.UNIT_IDS.DIV] & `DIV_P.div_op_reuse;
|
||||
|
||||
//Issue Stall Source
|
||||
for (int i = 0; i < REGFILE_READ_PORTS; i++) begin
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue