mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-20 04:07:36 -04:00
Re-structure ex-stage, clean-up
This commit is contained in:
parent
a5cf9666ae
commit
b30b2190e9
13 changed files with 649 additions and 707 deletions
|
@ -262,10 +262,12 @@ package ariane_pkg;
|
|||
} fu_op;
|
||||
|
||||
typedef struct packed {
|
||||
fu_op operator;
|
||||
logic [63:0] operand_a;
|
||||
logic [63:0] operand_b;
|
||||
logic [63:0] imm;
|
||||
fu_t fu;
|
||||
fu_op operator;
|
||||
logic [63:0] operand_a;
|
||||
logic [63:0] operand_b;
|
||||
logic [63:0] imm;
|
||||
logic [TRANS_ID_BITS-1:0] trans_id;
|
||||
} fu_data_t;
|
||||
|
||||
// -------------------------------
|
||||
|
|
131
src/alu.sv
131
src/alu.sv
|
@ -12,65 +12,35 @@
|
|||
// Author: Igor Loi <igor.loi@unibo.it>
|
||||
// Author: Andreas Traber <atraber@student.ethz.ch>
|
||||
// Author: Lukas Mueller <lukasmue@student.ethz.ch>
|
||||
// Author: Florian Zaruba <zaruabf@ethz.ch>
|
||||
// Author: Florian Zaruba <zaruabf@iis.ee.ethz.ch>
|
||||
//
|
||||
// Date: 19.03.2017
|
||||
// Description: Ariane ALU
|
||||
// Description: Ariane ALU based on RI5CY's ALU
|
||||
|
||||
import ariane_pkg::*;
|
||||
|
||||
module alu (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
input logic flush_i,
|
||||
input logic [63:0] pc_i,
|
||||
input logic [TRANS_ID_BITS-1:0] trans_id_i,
|
||||
input logic alu_valid_i,
|
||||
input logic branch_valid_i,
|
||||
input logic csr_valid_i,
|
||||
input fu_op operator_i,
|
||||
input logic [63:0] operand_a_i,
|
||||
input logic [63:0] operand_b_i,
|
||||
input logic [63:0] imm_i,
|
||||
input fu_data_t fu_data_i,
|
||||
output logic [63:0] result_o,
|
||||
output logic alu_valid_o,
|
||||
output logic alu_ready_o,
|
||||
output logic [TRANS_ID_BITS-1:0] alu_trans_id_o,
|
||||
output exception_t alu_exception_o,
|
||||
|
||||
input logic fu_valid_i,
|
||||
input logic is_compressed_instr_i,
|
||||
input branchpredict_sbe_t branch_predict_i,
|
||||
output branchpredict_t resolved_branch_o,
|
||||
output logic resolve_branch_o,
|
||||
|
||||
input logic commit_i,
|
||||
// to CSR file
|
||||
output logic [11:0] csr_addr_o // CSR address to commit stage
|
||||
output logic alu_branch_res_o
|
||||
);
|
||||
|
||||
logic csr_ready;
|
||||
|
||||
assign alu_ready_o = csr_ready;
|
||||
assign alu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i;
|
||||
assign alu_trans_id_o = trans_id_i;
|
||||
|
||||
logic [63:0] operand_a_rev;
|
||||
logic [31:0] operand_a_rev32;
|
||||
logic [64:0] operand_b_neg;
|
||||
logic [65:0] adder_result_ext_o;
|
||||
logic less; // handles both signed and unsigned forms
|
||||
logic alu_branch_res;
|
||||
logic [63:0] branch_result, csr_result;
|
||||
|
||||
// bit reverse operand_a for left shifts and bit counting
|
||||
generate
|
||||
genvar k;
|
||||
for(k = 0; k < 64; k++)
|
||||
assign operand_a_rev[k] = operand_a_i[63-k];
|
||||
assign operand_a_rev[k] = fu_data_i.operand_a[63-k];
|
||||
|
||||
for (k = 0; k < 32; k++)
|
||||
assign operand_a_rev32[k] = operand_a_i[31-k];
|
||||
assign operand_a_rev32[k] = fu_data_i.operand_a[31-k];
|
||||
endgenerate
|
||||
|
||||
// ------
|
||||
|
@ -84,7 +54,7 @@ module alu (
|
|||
always_comb begin
|
||||
adder_op_b_negate = 1'b0;
|
||||
|
||||
unique case (operator_i)
|
||||
unique case (fu_data_i.operator)
|
||||
// ADDER OPS
|
||||
EQ, NE,
|
||||
SUB, SUBW: adder_op_b_negate = 1'b1;
|
||||
|
@ -94,10 +64,10 @@ module alu (
|
|||
end
|
||||
|
||||
// prepare operand a
|
||||
assign adder_in_a = {operand_a_i, 1'b1};
|
||||
assign adder_in_a = {fu_data_i.operand_a, 1'b1};
|
||||
|
||||
// prepare operand b
|
||||
assign operand_b_neg = {operand_b_i, 1'b0} ^ {65{adder_op_b_negate}};
|
||||
assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {65{adder_op_b_negate}};
|
||||
assign adder_in_b = operand_b_neg ;
|
||||
|
||||
// actual adder
|
||||
|
@ -108,13 +78,13 @@ module alu (
|
|||
// get the right branch comparison result
|
||||
always_comb begin : branch_resolve
|
||||
// set comparison by default
|
||||
alu_branch_res = 1'b1;
|
||||
case (operator_i)
|
||||
EQ: alu_branch_res = adder_z_flag;
|
||||
NE: alu_branch_res = ~adder_z_flag;
|
||||
LTS, LTU: alu_branch_res = less;
|
||||
GES, GEU: alu_branch_res = ~less;
|
||||
default: alu_branch_res = 1'b1;
|
||||
alu_branch_res_o = 1'b1;
|
||||
case (fu_data_i.operator)
|
||||
EQ: alu_branch_res_o = adder_z_flag;
|
||||
NE: alu_branch_res_o = ~adder_z_flag;
|
||||
LTS, LTU: alu_branch_res_o = less;
|
||||
GES, GEU: alu_branch_res_o = ~less;
|
||||
default: alu_branch_res_o = 1'b1;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
@ -139,19 +109,19 @@ module alu (
|
|||
logic [63:0] shift_left_result;
|
||||
logic [31:0] shift_left_result32;
|
||||
|
||||
assign shift_amt = operand_b_i;
|
||||
assign shift_amt = fu_data_i.operand_b;
|
||||
|
||||
assign shift_left = (operator_i == SLL) | (operator_i == SLLW);
|
||||
assign shift_left = (fu_data_i.operator == SLL) | (fu_data_i.operator == SLLW);
|
||||
|
||||
assign shift_arithmetic = (operator_i == SRA) | (operator_i == SRAW);
|
||||
assign shift_arithmetic = (fu_data_i.operator == SRA) | (fu_data_i.operator == SRAW);
|
||||
|
||||
// right shifts, we let the synthesizer optimize this
|
||||
logic [64:0] shift_op_a_64;
|
||||
logic [32:0] shift_op_a_32;
|
||||
|
||||
// choose the bit reversed or the normal input for shift operand a
|
||||
assign shift_op_a = shift_left ? operand_a_rev : operand_a_i;
|
||||
assign shift_op_a32 = shift_left ? operand_a_rev32 : operand_a_i[31:0];
|
||||
assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a;
|
||||
assign shift_op_a32 = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0];
|
||||
|
||||
assign shift_op_a_64 = { shift_arithmetic & shift_op_a[63], shift_op_a};
|
||||
assign shift_op_a_32 = { shift_arithmetic & shift_op_a[31], shift_op_a32};
|
||||
|
@ -181,12 +151,12 @@ module alu (
|
|||
logic sgn;
|
||||
sgn = 1'b0;
|
||||
|
||||
if ((operator_i == SLTS) ||
|
||||
(operator_i == LTS) ||
|
||||
(operator_i == GES))
|
||||
if ((fu_data_i.operator == SLTS) ||
|
||||
(fu_data_i.operator == LTS) ||
|
||||
(fu_data_i.operator == GES))
|
||||
sgn = 1'b1;
|
||||
|
||||
less = ($signed({sgn & operand_a_i[63], operand_a_i}) < $signed({sgn & operand_b_i[63], operand_b_i}));
|
||||
less = ($signed({sgn & fu_data_i.operand_a[63], fu_data_i.operand_a}) < $signed({sgn & fu_data_i.operand_b[63], fu_data_i.operand_b}));
|
||||
end
|
||||
|
||||
// -----------
|
||||
|
@ -195,11 +165,11 @@ module alu (
|
|||
always_comb begin
|
||||
result_o = '0;
|
||||
|
||||
unique case (operator_i)
|
||||
unique case (fu_data_i.operator)
|
||||
// Standard Operations
|
||||
ANDL: result_o = operand_a_i & operand_b_i;
|
||||
ORL: result_o = operand_a_i | operand_b_i;
|
||||
XORL: result_o = operand_a_i ^ operand_b_i;
|
||||
ANDL: result_o = fu_data_i.operand_a & fu_data_i.operand_b;
|
||||
ORL: result_o = fu_data_i.operand_a | fu_data_i.operand_b;
|
||||
XORL: result_o = fu_data_i.operand_a ^ fu_data_i.operand_b;
|
||||
|
||||
// Adder Operations
|
||||
ADD, SUB: result_o = adder_result;
|
||||
|
@ -217,48 +187,5 @@ module alu (
|
|||
|
||||
default: ; // default case to suppress unique warning
|
||||
endcase
|
||||
|
||||
if (branch_valid_i) begin
|
||||
result_o = branch_result;
|
||||
end else if (csr_valid_i) begin
|
||||
result_o = csr_result;
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
// ----------------------
|
||||
// Branch Unit
|
||||
// ----------------------
|
||||
branch_unit branch_unit_i (
|
||||
.operator_i,
|
||||
.operand_a_i,
|
||||
.operand_b_i,
|
||||
.imm_i,
|
||||
.pc_i,
|
||||
.is_compressed_instr_i,
|
||||
// any functional unit is valid, check that there is no accidental mis-predict
|
||||
.fu_valid_i,
|
||||
.branch_valid_i,
|
||||
.branch_comp_res_i ( alu_branch_res ),
|
||||
.branch_result_o ( branch_result ),
|
||||
.branch_predict_i,
|
||||
.resolved_branch_o,
|
||||
.resolve_branch_o,
|
||||
.branch_exception_o ( alu_exception_o )
|
||||
);
|
||||
|
||||
csr_buffer csr_buffer_i (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i,
|
||||
.csr_valid_i,
|
||||
.operator_i,
|
||||
.operand_a_i,
|
||||
.operand_b_i,
|
||||
.csr_ready_o ( csr_ready ),
|
||||
.csr_result_o ( csr_result ),
|
||||
.commit_i,
|
||||
.csr_addr_o
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
169
src/ariane.sv
169
src/ariane.sv
|
@ -72,21 +72,16 @@ module ariane #(
|
|||
// --------------
|
||||
// ISSUE <-> EX
|
||||
// --------------
|
||||
logic [63:0] imm_id_ex;
|
||||
logic [TRANS_ID_BITS-1:0] trans_id_id_ex;
|
||||
fu_t fu_id_ex;
|
||||
fu_op operator_id_ex;
|
||||
logic [63:0] operand_a_id_ex;
|
||||
logic [63:0] operand_b_id_ex;
|
||||
fu_data_t fu_data_id_ex;
|
||||
logic [63:0] pc_id_ex;
|
||||
logic is_compressed_instr_id_ex;
|
||||
// ALU
|
||||
logic alu_ready_ex_id;
|
||||
logic flu_ready_ex_id;
|
||||
logic alu_valid_id_ex;
|
||||
logic [TRANS_ID_BITS-1:0] alu_trans_id_ex_id;
|
||||
logic alu_valid_ex_id;
|
||||
logic [63:0] alu_result_ex_id;
|
||||
exception_t alu_exception_ex_id;
|
||||
logic [TRANS_ID_BITS-1:0] flu_trans_id_ex_id;
|
||||
logic flu_valid_ex_id;
|
||||
logic [63:0] flu_result_ex_id;
|
||||
exception_t flu_exception_ex_id;
|
||||
// Branches and Jumps
|
||||
logic branch_valid_id_ex;
|
||||
|
||||
|
@ -272,25 +267,22 @@ module ariane #(
|
|||
.NR_ENTRIES ( NR_SB_ENTRIES ),
|
||||
.NR_WB_PORTS ( NR_WB_PORTS )
|
||||
) issue_stage_i (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_unissued_instr_i ( flush_unissued_instr_ctrl_id ),
|
||||
.flush_i ( flush_ctrl_id ),
|
||||
|
||||
// ID Stage
|
||||
.decoded_instr_i ( issue_entry_id_issue ),
|
||||
.decoded_instr_valid_i ( issue_entry_valid_id_issue ),
|
||||
.is_ctrl_flow_i ( is_ctrl_fow_id_issue ),
|
||||
.decoded_instr_ack_o ( issue_instr_issue_id ),
|
||||
|
||||
// Functional Units
|
||||
.fu_o ( fu_id_ex ),
|
||||
.operator_o ( operator_id_ex ),
|
||||
.operand_a_o ( operand_a_id_ex ),
|
||||
.operand_b_o ( operand_b_id_ex ),
|
||||
.imm_o ( imm_id_ex ),
|
||||
.trans_id_o ( trans_id_id_ex ),
|
||||
.fu_data_o ( fu_data_id_ex ),
|
||||
.pc_o ( pc_id_ex ),
|
||||
.is_compressed_instr_o ( is_compressed_instr_id_ex ),
|
||||
// fixed latency unit ready
|
||||
.flu_ready_i ( flu_ready_ex_id ),
|
||||
// ALU
|
||||
.alu_ready_i ( alu_ready_ex_id ),
|
||||
.alu_valid_o ( alu_valid_id_ex ),
|
||||
// Branches and Jumps
|
||||
.branch_valid_o ( branch_valid_id_ex ), // branch is valid
|
||||
|
@ -309,13 +301,12 @@ module ariane #(
|
|||
.fpu_rm_o ( fpu_rm_id_ex ),
|
||||
// CSR
|
||||
.csr_valid_o ( csr_valid_id_ex ),
|
||||
|
||||
// Commit
|
||||
.resolved_branch_i ( resolved_branch ),
|
||||
.trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, mult_trans_id_ex_id, fpu_trans_id_ex_id }),
|
||||
.wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, mult_result_ex_id, fpu_result_ex_id }),
|
||||
.ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception_t){1'b0}}, fpu_exception_ex_id }),
|
||||
.wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, mult_valid_ex_id, fpu_valid_ex_id }),
|
||||
.trans_id_i ( {flu_trans_id_ex_id, lsu_trans_id_ex_id, mult_trans_id_ex_id, fpu_trans_id_ex_id }),
|
||||
.wbdata_i ( {flu_result_ex_id, lsu_result_ex_id, mult_result_ex_id, fpu_result_ex_id }),
|
||||
.ex_ex_i ( {flu_exception_ex_id, lsu_exception_ex_id, {$bits(exception_t){1'b0}}, fpu_exception_ex_id }),
|
||||
.wb_valid_i ( {flu_valid_ex_id, lsu_valid_ex_id, mult_valid_ex_id, fpu_valid_ex_id }),
|
||||
|
||||
.waddr_i ( waddr_commit_id ),
|
||||
.wdata_i ( wdata_commit_id ),
|
||||
|
@ -330,81 +321,77 @@ module ariane #(
|
|||
// EX
|
||||
// ---------
|
||||
ex_stage ex_stage_i (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.flush_i ( flush_ctrl_ex ),
|
||||
.fu_i ( fu_id_ex ),
|
||||
.operator_i ( operator_id_ex ),
|
||||
.operand_a_i ( operand_a_id_ex ),
|
||||
.operand_b_i ( operand_b_id_ex ),
|
||||
.imm_i ( imm_id_ex ),
|
||||
.trans_id_i ( trans_id_id_ex ),
|
||||
.pc_i ( pc_id_ex ),
|
||||
.is_compressed_instr_i ( is_compressed_instr_id_ex ),
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.flush_i ( flush_ctrl_ex ),
|
||||
.fu_data_i ( fu_data_id_ex ),
|
||||
.pc_i ( pc_id_ex ),
|
||||
.is_compressed_instr_i ( is_compressed_instr_id_ex ),
|
||||
// fixed latency units
|
||||
.flu_result_o ( flu_result_ex_id ),
|
||||
.flu_trans_id_o ( flu_trans_id_ex_id ),
|
||||
.flu_valid_o ( flu_valid_ex_id ),
|
||||
.flu_exception_o ( flu_exception_ex_id ),
|
||||
.flu_ready_o ( flu_ready_ex_id ),
|
||||
// ALU
|
||||
.alu_ready_o ( alu_ready_ex_id ),
|
||||
.alu_valid_i ( alu_valid_id_ex ),
|
||||
.alu_result_o ( alu_result_ex_id ),
|
||||
.alu_trans_id_o ( alu_trans_id_ex_id ),
|
||||
.alu_valid_o ( alu_valid_ex_id ),
|
||||
.alu_exception_o ( alu_exception_ex_id ),
|
||||
.alu_valid_i ( alu_valid_id_ex ),
|
||||
// Branches and Jumps
|
||||
.branch_valid_i ( branch_valid_id_ex ),
|
||||
.branch_predict_i ( branch_predict_id_ex ), // branch predict to ex
|
||||
.resolved_branch_o ( resolved_branch ),
|
||||
.resolve_branch_o ( resolve_branch_ex_id ),
|
||||
.branch_valid_i ( branch_valid_id_ex ),
|
||||
.branch_predict_i ( branch_predict_id_ex ), // branch predict to ex
|
||||
.resolved_branch_o ( resolved_branch ),
|
||||
.resolve_branch_o ( resolve_branch_ex_id ),
|
||||
// CSR
|
||||
.csr_valid_i ( csr_valid_id_ex ),
|
||||
.csr_addr_o ( csr_addr_ex_csr ),
|
||||
.csr_commit_i ( csr_commit_commit_ex ), // from commit
|
||||
.csr_valid_i ( csr_valid_id_ex ),
|
||||
.csr_addr_o ( csr_addr_ex_csr ),
|
||||
.csr_commit_i ( csr_commit_commit_ex ), // from commit
|
||||
// LSU
|
||||
.lsu_ready_o ( lsu_ready_ex_id ),
|
||||
.lsu_valid_i ( lsu_valid_id_ex ),
|
||||
.lsu_result_o ( lsu_result_ex_id ),
|
||||
.lsu_trans_id_o ( lsu_trans_id_ex_id ),
|
||||
.lsu_valid_o ( lsu_valid_ex_id ),
|
||||
.lsu_commit_i ( lsu_commit_commit_ex ), // from commit
|
||||
.lsu_commit_ready_o ( lsu_commit_ready_ex_commit ), // to commit
|
||||
.lsu_exception_o ( lsu_exception_ex_id ),
|
||||
.no_st_pending_o ( no_st_pending_ex_commit ),
|
||||
.lsu_ready_o ( lsu_ready_ex_id ),
|
||||
.lsu_valid_i ( lsu_valid_id_ex ),
|
||||
.lsu_result_o ( lsu_result_ex_id ),
|
||||
.lsu_trans_id_o ( lsu_trans_id_ex_id ),
|
||||
.lsu_valid_o ( lsu_valid_ex_id ),
|
||||
.lsu_commit_i ( lsu_commit_commit_ex ), // from commit
|
||||
.lsu_commit_ready_o ( lsu_commit_ready_ex_commit ), // to commit
|
||||
.lsu_exception_o ( lsu_exception_ex_id ),
|
||||
.no_st_pending_o ( no_st_pending_ex_commit ),
|
||||
// MULT
|
||||
.mult_ready_o ( mult_ready_ex_id ),
|
||||
.mult_valid_i ( mult_valid_id_ex ),
|
||||
.mult_trans_id_o ( mult_trans_id_ex_id ),
|
||||
.mult_result_o ( mult_result_ex_id ),
|
||||
.mult_valid_o ( mult_valid_ex_id ),
|
||||
.mult_ready_o ( mult_ready_ex_id ),
|
||||
.mult_valid_i ( mult_valid_id_ex ),
|
||||
.mult_trans_id_o ( mult_trans_id_ex_id ),
|
||||
.mult_result_o ( mult_result_ex_id ),
|
||||
.mult_valid_o ( mult_valid_ex_id ),
|
||||
// FPU
|
||||
.fpu_ready_o ( fpu_ready_ex_id ),
|
||||
.fpu_valid_i ( fpu_valid_id_ex ),
|
||||
.fpu_fmt_i ( fpu_fmt_id_ex ),
|
||||
.fpu_rm_i ( fpu_rm_id_ex ),
|
||||
.fpu_frm_i ( frm_csr_id_issue_ex ),
|
||||
.fpu_prec_i ( fprec_csr_ex ),
|
||||
.fpu_trans_id_o ( fpu_trans_id_ex_id ),
|
||||
.fpu_result_o ( fpu_result_ex_id ),
|
||||
.fpu_valid_o ( fpu_valid_ex_id ),
|
||||
.fpu_exception_o ( fpu_exception_ex_id ),
|
||||
.amo_valid_commit_i ( amo_valid_commit ),
|
||||
.amo_req_o ( amo_req ),
|
||||
.amo_resp_i ( amo_resp ),
|
||||
.fpu_ready_o ( fpu_ready_ex_id ),
|
||||
.fpu_valid_i ( fpu_valid_id_ex ),
|
||||
.fpu_fmt_i ( fpu_fmt_id_ex ),
|
||||
.fpu_rm_i ( fpu_rm_id_ex ),
|
||||
.fpu_frm_i ( frm_csr_id_issue_ex ),
|
||||
.fpu_prec_i ( fprec_csr_ex ),
|
||||
.fpu_trans_id_o ( fpu_trans_id_ex_id ),
|
||||
.fpu_result_o ( fpu_result_ex_id ),
|
||||
.fpu_valid_o ( fpu_valid_ex_id ),
|
||||
.fpu_exception_o ( fpu_exception_ex_id ),
|
||||
.amo_valid_commit_i ( amo_valid_commit ),
|
||||
.amo_req_o ( amo_req ),
|
||||
.amo_resp_i ( amo_resp ),
|
||||
// Performance counters
|
||||
.itlb_miss_o ( itlb_miss_ex_perf ),
|
||||
.dtlb_miss_o ( dtlb_miss_ex_perf ),
|
||||
.itlb_miss_o ( itlb_miss_ex_perf ),
|
||||
.dtlb_miss_o ( dtlb_miss_ex_perf ),
|
||||
// Memory Management
|
||||
.enable_translation_i ( enable_translation_csr_ex ), // from CSR
|
||||
.en_ld_st_translation_i ( en_ld_st_translation_csr_ex ),
|
||||
.flush_tlb_i ( flush_tlb_ctrl_ex ),
|
||||
.priv_lvl_i ( priv_lvl ), // from CSR
|
||||
.ld_st_priv_lvl_i ( ld_st_priv_lvl_csr_ex ), // from CSR
|
||||
.sum_i ( sum_csr_ex ), // from CSR
|
||||
.mxr_i ( mxr_csr_ex ), // from CSR
|
||||
.satp_ppn_i ( satp_ppn_csr_ex ), // from CSR
|
||||
.asid_i ( asid_csr_ex ), // from CSR
|
||||
.icache_areq_i ( icache_areq_cache_ex ),
|
||||
.icache_areq_o ( icache_areq_ex_cache ),
|
||||
.enable_translation_i ( enable_translation_csr_ex ), // from CSR
|
||||
.en_ld_st_translation_i ( en_ld_st_translation_csr_ex ),
|
||||
.flush_tlb_i ( flush_tlb_ctrl_ex ),
|
||||
.priv_lvl_i ( priv_lvl ), // from CSR
|
||||
.ld_st_priv_lvl_i ( ld_st_priv_lvl_csr_ex ), // from CSR
|
||||
.sum_i ( sum_csr_ex ), // from CSR
|
||||
.mxr_i ( mxr_csr_ex ), // from CSR
|
||||
.satp_ppn_i ( satp_ppn_csr_ex ), // from CSR
|
||||
.asid_i ( asid_csr_ex ), // from CSR
|
||||
.icache_areq_i ( icache_areq_cache_ex ),
|
||||
.icache_areq_o ( icache_areq_ex_cache ),
|
||||
// DCACHE interfaces
|
||||
.dcache_req_ports_i ( dcache_req_ports_cache_ex ),
|
||||
.dcache_req_ports_o ( dcache_req_ports_ex_cache )
|
||||
.dcache_req_ports_i ( dcache_req_ports_cache_ex ),
|
||||
.dcache_req_ports_o ( dcache_req_ports_ex_cache )
|
||||
);
|
||||
|
||||
// ---------
|
||||
|
|
|
@ -15,10 +15,7 @@
|
|||
import ariane_pkg::*;
|
||||
|
||||
module branch_unit (
|
||||
input fu_op operator_i, // comparison operation to perform
|
||||
input logic [63:0] operand_a_i, // contains content of RS 1
|
||||
input logic [63:0] operand_b_i, // contains content of RS 2
|
||||
input logic [63:0] imm_i, // immediate to add to PC
|
||||
input fu_data_t fu_data_i,
|
||||
input logic [63:0] pc_i, // PC of instruction
|
||||
input logic is_compressed_instr_i,
|
||||
input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict
|
||||
|
@ -39,7 +36,7 @@ module branch_unit (
|
|||
always_comb begin : mispredict_handler
|
||||
// set the jump base, for JALR we need to look at the register, for all other control flow instructions we can take the current PC
|
||||
automatic logic [63:0] jump_base;
|
||||
jump_base = (operator_i == JALR) ? operand_a_i : pc_i;
|
||||
jump_base = (fu_data_i.operator == JALR) ? fu_data_i.operand_a : pc_i;
|
||||
|
||||
target_address = 64'b0;
|
||||
resolve_branch_o = 1'b0;
|
||||
|
@ -53,9 +50,9 @@ module branch_unit (
|
|||
// calculate next PC, depending on whether the instruction is compressed or not this may be different
|
||||
next_pc = pc_i + ((is_compressed_instr_i) ? 64'h2 : 64'h4);
|
||||
// calculate target address simple 64 bit addition
|
||||
target_address = $unsigned($signed(jump_base) + $signed(imm_i));
|
||||
target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm));
|
||||
// on a JALR we are supposed to reset the LSB to 0 (according to the specification)
|
||||
if (operator_i == JALR)
|
||||
if (fu_data_i.operator == JALR)
|
||||
target_address[0] = 1'b0;
|
||||
// if we need to put the branch target address in a destination register, output it here to WB
|
||||
branch_result_o = next_pc;
|
||||
|
|
|
@ -20,15 +20,12 @@ module csr_buffer (
|
|||
input logic rst_ni, // Asynchronous reset active low
|
||||
input logic flush_i,
|
||||
|
||||
input fu_op operator_i,
|
||||
input logic [63:0] operand_a_i,
|
||||
input logic [63:0] operand_b_i,
|
||||
input fu_data_t fu_data_i,
|
||||
|
||||
output logic csr_ready_o, // FU is ready e.g. not busy
|
||||
input logic csr_valid_i, // Input is valid
|
||||
output logic [63:0] csr_result_o,
|
||||
input logic commit_i, // commit the pending CSR OP
|
||||
|
||||
input logic csr_commit_i, // commit the pending CSR OP
|
||||
// to CSR file
|
||||
output logic [11:0] csr_addr_o // CSR address to commit stage
|
||||
);
|
||||
|
@ -40,7 +37,7 @@ module csr_buffer (
|
|||
} csr_reg_n, csr_reg_q;
|
||||
|
||||
// control logic, scoreboard signals
|
||||
assign csr_result_o = operand_a_i;
|
||||
assign csr_result_o = fu_data_i.operand_a;
|
||||
assign csr_addr_o = csr_reg_q.csr_address;
|
||||
|
||||
// write logic
|
||||
|
@ -49,16 +46,16 @@ module csr_buffer (
|
|||
// by default we are ready
|
||||
csr_ready_o = 1'b1;
|
||||
// if we have a valid uncommitted CSR req or are just getting one WITHOUT a commit in, we are not ready
|
||||
if ((csr_reg_q.valid || csr_valid_i) && ~commit_i)
|
||||
if ((csr_reg_q.valid || csr_valid_i) && ~csr_commit_i)
|
||||
csr_ready_o = 1'b0;
|
||||
// if we got a valid from the scoreboard
|
||||
// store the CSR address
|
||||
if (csr_valid_i) begin
|
||||
csr_reg_n.csr_address = operand_b_i[11:0];
|
||||
csr_reg_n.csr_address = fu_data_i.operand_b[11:0];
|
||||
csr_reg_n.valid = 1'b1;
|
||||
end
|
||||
// if we get a commit and no new valid instruction -> clear the valid bit
|
||||
if (commit_i && ~csr_valid_i) begin
|
||||
if (csr_commit_i && ~csr_valid_i) begin
|
||||
csr_reg_n.valid = 1'b0;
|
||||
end
|
||||
// clear the buffer if we flushed
|
||||
|
|
212
src/ex_stage.sv
212
src/ex_stage.sv
|
@ -22,23 +22,19 @@ module ex_stage #(
|
|||
input logic rst_ni, // Asynchronous reset active low
|
||||
input logic flush_i,
|
||||
|
||||
input fu_t fu_i,
|
||||
input fu_op operator_i,
|
||||
input logic [63:0] operand_a_i,
|
||||
input logic [63:0] operand_b_i,
|
||||
input logic [63:0] imm_i,
|
||||
input logic [TRANS_ID_BITS-1:0] trans_id_i,
|
||||
input fu_data_t fu_data_i,
|
||||
input logic [63:0] pc_i, // PC of current instruction
|
||||
input logic is_compressed_instr_i, // we need to know if this was a compressed instruction
|
||||
// in order to calculate the next PC on a mis-predict
|
||||
// ALU 1
|
||||
output logic alu_ready_o, // FU is ready
|
||||
input logic alu_valid_i, // Output is valid
|
||||
output logic alu_valid_o, // ALU result is valid
|
||||
output logic [63:0] alu_result_o,
|
||||
output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, // ID of scoreboard entry at which to write back
|
||||
output exception_t alu_exception_o,
|
||||
// Fixed latency unit(s)
|
||||
output logic [63:0] flu_result_o,
|
||||
output logic [TRANS_ID_BITS-1:0] flu_trans_id_o, // ID of scoreboard entry at which to write back
|
||||
output exception_t flu_exception_o,
|
||||
output logic flu_ready_o, // FLU is ready
|
||||
output logic flu_valid_o, // FLU result is valid
|
||||
// Branches and Jumps
|
||||
// ALU 1
|
||||
input logic alu_valid_i, // Output is valid
|
||||
input logic branch_valid_i, // we are using the branch unit
|
||||
input branchpredict_sbe_t branch_predict_i,
|
||||
output branchpredict_t resolved_branch_o, // the branch engine uses the write back from the ALU
|
||||
|
@ -101,66 +97,105 @@ module ex_stage #(
|
|||
output logic dtlb_miss_o
|
||||
);
|
||||
|
||||
// -------------------------
|
||||
// Fixed Latency Units
|
||||
// -------------------------
|
||||
// all fixed latency units share a single issue port and a single write
|
||||
// port into the scoreboard. At the moment those are:
|
||||
// 1. ALU - all operations are single cycle
|
||||
// 2. Branch unit: operation is single cycle, the ALU is needed
|
||||
// for comparison
|
||||
// 3. CSR: This is a small buffer which saves the address of the CSR.
|
||||
// The value is then re-fetched once the instruction retires. The buffer
|
||||
// is only a single entry deep, hence this operation will block all
|
||||
// other operations once this buffer is full. This should not be a major
|
||||
// concern though as CSRs are infrequent.
|
||||
// 4. Multiplier/Divider: TODO(zarubaf)
|
||||
|
||||
// from ALU to branch unit
|
||||
logic alu_branch_res; // branch comparison result
|
||||
logic [63:0] alu_result, branch_result, csr_result;
|
||||
logic csr_ready;
|
||||
|
||||
// -----
|
||||
// ALU
|
||||
// -----
|
||||
// 1. ALU (combinatorial)
|
||||
// data silence operation
|
||||
fu_data_t alu_data;
|
||||
assign alu_data.operator = (alu_valid_i | branch_valid_i | csr_valid_i) ? operator_i : ADD;
|
||||
assign alu_data.operand_a = (alu_valid_i | branch_valid_i | csr_valid_i) ? operand_a_i : '0;
|
||||
assign alu_data.operand_b = (alu_valid_i | branch_valid_i | csr_valid_i) ? operand_b_i : '0;
|
||||
assign alu_data.imm = (alu_valid_i | branch_valid_i | csr_valid_i) ? imm_i : '0;
|
||||
assign alu_data = (alu_valid_i | branch_valid_i) ? fu_data_i : '0;
|
||||
|
||||
// fixed latency FUs
|
||||
// TODO(zarubaf) Re-name this module and re-factor ALU
|
||||
alu alu_i (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i,
|
||||
.pc_i,
|
||||
.trans_id_i,
|
||||
.alu_valid_i,
|
||||
.branch_valid_i,
|
||||
.csr_valid_i ( csr_valid_i ),
|
||||
.operator_i ( alu_data.operator ),
|
||||
.operand_a_i ( alu_data.operand_a ),
|
||||
.operand_b_i ( alu_data.operand_b ),
|
||||
.imm_i ( alu_data.imm ),
|
||||
.result_o ( alu_result_o ),
|
||||
.alu_valid_o,
|
||||
.alu_ready_o,
|
||||
.alu_trans_id_o,
|
||||
.alu_exception_o,
|
||||
.fu_data_i ( alu_data ),
|
||||
.result_o ( alu_result ),
|
||||
.alu_branch_res_o ( alu_branch_res )
|
||||
);
|
||||
|
||||
.fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i ),
|
||||
// 2. Branch Unit (combinatorial)
|
||||
// we don't silence the branch unit as this is already critical and we do
|
||||
// not want to add another layer of logic
|
||||
branch_unit branch_unit_i (
|
||||
.fu_data_i,
|
||||
.pc_i,
|
||||
.is_compressed_instr_i,
|
||||
// any functional unit is valid, check that there is no accidental mis-predict
|
||||
.fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i ) ,
|
||||
.branch_valid_i,
|
||||
.branch_comp_res_i ( alu_branch_res ),
|
||||
.branch_result_o ( branch_result ),
|
||||
.branch_predict_i,
|
||||
.resolved_branch_o,
|
||||
.resolve_branch_o,
|
||||
|
||||
.commit_i ( csr_commit_i ),
|
||||
.csr_addr_o ( csr_addr_o )
|
||||
.branch_exception_o ( flu_exception_o )
|
||||
);
|
||||
|
||||
// CSR (sequential)
|
||||
csr_buffer csr_buffer_i (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i,
|
||||
.fu_data_i,
|
||||
.csr_valid_i,
|
||||
.csr_ready_o ( csr_ready ),
|
||||
.csr_result_o ( csr_result ),
|
||||
.csr_commit_i,
|
||||
.csr_addr_o
|
||||
);
|
||||
|
||||
assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i;
|
||||
|
||||
// result MUX
|
||||
always_comb begin
|
||||
// Branch result as default case
|
||||
flu_result_o = branch_result;
|
||||
flu_trans_id_o = fu_data_i.trans_id;
|
||||
// ALU result
|
||||
if (alu_valid_i) begin
|
||||
flu_result_o = alu_result;
|
||||
// CSR result
|
||||
end else if (csr_valid_i) begin
|
||||
flu_result_o = csr_result;
|
||||
end
|
||||
end
|
||||
|
||||
// ready flags for FLU
|
||||
always_comb begin
|
||||
flu_ready_o = csr_ready;
|
||||
end
|
||||
|
||||
// ----------------
|
||||
// Multiplication
|
||||
// ----------------
|
||||
fu_data_t mult_data;
|
||||
assign mult_data.operator = mult_valid_i ? operator_i : MUL;
|
||||
assign mult_data.operand_a = mult_valid_i ? operand_a_i : '0;
|
||||
assign mult_data.operand_b = mult_valid_i ? operand_b_i : '0;
|
||||
// input silencing of the multiplier
|
||||
assign mult_data = mult_valid_i ? fu_data_i : '0;
|
||||
|
||||
mult i_mult (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i,
|
||||
.trans_id_i,
|
||||
.mult_valid_i,
|
||||
.operator_i ( mult_data.operator ),
|
||||
.operand_a_i ( mult_data.operand_a ),
|
||||
.operand_b_i ( mult_data.operand_b ),
|
||||
.result_o ( mult_result_o ),
|
||||
.fu_data_i ( mult_data ),
|
||||
.result_o ( mult_result_o ),
|
||||
.mult_valid_o,
|
||||
.mult_ready_o,
|
||||
.mult_trans_id_o
|
||||
|
@ -172,23 +207,15 @@ module ex_stage #(
|
|||
generate
|
||||
if (FP_PRESENT) begin : fpu_gen
|
||||
fu_data_t fpu_data;
|
||||
assign fpu_data.operator = fpu_valid_i ? operator_i : FSGNJ;
|
||||
assign fpu_data.operand_a = fpu_valid_i ? operand_a_i : '0;
|
||||
assign fpu_data.operand_b = fpu_valid_i ? operand_b_i : '0;
|
||||
assign fpu_data.imm = fpu_valid_i ? imm_i : '0;
|
||||
// input silencing of the FPU: forward the complete issue struct only while
// fpu_valid_i is set, otherwise drive all fields to zero. The whole struct
// must be assigned (not just .operator), since fpu_wrap reads operand_a,
// operand_b, imm, fu and trans_id from fu_data_i.
assign fpu_data = fpu_valid_i ? fu_data_i : '0;
|
||||
|
||||
fpu_wrap fpu_i (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i,
|
||||
.trans_id_i,
|
||||
.fu_i,
|
||||
.fpu_valid_i,
|
||||
.fpu_ready_o,
|
||||
.operator_i ( fpu_data.operator ),
|
||||
.operand_a_i ( fpu_data.operand_a[FLEN-1:0] ),
|
||||
.operand_b_i ( fpu_data.operand_b[FLEN-1:0] ),
|
||||
.operand_c_i ( fpu_data.imm[FLEN-1:0] ),
|
||||
.fu_data_i ( fpu_data ),
|
||||
.fpu_fmt_i,
|
||||
.fpu_rm_i,
|
||||
.fpu_frm_i,
|
||||
|
@ -211,47 +238,40 @@ module ex_stage #(
|
|||
// Load-Store Unit
|
||||
// ----------------
|
||||
fu_data_t lsu_data;
|
||||
assign lsu_data.operator = lsu_valid_i ? operator_i : LD;
|
||||
assign lsu_data.operand_a = lsu_valid_i ? operand_a_i : '0;
|
||||
assign lsu_data.operand_b = lsu_valid_i ? operand_b_i : '0;
|
||||
assign lsu_data.imm = lsu_valid_i ? imm_i : '0;
|
||||
|
||||
assign lsu_data = lsu_valid_i ? fu_data_i : '0;
|
||||
|
||||
lsu lsu_i (
|
||||
.clk_i ,
|
||||
.rst_ni ,
|
||||
.flush_i ,
|
||||
.no_st_pending_o ,
|
||||
.fu_i ,
|
||||
.operator_i (lsu_data.operator ),
|
||||
.operand_a_i (lsu_data.operand_a ),
|
||||
.operand_b_i (lsu_data.operand_b ),
|
||||
.imm_i (lsu_data.imm ),
|
||||
.lsu_ready_o ,
|
||||
.lsu_valid_i ,
|
||||
.trans_id_i ,
|
||||
.lsu_trans_id_o ,
|
||||
.lsu_result_o ,
|
||||
.lsu_valid_o ,
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i,
|
||||
.no_st_pending_o,
|
||||
.fu_data_i ( lsu_data ),
|
||||
.lsu_ready_o,
|
||||
.lsu_valid_i,
|
||||
.lsu_trans_id_o,
|
||||
.lsu_result_o,
|
||||
.lsu_valid_o,
|
||||
.commit_i (lsu_commit_i ),
|
||||
.commit_ready_o (lsu_commit_ready_o ),
|
||||
.enable_translation_i ,
|
||||
.en_ld_st_translation_i ,
|
||||
.icache_areq_i ,
|
||||
.icache_areq_o ,
|
||||
.priv_lvl_i ,
|
||||
.ld_st_priv_lvl_i ,
|
||||
.sum_i ,
|
||||
.mxr_i ,
|
||||
.satp_ppn_i ,
|
||||
.asid_i ,
|
||||
.flush_tlb_i ,
|
||||
.itlb_miss_o ,
|
||||
.dtlb_miss_o ,
|
||||
.dcache_req_ports_i ,
|
||||
.dcache_req_ports_o ,
|
||||
.lsu_exception_o ,
|
||||
.amo_valid_commit_i ,
|
||||
.amo_req_o ,
|
||||
.enable_translation_i,
|
||||
.en_ld_st_translation_i,
|
||||
.icache_areq_i,
|
||||
.icache_areq_o,
|
||||
.priv_lvl_i,
|
||||
.ld_st_priv_lvl_i,
|
||||
.sum_i,
|
||||
.mxr_i,
|
||||
.satp_ppn_i,
|
||||
.asid_i,
|
||||
.flush_tlb_i,
|
||||
.itlb_miss_o,
|
||||
.dtlb_miss_o,
|
||||
.dcache_req_ports_i,
|
||||
.dcache_req_ports_o,
|
||||
.lsu_exception_o,
|
||||
.amo_valid_commit_i,
|
||||
.amo_req_o,
|
||||
.amo_resp_i
|
||||
);
|
||||
|
||||
|
|
|
@ -12,21 +12,16 @@
|
|||
// Date: 12.04.2018
|
||||
// Description: Wrapper for the floating-point unit
|
||||
|
||||
|
||||
import ariane_pkg::*;
|
||||
|
||||
module fpu_wrap (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic flush_i,
|
||||
input logic [TRANS_ID_BITS-1:0] trans_id_i,
|
||||
input fu_t fu_i,
|
||||
input logic fpu_valid_i,
|
||||
output logic fpu_ready_o,
|
||||
input fu_op operator_i,
|
||||
input logic [FLEN-1:0] operand_a_i,
|
||||
input logic [FLEN-1:0] operand_b_i, // imm will be here unless used as operand
|
||||
input logic [FLEN-1:0] operand_c_i, // imm will be here unless used as operand
|
||||
input fu_data_t fu_data_i,
|
||||
|
||||
input logic [1:0] fpu_fmt_i,
|
||||
input logic [2:0] fpu_rm_i,
|
||||
input logic [2:0] fpu_frm_i,
|
||||
|
@ -41,6 +36,14 @@ module fpu_wrap (
|
|||
// otherwise compilation might issue an error if FLEN=0
|
||||
generate
|
||||
if (FP_PRESENT) begin : fpu_gen
|
||||
|
||||
logic [FLEN-1:0] operand_a_i;
|
||||
logic [FLEN-1:0] operand_b_i;
|
||||
logic [FLEN-1:0] operand_c_i;
|
||||
assign operand_a_i = fu_data_i.operand_a[FLEN-1:0];
|
||||
assign operand_b_i = fu_data_i.operand_b[FLEN-1:0];
|
||||
assign operand_c_i = fu_data_i.imm[FLEN-1:0];
|
||||
|
||||
//-----------------------------------
|
||||
// FPnew encoding from FPnew package
|
||||
//-----------------------------------
|
||||
|
@ -166,8 +169,8 @@ generate
|
|||
fpu_fmt2_d = FMT_FP32;
|
||||
fpu_ifmt_d = IFMT_INT32;
|
||||
fpu_rm_d = fpu_rm_i;
|
||||
fpu_vec_op_d = fu_i == FPU_VEC;
|
||||
fpu_tag_d = trans_id_i;
|
||||
fpu_vec_op_d = fu_data_i.fu == FPU_VEC;
|
||||
fpu_tag_d = fu_data_i.trans_id;
|
||||
vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field
|
||||
replicate_c = 1'b0;
|
||||
check_ah = 1'b0; // whether set scalar AH encoding from MSB of rm_i
|
||||
|
@ -199,7 +202,7 @@ generate
|
|||
|
||||
|
||||
// Operations (this can modify the rounding mode field and format!)
|
||||
unique case (operator_i)
|
||||
unique case (fu_data_i.operator)
|
||||
// Addition
|
||||
FADD : begin
|
||||
fpu_op_d = OP_ADD;
|
||||
|
|
|
@ -40,16 +40,11 @@ module issue_read_operands #(
|
|||
input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_gpr_i,
|
||||
input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_fpr_i,
|
||||
// To FU, just single issue for now
|
||||
output fu_t fu_o,
|
||||
output fu_op operator_o,
|
||||
output logic [63:0] operand_a_o,
|
||||
output logic [63:0] operand_b_o,
|
||||
output logic [63:0] imm_o, // output immediate for the LSU
|
||||
output logic [TRANS_ID_BITS-1:0] trans_id_o,
|
||||
output fu_data_t fu_data_o,
|
||||
output logic [63:0] pc_o,
|
||||
output logic is_compressed_instr_o,
|
||||
// ALU 1
|
||||
input logic alu_ready_i, // FU is ready
|
||||
input logic flu_ready_i, // Fixed latency unit ready to accept a new request
|
||||
output logic alu_valid_o, // Output is valid
|
||||
// Branches and Jumps
|
||||
output logic branch_valid_o, // this is a valid branch instruction
|
||||
|
@ -108,20 +103,20 @@ module issue_read_operands #(
|
|||
assign orig_instr = riscv::instruction_t'(issue_instr_i.ex.tval[31:0]);
|
||||
|
||||
// ID <-> EX registers
|
||||
assign operand_a_o = operand_a_q;
|
||||
assign operand_b_o = operand_b_q;
|
||||
assign fu_o = fu_q;
|
||||
assign operator_o = operator_q;
|
||||
assign alu_valid_o = alu_valid_q;
|
||||
assign branch_valid_o = branch_valid_q;
|
||||
assign lsu_valid_o = lsu_valid_q;
|
||||
assign csr_valid_o = csr_valid_q;
|
||||
assign mult_valid_o = mult_valid_q;
|
||||
assign fpu_valid_o = fpu_valid_q;
|
||||
assign fpu_fmt_o = fpu_fmt_q;
|
||||
assign fpu_rm_o = fpu_rm_q;
|
||||
assign trans_id_o = trans_id_q;
|
||||
assign imm_o = imm_q;
|
||||
assign fu_data_o.operand_a = operand_a_q;
|
||||
assign fu_data_o.operand_b = operand_b_q;
|
||||
assign fu_data_o.fu = fu_q;
|
||||
assign fu_data_o.operator = operator_q;
|
||||
assign fu_data_o.trans_id = trans_id_q;
|
||||
assign fu_data_o.imm = imm_q;
|
||||
assign alu_valid_o = alu_valid_q;
|
||||
assign branch_valid_o = branch_valid_q;
|
||||
assign lsu_valid_o = lsu_valid_q;
|
||||
assign csr_valid_o = csr_valid_q;
|
||||
assign mult_valid_o = mult_valid_q;
|
||||
assign fpu_valid_o = fpu_valid_q;
|
||||
assign fpu_fmt_o = fpu_fmt_q;
|
||||
assign fpu_rm_o = fpu_rm_q;
|
||||
// ---------------
|
||||
// Issue Stage
|
||||
// ---------------
|
||||
|
@ -133,7 +128,7 @@ module issue_read_operands #(
|
|||
NONE:
|
||||
fu_busy = 1'b0;
|
||||
ALU, CTRL_FLOW, CSR:
|
||||
fu_busy = ~alu_ready_i;
|
||||
fu_busy = ~flu_ready_i;
|
||||
MULT:
|
||||
fu_busy = ~mult_ready_i;
|
||||
FPU, FPU_VEC:
|
||||
|
|
|
@ -31,16 +31,10 @@ module issue_stage #(
|
|||
input logic is_ctrl_flow_i,
|
||||
output logic decoded_instr_ack_o,
|
||||
// to EX
|
||||
output fu_t fu_o,
|
||||
output fu_op operator_o,
|
||||
output logic [63:0] operand_a_o,
|
||||
output logic [63:0] operand_b_o,
|
||||
output logic [63:0] imm_o,
|
||||
output logic [TRANS_ID_BITS-1:0] trans_id_o,
|
||||
output fu_data_t fu_data_o,
|
||||
output logic [63:0] pc_o,
|
||||
output logic is_compressed_instr_o,
|
||||
|
||||
input logic alu_ready_i,
|
||||
input logic flu_ready_i,
|
||||
output logic alu_valid_o,
|
||||
// ex just resolved our predicted branch, we are ready to accept new requests
|
||||
input logic resolve_branch_i,
|
||||
|
@ -161,6 +155,8 @@ module issue_stage #(
|
|||
.issue_instr_i ( issue_instr_sb_iro ),
|
||||
.issue_instr_valid_i ( issue_instr_valid_sb_iro ),
|
||||
.issue_ack_o ( issue_ack_iro_sb ),
|
||||
.fu_data_o ( fu_data_o ),
|
||||
.flu_ready_i ( flu_ready_i ),
|
||||
.rs1_o ( rs1_iro_sb ),
|
||||
.rs1_i ( rs1_sb_iro ),
|
||||
.rs1_valid_i ( rs1_valid_sb_iro ),
|
||||
|
|
13
src/lsu.sv
13
src/lsu.sv
|
@ -23,14 +23,9 @@ module lsu #(
|
|||
output logic no_st_pending_o,
|
||||
input logic amo_valid_commit_i,
|
||||
|
||||
input fu_t fu_i,
|
||||
input fu_op operator_i,
|
||||
input logic [63:0] operand_a_i,
|
||||
input logic [63:0] operand_b_i,
|
||||
input logic [63:0] imm_i,
|
||||
input fu_data_t fu_data_i,
|
||||
output logic lsu_ready_o, // FU is ready e.g. not busy
|
||||
input logic lsu_valid_i, // Input is valid
|
||||
input logic [TRANS_ID_BITS-1:0] trans_id_i, // transaction id, needed for WB
|
||||
output logic [TRANS_ID_BITS-1:0] lsu_trans_id_o, // ID of scoreboard entry at which to write back
|
||||
output logic [63:0] lsu_result_o,
|
||||
output logic lsu_valid_o, // transaction id for which the output is the requested one
|
||||
|
@ -83,7 +78,7 @@ module lsu #(
|
|||
logic [63:0] vaddr_i;
|
||||
logic [7:0] be_i;
|
||||
|
||||
assign vaddr_i = $unsigned($signed(imm_i) + $signed(operand_a_i));
|
||||
assign vaddr_i = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a));
|
||||
|
||||
logic st_valid_i;
|
||||
logic ld_valid_i;
|
||||
|
@ -259,7 +254,7 @@ module lsu #(
|
|||
// we can generate the byte enable from the virtual address since the last
|
||||
// 12 bit are the same anyway
|
||||
// and we can always generate the byte enable from the address at hand
|
||||
assign be_i = be_gen(vaddr_i[2:0], extract_transfer_size(operator_i));
|
||||
assign be_i = be_gen(vaddr_i[2:0], extract_transfer_size(fu_data_i.operator));
|
||||
|
||||
// ------------------------
|
||||
// Misaligned Exception
|
||||
|
@ -354,7 +349,7 @@ module lsu #(
|
|||
// new data arrives here
|
||||
lsu_ctrl_t lsu_req_i;
|
||||
|
||||
assign lsu_req_i = {lsu_valid_i, vaddr_i, operand_b_i, be_i, fu_i, operator_i, trans_id_i};
|
||||
assign lsu_req_i = {lsu_valid_i, vaddr_i, fu_data_i.operand_b, be_i, fu_data_i.fu, fu_data_i.operator, fu_data_i.trans_id};
|
||||
|
||||
lsu_bypass lsu_bypass_i (
|
||||
.lsu_req_i ( lsu_req_i ),
|
||||
|
|
386
src/mult.sv
386
src/mult.sv
|
@ -11,7 +11,7 @@
|
|||
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
|
||||
//
|
||||
// Date: 05.06.2017
|
||||
// Description: Ariane Multiplier
|
||||
// Description: Ariane Multiplier and Divider (as defined in the M-extension)
|
||||
|
||||
import ariane_pkg::*;
|
||||
|
||||
|
@ -19,11 +19,8 @@ module mult (
|
|||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic flush_i,
|
||||
input logic [TRANS_ID_BITS-1:0] trans_id_i,
|
||||
input fu_data_t fu_data_i,
|
||||
input logic mult_valid_i,
|
||||
input fu_op operator_i,
|
||||
input logic [63:0] operand_a_i,
|
||||
input logic [63:0] operand_b_i,
|
||||
output logic [63:0] result_o,
|
||||
output logic mult_valid_o,
|
||||
output logic mult_ready_o,
|
||||
|
@ -40,8 +37,8 @@ module mult (
|
|||
logic div_valid_op;
|
||||
logic mul_valid_op;
|
||||
// Input Arbitration
|
||||
assign mul_valid_op = ~flush_i && mult_valid_i && (operator_i inside { MUL, MULH, MULHU, MULHSU, MULW });
|
||||
assign div_valid_op = ~flush_i && mult_valid_i && (operator_i inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW });
|
||||
assign mul_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operator inside { MUL, MULH, MULHU, MULHSU, MULW });
|
||||
assign div_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operator inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW });
|
||||
|
||||
// ---------------------
|
||||
// Output Arbitration
|
||||
|
@ -58,12 +55,17 @@ module mult (
|
|||
// Multiplication
|
||||
// ---------------------
|
||||
mul i_mul (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.trans_id_i ( fu_data_i.trans_id ),
|
||||
.operator_i ( fu_data_i.operator ),
|
||||
.operand_a_i ( fu_data_i.operand_a ),
|
||||
.operand_b_i ( fu_data_i.operand_b ),
|
||||
.result_o ( mul_result ),
|
||||
.mult_valid_i ( mul_valid_op ),
|
||||
.mult_valid_o ( mul_valid ),
|
||||
.mult_trans_id_o ( mul_trans_id ),
|
||||
.mult_ready_o ( ), // this unit is unconditionally ready
|
||||
.*
|
||||
.mult_ready_o ( ) // this unit is unconditionally ready
|
||||
);
|
||||
|
||||
// ---------------------
|
||||
|
@ -84,7 +86,7 @@ module mult (
|
|||
logic word_op_d, word_op_q; // save whether the operation was signed or not
|
||||
|
||||
// is this a signed operation?
|
||||
assign div_signed = (operator_i inside {DIV, DIVW, REM, REMW}) ? 1'b1 : 1'b0;
|
||||
assign div_signed = (fu_data_i.operator inside {DIV, DIVW, REM, REMW}) ? 1'b1 : 1'b0;
|
||||
// if this operation is signed look at the actual sign bit to determine whether we should perform signed or unsigned division
|
||||
assign div_op_signed = div_signed & operand_b[63];
|
||||
|
||||
|
@ -108,17 +110,17 @@ module mult (
|
|||
rem = 1'b0;
|
||||
|
||||
// we've got a new division operation
|
||||
if (mult_valid_i && operator_i inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin
|
||||
if (mult_valid_i && fu_data_i.operator inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin
|
||||
// is this a word operation?
|
||||
if (operator_i inside {DIVW, DIVUW, REMW, REMUW}) begin
|
||||
if (fu_data_i.operator inside {DIVW, DIVUW, REMW, REMUW}) begin
|
||||
word_op = 1'b1;
|
||||
// yes, so check whether we should sign extend; this is only done for a signed operation
|
||||
if (div_signed) begin
|
||||
operand_a = sext32(operand_a_i[31:0]);
|
||||
operand_b = sext32(operand_b_i[31:0]);
|
||||
operand_a = sext32(fu_data_i.operand_a[31:0]);
|
||||
operand_b = sext32(fu_data_i.operand_b[31:0]);
|
||||
end else begin
|
||||
operand_a = {32'b0, operand_a_i[31:0]};
|
||||
operand_b = {32'b0, operand_b_i[31:0]};
|
||||
operand_a = {32'b0, fu_data_i.operand_a[31:0]};
|
||||
operand_b = {32'b0, fu_data_i.operand_b[31:0]};
|
||||
end
|
||||
|
||||
// save whether we want to sign extend the result or not; this is done for all word operations
|
||||
|
@ -127,12 +129,12 @@ module mult (
|
|||
end else begin
|
||||
word_op_d = 1'b0;
|
||||
// no sign extending is necessary as we are already using the full 64 bit
|
||||
operand_a = operand_a_i;
|
||||
operand_b = operand_b_i;
|
||||
operand_a = fu_data_i.operand_a;
|
||||
operand_b = fu_data_i.operand_b;
|
||||
end
|
||||
|
||||
// is this a modulo?
|
||||
if (operator_i inside {REM, REMU, REMW, REMUW}) begin
|
||||
if (fu_data_i.operator inside {REM, REMU, REMW, REMUW}) begin
|
||||
rem = 1'b1;
|
||||
end
|
||||
end
|
||||
|
@ -162,22 +164,22 @@ module mult (
|
|||
.C_WIDTH ( 64 ),
|
||||
.C_LOG_WIDTH ( $clog2(64) + 1 )
|
||||
) i_div (
|
||||
.Clk_CI ( clk_i ),
|
||||
.Rst_RBI ( rst_ni ),
|
||||
.TransId_DI ( trans_id_i ),
|
||||
.OpA_DI ( operand_a ),
|
||||
.OpB_DI ( operand_b_shift ),
|
||||
.OpBShift_DI ( div_shift ),
|
||||
.OpBIsZero_SI ( ~(|operand_b) ),
|
||||
.OpBSign_SI ( div_op_signed ), // gate this to 0 in case of unsigned ops
|
||||
.OpCode_SI ( {rem, div_signed} ), // 00: udiv, 10: urem, 01: div, 11: rem
|
||||
.InVld_SI ( div_valid_op ),
|
||||
.Flush_SI ( flush_i ),
|
||||
.OutRdy_SO ( mult_ready_o ),
|
||||
.OutRdy_SI ( div_ready_i ),
|
||||
.OutVld_SO ( div_valid ),
|
||||
.TransId_DO ( div_trans_id ),
|
||||
.Res_DO ( result )
|
||||
.Clk_CI ( clk_i ),
|
||||
.Rst_RBI ( rst_ni ),
|
||||
.TransId_DI ( fu_data_i.trans_id ),
|
||||
.OpA_DI ( operand_a ),
|
||||
.OpB_DI ( operand_b_shift ),
|
||||
.OpBShift_DI ( div_shift ),
|
||||
.OpBIsZero_SI ( ~(|operand_b) ),
|
||||
.OpBSign_SI ( div_op_signed ), // gate this to 0 in case of unsigned ops
|
||||
.OpCode_SI ( {rem, div_signed} ), // 00: udiv, 10: urem, 01: div, 11: rem
|
||||
.InVld_SI ( div_valid_op ),
|
||||
.Flush_SI ( flush_i ),
|
||||
.OutRdy_SO ( mult_ready_o ),
|
||||
.OutRdy_SI ( div_ready_i ),
|
||||
.OutVld_SO ( div_valid ),
|
||||
.TransId_DO ( div_trans_id ),
|
||||
.Res_DO ( result )
|
||||
);
|
||||
// Result multiplexer
|
||||
// if it was a signed word operation the bit will be set and the result will be sign extended accordingly
|
||||
|
@ -194,317 +196,3 @@ module mult (
|
|||
end
|
||||
end
|
||||
endmodule
|
||||
|
||||
/* File : mult.sv
|
||||
* Ver : 1.0
|
||||
* Date : 15.03.2016
|
||||
*
|
||||
*
|
||||
* Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
*
|
||||
* Description: this is a simple serial divider for signed integers.
|
||||
*
|
||||
*
|
||||
* Authors : Michael Schaffner (schaffner@iis.ee.ethz.ch)
|
||||
* Andreas Traber (atraber@iis.ee.ethz.ch)
|
||||
*
|
||||
*/
|
||||
// Iterative (one comparator/adder step per cycle) divider with a small
// three-state control FSM. Operands are captured when a request is accepted
// in IDLE, the DIVIDE state iterates until the shift counter reaches zero and
// FINISH presents the result until the consumer acknowledges it.
module serial_divider #(
    parameter int unsigned C_WIDTH     = 32, // operand / result width
    parameter int unsigned C_LOG_WIDTH = 6   // must equal $clog2(C_WIDTH+1), checked below
)(
    input  logic                      Clk_CI,
    input  logic                      Rst_RBI,       // asynchronous reset, active low
    // request side
    input  logic [TRANS_ID_BITS-1:0]  TransId_DI,    // stored on load, returned on TransId_DO
    input  logic [C_WIDTH-1:0]        OpA_DI,
    input  logic [C_WIDTH-1:0]        OpB_DI,
    input  logic [C_LOG_WIDTH-1:0]    OpBShift_DI,   // initial value of the iteration counter
    input  logic                      OpBIsZero_SI,
    input  logic                      OpBSign_SI,    // gate this to 0 in case of unsigned ops
    input  logic [1:0]                OpCode_SI,     // 0: udiv, 2: urem, 1: div, 3: rem
    // handshake
    input  logic                      InVld_SI,
    input  logic                      Flush_SI,
    // response side
    output logic                      OutRdy_SO,
    input  logic                      OutRdy_SI,
    output logic                      OutVld_SO,
    output logic [TRANS_ID_BITS-1:0]  TransId_DO,
    output logic [C_WIDTH-1:0]        Res_DO
);

    // ----------------------------------
    // Signal Declarations
    // ----------------------------------
    // control FSM
    enum logic [1:0] {IDLE, DIVIDE, FINISH} State_SN, State_SP;
    logic LoadEn_S, ARegEn_S, BRegEn_S, ResRegEn_S;
    logic ABComp_S, PmSel_S;

    // iteration counter
    logic [C_LOG_WIDTH-1:0] Cnt_DP, Cnt_DN;
    logic                   CntZero_S;

    // working registers
    logic [C_WIDTH-1:0] AReg_DP, AReg_DN;
    logic [C_WIDTH-1:0] BReg_DP, BReg_DN;
    logic [C_WIDTH-1:0] ResReg_DP, ResReg_DN;
    logic [C_WIDTH-1:0] ResReg_DP_rev;

    // flags captured together with the operands
    logic RemSel_SN,    RemSel_SP;
    logic CompInv_SN,   CompInv_SP;
    logic ResInv_SN,    ResInv_SP;
    logic OpBIsZero_SN, OpBIsZero_SP;
    logic [TRANS_ID_BITS-1:0] TransId_DP, TransId_DN;

    // combinational datapath nodes
    logic [C_WIDTH-1:0] AddMux_D;
    logic [C_WIDTH-1:0] AddTmp_D;
    logic [C_WIDTH-1:0] AddOut_D;
    logic [C_WIDTH-1:0] BMux_D;
    logic [C_WIDTH-1:0] OutMux_D;

    // high for an operation with OpCode_SI[0] set (div/rem) whose operand A
    // sign bit differs from OpBSign_SI
    logic SignedMismatch_S;
    assign SignedMismatch_S = OpCode_SI[0] & (OpA_DI[$high(OpA_DI)] ^ OpBSign_SI);

    // -----------------
    // Datapath
    // -----------------
    // adder mode: add only while loading and only if the signs do not mismatch
    assign PmSel_S = LoadEn_S & ~SignedMismatch_S;

    // adder operands: (0, OpA) while loading, (A register, B register) otherwise
    assign AddTmp_D = (LoadEn_S) ? '0     : AReg_DP;
    assign AddMux_D = (LoadEn_S) ? OpA_DI : BReg_DP;
    assign AddOut_D = (PmSel_S)  ? AddTmp_D + AddMux_D
                                 : AddTmp_D - $signed(AddMux_D);

    // B is loaded from the input, afterwards it moves right by one position per
    // cycle with CompInv_SP entering at the MSB
    assign BMux_D = (LoadEn_S) ? OpB_DI
                               : {CompInv_SP, (BReg_DP[$high(BReg_DP):1])};

    // comparator deciding whether the A register is updated in this step
    assign ABComp_S = ((AReg_DP == BReg_DP) | ((AReg_DP > BReg_DP) ^ CompInv_SP))
                    & ((|AReg_DP) | OpBIsZero_SP);

    // the result register is filled from the MSB side, hence it is bit-reversed
    // before being offered as an output candidate
    assign ResReg_DP_rev = {<<{ResReg_DP}};
    assign OutMux_D      = (RemSel_SP) ? AReg_DP : ResReg_DP_rev;

    // negate the selected value if requested by the stored flag
    assign Res_DO = (ResInv_SP) ? -$signed(OutMux_D) : OutMux_D;

    // -----------------
    // Counter
    // -----------------
    assign CntZero_S = ~(|Cnt_DP);

    // load on a new request, otherwise count down and stick at zero
    assign Cnt_DN = (LoadEn_S)  ? OpBShift_DI :
                    (CntZero_S) ? Cnt_DP      : Cnt_DP - 1;

    // -----------------
    // FSM
    // -----------------
    always_comb begin : p_fsm
        // defaults: hold state, everything de-asserted
        State_SN   = State_SP;
        OutRdy_SO  = 1'b0;
        OutVld_SO  = 1'b0;
        LoadEn_S   = 1'b0;
        ARegEn_S   = 1'b0;
        BRegEn_S   = 1'b0;
        ResRegEn_S = 1'b0;

        case (State_SP)

            IDLE: begin
                if (InVld_SI) begin
                    // accept the request: capture operands and start iterating
                    LoadEn_S = 1'b1;
                    ARegEn_S = 1'b1;
                    BRegEn_S = 1'b1;
                    State_SN = DIVIDE;
                end else begin
                    // nothing to do, advertise readiness
                    OutRdy_SO = 1'b1;
                end
            end

            DIVIDE: begin
                ARegEn_S   = ABComp_S;
                BRegEn_S   = 1'b1;
                ResRegEn_S = 1'b1;

                // the step performed with a zero counter is the last one
                if (CntZero_S) begin
                    State_SN = FINISH;
                end
            end

            FINISH: begin
                // hold the result until the consumer takes it
                OutVld_SO = 1'b1;

                if (OutRdy_SI) begin
                    State_SN = IDLE;
                end
            end

            default : /* default */ ;

        endcase

        // a flush overrides the state logic: handshake outputs and the
        // load/A/B enables are forced low and the FSM returns to IDLE
        if (Flush_SI) begin
            OutRdy_SO = 1'b0;
            OutVld_SO = 1'b0;
            LoadEn_S  = 1'b0;
            ARegEn_S  = 1'b0;
            BRegEn_S  = 1'b0;
            State_SN  = IDLE;
        end
    end

    // -----------------
    // Registers
    // -----------------
    // flags and transaction id are only updated while loading
    assign RemSel_SN    = (LoadEn_S) ? OpCode_SI[1] : RemSel_SP;
    assign CompInv_SN   = (LoadEn_S) ? OpBSign_SI   : CompInv_SP;
    assign OpBIsZero_SN = (LoadEn_S) ? OpBIsZero_SI : OpBIsZero_SP;
    assign ResInv_SN    = (LoadEn_S) ? (~OpBIsZero_SI | OpCode_SI[1]) & SignedMismatch_S
                                     : ResInv_SP;
    assign TransId_DN   = (LoadEn_S) ? TransId_DI   : TransId_DP;
    assign TransId_DO   = TransId_DP;

    // working registers with their individual enables
    assign AReg_DN   = (ARegEn_S)   ? AddOut_D : AReg_DP;
    assign BReg_DN   = (BRegEn_S)   ? BMux_D   : BReg_DP;
    assign ResReg_DN = (LoadEn_S)   ? '0 :
                       (ResRegEn_S) ? {ABComp_S, ResReg_DP[$high(ResReg_DP):1]} : ResReg_DP;

    always_ff @(posedge Clk_CI or negedge Rst_RBI) begin : p_regs
        if (~Rst_RBI) begin
            State_SP     <= IDLE;
            Cnt_DP       <= '0;
            AReg_DP      <= '0;
            BReg_DP      <= '0;
            ResReg_DP    <= '0;
            TransId_DP   <= '0;
            RemSel_SP    <= 1'b0;
            CompInv_SP   <= 1'b0;
            ResInv_SP    <= 1'b0;
            OpBIsZero_SP <= 1'b0;
        end else begin
            State_SP     <= State_SN;
            Cnt_DP       <= Cnt_DN;
            AReg_DP      <= AReg_DN;
            BReg_DP      <= BReg_DN;
            ResReg_DP    <= ResReg_DN;
            TransId_DP   <= TransId_DN;
            RemSel_SP    <= RemSel_SN;
            CompInv_SP   <= CompInv_SN;
            ResInv_SP    <= ResInv_SN;
            OpBIsZero_SP <= OpBIsZero_SN;
        end
    end

    // ------------
    // Assertions
    // ------------
    //pragma translate_off
    initial begin : p_assertions
        assert (C_LOG_WIDTH == $clog2(C_WIDTH+1)) else $error("C_LOG_WIDTH must be $clog2(C_WIDTH+1)");
    end
    //pragma translate_on

endmodule
|
||||
|
||||
// --------------------------------------------------
|
||||
// Multiplication Unit with one pipeline register
|
||||
// --------------------------------------------------
|
||||
// 64x64-bit multiplier with one output pipeline register.
//
// The full 128-bit product and the operator are registered every cycle;
// result_o is selected combinationally from the registered product, so the
// result (together with mult_valid_o and mult_trans_id_o) is visible one
// clock cycle after the request.
module mul (
    input  logic                     clk_i,
    input  logic                     rst_ni,          // asynchronous reset, active low
    input  logic [TRANS_ID_BITS-1:0] trans_id_i,      // forwarded to mult_trans_id_o
    input  logic                     mult_valid_i,    // request valid
    input  fu_op                     operator_i,      // MUL, MULH, MULHU, MULHSU or MULW
    input  logic [63:0]              operand_a_i,
    input  logic [63:0]              operand_b_i,
    output logic [63:0]              result_o,
    output logic                     mult_valid_o,
    output logic                     mult_ready_o,    // constant 1
    output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
);
    // Pipeline register
    logic [TRANS_ID_BITS-1:0]   trans_id_q;
    logic                       mult_valid_q;
    fu_op                       operator_d, operator_q;
    logic [127:0]               mult_result_d, mult_result_q;

    // control signals
    logic                       sign_a, sign_b;
    logic                       mult_valid;

    assign mult_valid_o    = mult_valid_q;
    assign mult_trans_id_o = trans_id_q;
    assign mult_ready_o    = 1'b1;

    // only the multiplication operators produce a valid result in this unit
    assign mult_valid      = mult_valid_i && (operator_i inside {MUL, MULH, MULHU, MULHSU, MULW});

    // Sign Select MUX
    // Operands are treated as unsigned unless the operator requests otherwise.
    always_comb begin
        sign_a = 1'b0;
        sign_b = 1'b0;

        // signed multiplication
        if (operator_i == MULH) begin
            sign_a = 1'b1;
            sign_b = 1'b1;
        // signed - unsigned multiplication
        end else if (operator_i == MULHSU) begin
            sign_a = 1'b1;
        end
    end

    // single stage version
    // Each operand is widened to 65 bits: bit 64 replicates the sign bit for a
    // signed operand and is zero for an unsigned one, so that a single signed
    // multiplication covers all operator variants.
    assign mult_result_d = $signed({operand_a_i[63] & sign_a, operand_a_i}) *
                           $signed({operand_b_i[63] & sign_b, operand_b_i});

    assign operator_d = operator_i;

    // select the part of the registered product requested by the registered operator
    always_comb begin : p_selmux
        unique case (operator_q)
            MULH, MULHU, MULHSU: result_o = mult_result_q[127:64];
            MULW:                result_o = sext32(mult_result_q[31:0]);
            // MUL performs an XLEN-bit x XLEN-bit multiplication and places the lower XLEN bits in the destination register
            default:             result_o = mult_result_q[63:0];// including MUL
        endcase
    end

    // -----------------------
    // Output pipeline register
    // -----------------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            mult_valid_q    <= '0;
            trans_id_q      <= '0;
            operator_q      <= MUL;
            mult_result_q   <= '0;
        end else begin
            // captured unconditionally, only meaningful while mult_valid_q is set
            trans_id_q      <= trans_id_i;
            // Output Register
            mult_valid_q    <= mult_valid;
            operator_q      <= operator_d;
            mult_result_q   <= mult_result_d;
        end
    end
endmodule
|
||||
|
|
105
src/multiplier.sv
Normal file
105
src/multiplier.sv
Normal file
|
@ -0,0 +1,105 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
|
||||
//
|
||||
// Description: Multiplication Unit with one pipeline register
|
||||
// This unit relies on retiming features of the synthesizer
|
||||
//
|
||||
|
||||
import ariane_pkg::*;
|
||||
|
||||
// 64x64-bit multiplier with one output pipeline register.
//
// The full 128-bit product and the operator are registered every cycle;
// result_o is selected combinationally from the registered product, so the
// result (together with mult_valid_o and mult_trans_id_o) is visible one
// clock cycle after the request.
module mul (
    input  logic                     clk_i,
    input  logic                     rst_ni,          // asynchronous reset, active low
    input  logic [TRANS_ID_BITS-1:0] trans_id_i,      // forwarded to mult_trans_id_o
    input  logic                     mult_valid_i,    // request valid
    input  fu_op                     operator_i,      // MUL, MULH, MULHU, MULHSU or MULW
    input  logic [63:0]              operand_a_i,
    input  logic [63:0]              operand_b_i,
    output logic [63:0]              result_o,
    output logic                     mult_valid_o,
    output logic                     mult_ready_o,    // constant 1
    output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
);
    // Pipeline register
    logic [TRANS_ID_BITS-1:0]   trans_id_q;
    logic                       mult_valid_q;
    fu_op                       operator_d, operator_q;
    logic [127:0]               mult_result_d, mult_result_q;

    // control signals
    logic                       sign_a, sign_b;
    logic                       mult_valid;

    assign mult_valid_o    = mult_valid_q;
    assign mult_trans_id_o = trans_id_q;
    assign mult_ready_o    = 1'b1;

    // only the multiplication operators produce a valid result in this unit
    assign mult_valid      = mult_valid_i && (operator_i inside {MUL, MULH, MULHU, MULHSU, MULW});

    // Sign Select MUX
    // Operands are treated as unsigned unless the operator requests otherwise.
    always_comb begin
        sign_a = 1'b0;
        sign_b = 1'b0;

        // signed multiplication
        if (operator_i == MULH) begin
            sign_a = 1'b1;
            sign_b = 1'b1;
        // signed - unsigned multiplication
        end else if (operator_i == MULHSU) begin
            sign_a = 1'b1;
        end
    end

    // single stage version
    // Each operand is widened to 65 bits: bit 64 replicates the sign bit for a
    // signed operand and is zero for an unsigned one, so that a single signed
    // multiplication covers all operator variants.
    assign mult_result_d = $signed({operand_a_i[63] & sign_a, operand_a_i}) *
                           $signed({operand_b_i[63] & sign_b, operand_b_i});

    assign operator_d = operator_i;

    // select the part of the registered product requested by the registered operator
    always_comb begin : p_selmux
        unique case (operator_q)
            MULH, MULHU, MULHSU: result_o = mult_result_q[127:64];
            MULW:                result_o = sext32(mult_result_q[31:0]);
            // MUL performs an XLEN-bit x XLEN-bit multiplication and places the lower XLEN bits in the destination register
            default:             result_o = mult_result_q[63:0];// including MUL
        endcase
    end

    // -----------------------
    // Output pipeline register
    // -----------------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            mult_valid_q    <= '0;
            trans_id_q      <= '0;
            operator_q      <= MUL;
            mult_result_q   <= '0;
        end else begin
            // captured unconditionally, only meaningful while mult_valid_q is set
            trans_id_q      <= trans_id_i;
            // Output Register
            mult_valid_q    <= mult_valid;
            operator_q      <= operator_d;
            mult_result_q   <= mult_result_d;
        end
    end
endmodule
|
230
src/serial_divider.sv
Normal file
230
src/serial_divider.sv
Normal file
|
@ -0,0 +1,230 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
// File : serial_divider.sv
|
||||
// Ver : 1.0
|
||||
// Date : 15.03.2016
|
||||
//
|
||||
//
|
||||
//
|
||||
// Description: this is a simple serial divider for signed integers.
|
||||
//
|
||||
//
|
||||
// Authors : Michael Schaffner (schaffner@iis.ee.ethz.ch)
|
||||
// Andreas Traber (atraber@iis.ee.ethz.ch)
|
||||
|
||||
import ariane_pkg::*;
|
||||
|
||||
// Iterative (one comparator/adder step per cycle) divider with a small
// three-state control FSM. Operands are captured when a request is accepted
// in IDLE, the DIVIDE state iterates until the shift counter reaches zero and
// FINISH presents the result until the consumer acknowledges it.
module serial_divider #(
    parameter int unsigned C_WIDTH     = 32, // operand / result width
    parameter int unsigned C_LOG_WIDTH = 6   // must equal $clog2(C_WIDTH+1), checked below
)(
    input  logic                      Clk_CI,
    input  logic                      Rst_RBI,       // asynchronous reset, active low
    // request side
    input  logic [TRANS_ID_BITS-1:0]  TransId_DI,    // stored on load, returned on TransId_DO
    input  logic [C_WIDTH-1:0]        OpA_DI,
    input  logic [C_WIDTH-1:0]        OpB_DI,
    input  logic [C_LOG_WIDTH-1:0]    OpBShift_DI,   // initial value of the iteration counter
    input  logic                      OpBIsZero_SI,
    input  logic                      OpBSign_SI,    // gate this to 0 in case of unsigned ops
    input  logic [1:0]                OpCode_SI,     // 0: udiv, 2: urem, 1: div, 3: rem
    // handshake
    input  logic                      InVld_SI,
    input  logic                      Flush_SI,
    // response side
    output logic                      OutRdy_SO,
    input  logic                      OutRdy_SI,
    output logic                      OutVld_SO,
    output logic [TRANS_ID_BITS-1:0]  TransId_DO,
    output logic [C_WIDTH-1:0]        Res_DO
);

    // ----------------------------------
    // Signal Declarations
    // ----------------------------------
    // control FSM
    enum logic [1:0] {IDLE, DIVIDE, FINISH} State_SN, State_SP;
    logic LoadEn_S, ARegEn_S, BRegEn_S, ResRegEn_S;
    logic ABComp_S, PmSel_S;

    // iteration counter
    logic [C_LOG_WIDTH-1:0] Cnt_DP, Cnt_DN;
    logic                   CntZero_S;

    // working registers
    logic [C_WIDTH-1:0] AReg_DP, AReg_DN;
    logic [C_WIDTH-1:0] BReg_DP, BReg_DN;
    logic [C_WIDTH-1:0] ResReg_DP, ResReg_DN;
    logic [C_WIDTH-1:0] ResReg_DP_rev;

    // flags captured together with the operands
    logic RemSel_SN,    RemSel_SP;
    logic CompInv_SN,   CompInv_SP;
    logic ResInv_SN,    ResInv_SP;
    logic OpBIsZero_SN, OpBIsZero_SP;
    logic [TRANS_ID_BITS-1:0] TransId_DP, TransId_DN;

    // combinational datapath nodes
    logic [C_WIDTH-1:0] AddMux_D;
    logic [C_WIDTH-1:0] AddTmp_D;
    logic [C_WIDTH-1:0] AddOut_D;
    logic [C_WIDTH-1:0] BMux_D;
    logic [C_WIDTH-1:0] OutMux_D;

    // high for an operation with OpCode_SI[0] set (div/rem) whose operand A
    // sign bit differs from OpBSign_SI
    logic SignedMismatch_S;
    assign SignedMismatch_S = OpCode_SI[0] & (OpA_DI[$high(OpA_DI)] ^ OpBSign_SI);

    // -----------------
    // Datapath
    // -----------------
    // adder mode: add only while loading and only if the signs do not mismatch
    assign PmSel_S = LoadEn_S & ~SignedMismatch_S;

    // adder operands: (0, OpA) while loading, (A register, B register) otherwise
    assign AddTmp_D = (LoadEn_S) ? '0     : AReg_DP;
    assign AddMux_D = (LoadEn_S) ? OpA_DI : BReg_DP;
    assign AddOut_D = (PmSel_S)  ? AddTmp_D + AddMux_D
                                 : AddTmp_D - $signed(AddMux_D);

    // B is loaded from the input, afterwards it moves right by one position per
    // cycle with CompInv_SP entering at the MSB
    assign BMux_D = (LoadEn_S) ? OpB_DI
                               : {CompInv_SP, (BReg_DP[$high(BReg_DP):1])};

    // comparator deciding whether the A register is updated in this step
    assign ABComp_S = ((AReg_DP == BReg_DP) | ((AReg_DP > BReg_DP) ^ CompInv_SP))
                    & ((|AReg_DP) | OpBIsZero_SP);

    // the result register is filled from the MSB side, hence it is bit-reversed
    // before being offered as an output candidate
    assign ResReg_DP_rev = {<<{ResReg_DP}};
    assign OutMux_D      = (RemSel_SP) ? AReg_DP : ResReg_DP_rev;

    // negate the selected value if requested by the stored flag
    assign Res_DO = (ResInv_SP) ? -$signed(OutMux_D) : OutMux_D;

    // -----------------
    // Counter
    // -----------------
    assign CntZero_S = ~(|Cnt_DP);

    // load on a new request, otherwise count down and stick at zero
    assign Cnt_DN = (LoadEn_S)  ? OpBShift_DI :
                    (CntZero_S) ? Cnt_DP      : Cnt_DP - 1;

    // -----------------
    // FSM
    // -----------------
    always_comb begin : p_fsm
        // defaults: hold state, everything de-asserted
        State_SN   = State_SP;
        OutRdy_SO  = 1'b0;
        OutVld_SO  = 1'b0;
        LoadEn_S   = 1'b0;
        ARegEn_S   = 1'b0;
        BRegEn_S   = 1'b0;
        ResRegEn_S = 1'b0;

        case (State_SP)

            IDLE: begin
                if (InVld_SI) begin
                    // accept the request: capture operands and start iterating
                    LoadEn_S = 1'b1;
                    ARegEn_S = 1'b1;
                    BRegEn_S = 1'b1;
                    State_SN = DIVIDE;
                end else begin
                    // nothing to do, advertise readiness
                    OutRdy_SO = 1'b1;
                end
            end

            DIVIDE: begin
                ARegEn_S   = ABComp_S;
                BRegEn_S   = 1'b1;
                ResRegEn_S = 1'b1;

                // the step performed with a zero counter is the last one
                if (CntZero_S) begin
                    State_SN = FINISH;
                end
            end

            FINISH: begin
                // hold the result until the consumer takes it
                OutVld_SO = 1'b1;

                if (OutRdy_SI) begin
                    State_SN = IDLE;
                end
            end

            default : /* default */ ;

        endcase

        // a flush forces the load/A/B enables low and returns the FSM to IDLE;
        // OutRdy_SO, OutVld_SO and ResRegEn_S keep the values chosen by the
        // state logic above
        if (Flush_SI) begin
            LoadEn_S = 1'b0;
            ARegEn_S = 1'b0;
            BRegEn_S = 1'b0;
            State_SN = IDLE;
        end
    end

    // -----------------
    // Registers
    // -----------------
    // flags and transaction id are only updated while loading
    assign RemSel_SN    = (LoadEn_S) ? OpCode_SI[1] : RemSel_SP;
    assign CompInv_SN   = (LoadEn_S) ? OpBSign_SI   : CompInv_SP;
    assign OpBIsZero_SN = (LoadEn_S) ? OpBIsZero_SI : OpBIsZero_SP;
    assign ResInv_SN    = (LoadEn_S) ? (~OpBIsZero_SI | OpCode_SI[1]) & SignedMismatch_S
                                     : ResInv_SP;
    assign TransId_DN   = (LoadEn_S) ? TransId_DI   : TransId_DP;
    assign TransId_DO   = TransId_DP;

    // working registers with their individual enables
    assign AReg_DN   = (ARegEn_S)   ? AddOut_D : AReg_DP;
    assign BReg_DN   = (BRegEn_S)   ? BMux_D   : BReg_DP;
    assign ResReg_DN = (LoadEn_S)   ? '0 :
                       (ResRegEn_S) ? {ABComp_S, ResReg_DP[$high(ResReg_DP):1]} : ResReg_DP;

    always_ff @(posedge Clk_CI or negedge Rst_RBI) begin : p_regs
        if (~Rst_RBI) begin
            State_SP     <= IDLE;
            Cnt_DP       <= '0;
            AReg_DP      <= '0;
            BReg_DP      <= '0;
            ResReg_DP    <= '0;
            TransId_DP   <= '0;
            RemSel_SP    <= 1'b0;
            CompInv_SP   <= 1'b0;
            ResInv_SP    <= 1'b0;
            OpBIsZero_SP <= 1'b0;
        end else begin
            State_SP     <= State_SN;
            Cnt_DP       <= Cnt_DN;
            AReg_DP      <= AReg_DN;
            BReg_DP      <= BReg_DN;
            ResReg_DP    <= ResReg_DN;
            TransId_DP   <= TransId_DN;
            RemSel_SP    <= RemSel_SN;
            CompInv_SP   <= CompInv_SN;
            ResInv_SP    <= ResInv_SN;
            OpBIsZero_SP <= OpBIsZero_SN;
        end
    end

    // ------------
    // Assertions
    // ------------
    //pragma translate_off
    initial begin : p_assertions
        assert (C_LOG_WIDTH == $clog2(C_WIDTH+1)) else $error("C_LOG_WIDTH must be $clog2(C_WIDTH+1)");
    end
    //pragma translate_on

endmodule
|
Loading…
Add table
Add a link
Reference in a new issue