mirror of
https://github.com/lowRISC/ibex.git
synced 2025-06-28 01:12:02 -04:00
[rtl] Add Single Cycle Multiplier targeting FPGA
* Integrate option to implement a multiplier using 3 parallel 17 bit multipliers in order to compute MUL instructions in 1 cycle and MULH instructions in 2 cycles. * Add parameter SingleCycleMultiply to select single cycle multiplication. The single cycle multiplication capability is intended for FPGA targets. Using three parallel multiplication units improves performance of multiplication operations at the cost of DSP primitives. For ASIC targets, the area consumed by the multiplication structure will grow approximately 3-4x. The functionality is selected within the module using the parameter `SingleCycleMultiply`. From the top level it can be chosen by setting the parameter `MultiplierImplementation` to "single-cycle". Signed-off-by: ganoam <gnoam@live.com>
This commit is contained in:
parent
ba2240f138
commit
48c4b6a5ea
6 changed files with 355 additions and 179 deletions
|
@ -71,17 +71,31 @@ Multiplier/Divider Block (MULT/DIV)
|
|||
Source Files: :file:`rtl/ibex_multdiv_slow.sv` :file:`rtl/ibex_multdiv_fast.sv`
|
||||
|
||||
The Multiplier/Divider (MULT/DIV) is a state machine driven block to perform multiplication and division.
|
||||
The fast and slow versions differ in multiplier only, both implement the same form of long division algorithm.
|
||||
The ALU block is used by the long division algorithm in both the fast and slow blocks.
|
||||
The fast and slow versions differ in multiplier only. All versions implement the same form of long division algorithm. The ALU block is used by the long division algorithm in all versions.
|
||||
|
||||
Multiplier
|
||||
The multiplier can be implemented in three variants controlled via the parameter ``MultiplierImplementation``.
|
||||
|
||||
Single-Cycle Multiplier
|
||||
This implementation is chosen by setting the ``MultiplierImplementation`` parameter to "single-cycle". The single-cycle multiplier makes use of three parallel multiplier units, designed to be mapped to hardware multiplier primitives on FPGAs. It is therefore the **first choice for FPGA synthesis**.
|
||||
|
||||
- Using three parallel 17-bit x 17-bit multiplication units and a 34-bit accumulator, it completes a MUL instruction in 1 cycle. MULH is completed in 2 cycles.
|
||||
- This MAC is internal to the mult/div block (no external ALU use).
|
||||
- Beware it is simply implemented with the ``*`` and ``+`` operators so results heavily depend upon the synthesis tool used.
|
||||
- ASIC synthesis has not yet been tested, but the single-cycle multiplier is expected to consume 3-4x the area of the fast multiplier.
|
||||
|
||||
Fast Multi-Cycle Multiplier
|
||||
This implementation is chosen by setting the ``MultiplierImplementation`` parameter to "fast". The fast multi-cycle multiplier provides a reasonable trade-off between area and performance. It is the **first choice for ASIC synthesis**.
|
||||
|
||||
Fast Multiplier
|
||||
- Completes multiply in 3-4 cycles using a MAC (multiply accumulate) which is capable of a 17-bit x 17-bit multiplication with a 34-bit accumulator.
|
||||
- A MUL instruction takes 3 cycles, MULH takes 4.
|
||||
- This MAC is internal to the mult/div block (no external ALU use).
|
||||
- Beware it is simply implemented with the ``*`` and ``+`` operators so results heavily depend upon the synthesis tool used.
|
||||
- In some cases it may be desirable to replace this with a specific implementation (such as a hard macro in an FPGA or an explicit gate level implementation).
|
||||
- In some cases it may be desirable to replace this with a specific implementation such as an explicit gate level implementation.
|
||||
|
||||
Slow Multi-Cycle Multiplier
|
||||
To select the slow multi-cycle multiplier, set the ``MultiplierImplementation`` parameter to "slow".
|
||||
|
||||
Slow Multiplier
|
||||
- Completes multiply in clog2(``op_b``) + 1 cycles (for MUL) or 33 cycles (for MULH) using a Baugh-Wooley multiplier.
|
||||
- The ALU block is used to compute additions.
|
||||
|
||||
|
|
|
@ -90,7 +90,10 @@ Parameters
|
|||
| ``BranchTargetALU`` | bit | 0 | *EXPERIMENTAL* - Enables branch target ALU removing a stall |
|
||||
| | | | cycle from taken branches |
|
||||
+------------------------------+-------------+------------+-----------------------------------------------------------------+
|
||||
| ``MultiplierImplementation`` | string | "fast" | Multiplier type, "slow", or "fast" |
|
||||
| ``MultiplierImplementation`` | string | "fast" | Multiplier type: |
|
||||
| | | | "slow": multi-cycle slow, |
|
||||
| | | | "fast": multi-cycle fast, |
|
||||
| | | | "single-cycle": single-cycle |
|
||||
+------------------------------+-------------+------------+-----------------------------------------------------------------+
|
||||
| ``DbgTriggerEn`` | bit | 0 | Enable debug trigger support (one trigger only) |
|
||||
+------------------------------+-------------+------------+-----------------------------------------------------------------+
|
||||
|
|
|
@ -47,9 +47,12 @@ Read the description for more information.
|
|||
| | | takes to receive a response the longer loads and stores |
|
||||
| | | will stall. |
|
||||
+-----------------------+--------------------------------------+-------------------------------------------------------------+
|
||||
| Multiplication | 2/3 (Fast Multiplier) | Fast: 2 for MUL, 3 for MULH. |
|
||||
| | | Slow: clog2(``op_b``) for MUL, 32 for MULH. |
|
||||
| | clog2(``op_b``)/32 (Slow Multiplier) | See details in :ref:`mult-div` |
|
||||
| Multiplication | 0/1 (Single-Cycle Multiplier) | 0 for MUL, 1 for MULH. |
|
||||
| | | |
|
||||
| | 2/3 (Fast Multi-Cycle Multiplier) | 2 for MUL, 3 for MULH. |
|
||||
| | | |
|
||||
| | clog2(``op_b``)/32 (Slow Multi-Cycle | clog2(``op_b``) for MUL, 32 for MULH. |
|
||||
| | Multiplier) | See details in :ref:`mult-div`. |
|
||||
+-----------------------+--------------------------------------+-------------------------------------------------------------+
|
||||
| Division | 1 or 37 | 1 stall cycle if divide by 0, otherwise full long division. |
|
||||
| | | See details in :ref:`mult-div` |
|
||||
|
|
|
@ -29,15 +29,19 @@ lint_off -msg UNUSED -file "*/rtl/ibex_alu.sv" -lines 104
|
|||
|
||||
// Bits of signal are not used: alu_adder_ext_i[0]
|
||||
// Bottom bit is round, not needed
|
||||
lint_off -msg UNUSED -file "*/rtl/ibex_multdiv_fast.sv" -lines 26
|
||||
lint_off -msg UNUSED -file "*/rtl/ibex_multdiv_fast.sv" -lines 28
|
||||
|
||||
// Bits of signal are not used: mac_res_ext[34]
|
||||
// cleaner to write all bits even if not all are used
|
||||
lint_off -msg UNUSED -file "*/rtl/ibex_multdiv_fast.sv" -lines 51
|
||||
lint_off -msg UNUSED -file "*/rtl/ibex_multdiv_fast.sv" -lines 43
|
||||
|
||||
// Bits of signal are not used: res_adder_h[32]
|
||||
// cleaner to write all bits even if not all are used
|
||||
lint_off -msg UNUSED -file "*/rtl/ibex_multdiv_fast.sv" -lines 71
|
||||
lint_off -msg UNUSED -file "*/rtl/ibex_multdiv_fast.sv" -lines 65
|
||||
|
||||
// Bits of signal are not used: mult1_res[33:32]
|
||||
// cleaner to write all bits even if not all are used
|
||||
lint_off -msg UNUSED -file "*/rtl/ibex_multdiv_fast.sv" -lines 115
|
||||
|
||||
// Signal is not used: test_en_i
|
||||
// testability signal
|
||||
|
|
|
@ -131,7 +131,29 @@ module ibex_ex_block #(
|
|||
.multdiv_result_o ( multdiv_result )
|
||||
);
|
||||
end else if (MultiplierImplementation == "fast") begin : gen_multdiv_fast
|
||||
ibex_multdiv_fast multdiv_i (
|
||||
ibex_multdiv_fast #(
|
||||
.SingleCycleMultiply(0)
|
||||
) multdiv_i (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.mult_en_i ( mult_en_i ),
|
||||
.div_en_i ( div_en_i ),
|
||||
.operator_i ( multdiv_operator_i ),
|
||||
.signed_mode_i ( multdiv_signed_mode_i ),
|
||||
.op_a_i ( multdiv_operand_a_i ),
|
||||
.op_b_i ( multdiv_operand_b_i ),
|
||||
.alu_operand_a_o ( multdiv_alu_operand_a ),
|
||||
.alu_operand_b_o ( multdiv_alu_operand_b ),
|
||||
.alu_adder_ext_i ( alu_adder_result_ext ),
|
||||
.alu_adder_i ( alu_adder_result_ex_o ),
|
||||
.equal_to_zero ( alu_is_equal_result ),
|
||||
.valid_o ( multdiv_valid ),
|
||||
.multdiv_result_o ( multdiv_result )
|
||||
);
|
||||
end else if (MultiplierImplementation == "single-cycle") begin: gen_multdiv_single_cycle
|
||||
ibex_multdiv_fast #(
|
||||
.SingleCycleMultiply(1)
|
||||
) multdiv_i (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.mult_en_i ( mult_en_i ),
|
||||
|
|
|
@ -14,7 +14,9 @@
|
|||
|
||||
`include "prim_assert.sv"
|
||||
|
||||
module ibex_multdiv_fast (
|
||||
module ibex_multdiv_fast #(
|
||||
parameter bit SingleCycleMultiply = 0
|
||||
) (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic mult_en_i,
|
||||
|
@ -36,45 +38,41 @@ module ibex_multdiv_fast (
|
|||
|
||||
import ibex_pkg::*;
|
||||
|
||||
logic [ 4:0] div_counter_q, div_counter_n;
|
||||
typedef enum logic [1:0] {
|
||||
ALBL, ALBH, AHBL, AHBH
|
||||
} mult_fsm_e;
|
||||
mult_fsm_e mult_state_q, mult_state_n;
|
||||
|
||||
typedef enum logic [2:0] {
|
||||
MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH
|
||||
} md_fsm_e;
|
||||
md_fsm_e md_state_q, md_state_n;
|
||||
|
||||
// Both multiplier variants
|
||||
logic signed [34:0] mac_res_signed;
|
||||
logic [34:0] mac_res_ext;
|
||||
|
||||
logic [33:0] mac_res_q, mac_res_n, mac_res, op_remainder_n;
|
||||
logic [15:0] mult_op_a;
|
||||
logic [15:0] mult_op_b;
|
||||
logic [33:0] accum;
|
||||
logic sign_a, sign_b;
|
||||
logic div_sign_a, div_sign_b;
|
||||
logic mult_valid;
|
||||
logic signed_mult;
|
||||
|
||||
// Shared signals (div + mult)
|
||||
logic [33:0] mac_res_q, mac_res_d, mac_res, op_remainder_d;
|
||||
|
||||
// Divider signals
|
||||
logic div_sign_a, div_sign_b;
|
||||
logic is_greater_equal;
|
||||
logic div_change_sign, rem_change_sign;
|
||||
logic [31:0] one_shift;
|
||||
logic [31:0] op_denominator_q;
|
||||
logic [31:0] op_numerator_q;
|
||||
logic [31:0] op_quotient_q;
|
||||
logic [31:0] op_denominator_n;
|
||||
logic [31:0] op_numerator_n;
|
||||
logic [31:0] op_quotient_n;
|
||||
logic [31:0] op_denominator_d;
|
||||
logic [31:0] op_numerator_d;
|
||||
logic [31:0] op_quotient_d;
|
||||
logic [31:0] next_remainder;
|
||||
logic [32:0] next_quotient;
|
||||
logic [32:0] res_adder_h;
|
||||
logic mult_valid;
|
||||
logic div_valid;
|
||||
logic [ 4:0] div_counter_q, div_counter_d;
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin : proc_mult_state_q
|
||||
typedef enum logic [2:0] {
|
||||
MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH
|
||||
} md_fsm_e;
|
||||
md_fsm_e md_state_q, md_state_d;
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (!rst_ni) begin
|
||||
mult_state_q <= ALBL;
|
||||
mac_res_q <= '0;
|
||||
div_counter_q <= '0;
|
||||
md_state_q <= MD_IDLE;
|
||||
|
@ -83,23 +81,19 @@ module ibex_multdiv_fast (
|
|||
op_quotient_q <= '0;
|
||||
end else begin
|
||||
|
||||
if (mult_en_i) begin
|
||||
mult_state_q <= mult_state_n;
|
||||
end
|
||||
|
||||
if (div_en_i) begin
|
||||
div_counter_q <= div_counter_n;
|
||||
op_denominator_q <= op_denominator_n;
|
||||
op_numerator_q <= op_numerator_n;
|
||||
op_quotient_q <= op_quotient_n;
|
||||
md_state_q <= md_state_n;
|
||||
div_counter_q <= div_counter_d;
|
||||
op_denominator_q <= op_denominator_d;
|
||||
op_numerator_q <= op_numerator_d;
|
||||
op_quotient_q <= op_quotient_d;
|
||||
md_state_q <= md_state_d;
|
||||
end
|
||||
|
||||
unique case(1'b1)
|
||||
mult_en_i:
|
||||
mac_res_q <= mac_res_n;
|
||||
mac_res_q <= mac_res_d;
|
||||
div_en_i:
|
||||
mac_res_q <= op_remainder_n;
|
||||
mac_res_q <= op_remainder_d;
|
||||
default:
|
||||
mac_res_q <= mac_res_q;
|
||||
endcase
|
||||
|
@ -107,8 +101,129 @@ module ibex_multdiv_fast (
|
|||
end
|
||||
|
||||
assign signed_mult = (signed_mode_i != 2'b00);
|
||||
assign multdiv_result_o = div_en_i ? mac_res_q[31:0] : mac_res_d[31:0];
|
||||
|
||||
assign multdiv_result_o = div_en_i ? mac_res_q[31:0] : mac_res_n[31:0];
|
||||
// The single cycle multiplier uses three 17 bit multipliers to compute MUL instructions in a
|
||||
// single cycle and MULH instructions in two cycles.
|
||||
if (SingleCycleMultiply) begin : gen_multiv_single_cycle
|
||||
|
||||
typedef enum logic {
|
||||
MULL, MULH
|
||||
} mult_fsm_e;
|
||||
mult_fsm_e mult_state_q, mult_state_d;
|
||||
|
||||
logic signed [33:0] mult1_res, mult2_res, mult3_res;
|
||||
logic [15:0] mult1_op_a, mult1_op_b;
|
||||
logic [15:0] mult2_op_a, mult2_op_b;
|
||||
logic [15:0] mult3_op_a, mult3_op_b;
|
||||
logic mult1_sign_a, mult1_sign_b;
|
||||
logic mult2_sign_a, mult2_sign_b;
|
||||
logic mult3_sign_a, mult3_sign_b;
|
||||
logic [33:0] summand1, summand2, summand3;
|
||||
|
||||
assign mult1_res = $signed({mult1_sign_a, mult1_op_a}) * $signed({mult1_sign_b, mult1_op_b});
|
||||
assign mult2_res = $signed({mult2_sign_a, mult2_op_a}) * $signed({mult2_sign_b, mult2_op_b});
|
||||
assign mult3_res = $signed({mult3_sign_a, mult3_op_a}) * $signed({mult3_sign_b, mult3_op_b});
|
||||
|
||||
assign mac_res_signed = $signed(summand1) + $signed(summand2) + $signed(summand3);
|
||||
|
||||
assign mac_res_ext = $unsigned(mac_res_signed);
|
||||
assign mac_res = mac_res_ext[33:0];
|
||||
|
||||
assign sign_a = signed_mode_i[0] & op_a_i[31];
|
||||
assign sign_b = signed_mode_i[1] & op_b_i[31];
|
||||
|
||||
// The first two multipliers are only used in state 1 (MULL). We can assign them statically.
|
||||
// al*bl
|
||||
assign mult1_sign_a = 1'b0;
|
||||
assign mult1_sign_b = 1'b0;
|
||||
assign mult1_op_a = op_a_i[`OP_L];
|
||||
assign mult1_op_b = op_b_i[`OP_L];
|
||||
|
||||
// al*bh
|
||||
assign mult2_sign_a = 1'b0;
|
||||
assign mult2_sign_b = sign_b;
|
||||
assign mult2_op_a = op_a_i[`OP_L];
|
||||
assign mult2_op_b = op_b_i[`OP_H];
|
||||
|
||||
// used in MULH
|
||||
assign accum[17:0] = mac_res_q[33:16];
|
||||
assign accum[33:18] = {16{signed_mult & mac_res_q[33]}};
|
||||
|
||||
always_comb begin
|
||||
// Default values == MULL
|
||||
|
||||
// ah*bl
|
||||
mult3_sign_a = sign_a;
|
||||
mult3_sign_b = 1'b0;
|
||||
mult3_op_a = op_a_i[`OP_H];
|
||||
mult3_op_b = op_b_i[`OP_L];
|
||||
|
||||
summand1 = {18'h0, mult1_res[`OP_H]};
|
||||
summand2 = mult2_res;
|
||||
summand3 = mult3_res;
|
||||
|
||||
// mac_res = A*B[47:16], mult1_res = A*B[15:0]
|
||||
mac_res_d = {2'b0, mac_res[`OP_L], mult1_res[`OP_L]};
|
||||
mult_valid = mult_en_i;
|
||||
mult_state_d = MULL;
|
||||
|
||||
unique case (mult_state_q)
|
||||
|
||||
MULL: begin
|
||||
if (operator_i != MD_OP_MULL) begin
|
||||
mac_res_d = mac_res;
|
||||
mult_valid = 1'b0;
|
||||
mult_state_d = MULH;
|
||||
end
|
||||
end
|
||||
|
||||
MULH: begin
|
||||
// ah*bh
|
||||
mult3_sign_a = sign_a;
|
||||
mult3_sign_b = sign_b;
|
||||
mult3_op_a = op_a_i[`OP_H];
|
||||
mult3_op_b = op_b_i[`OP_H];
|
||||
mac_res_d = mac_res;
|
||||
|
||||
summand1 = '0;
|
||||
summand2 = accum;
|
||||
summand3 = mult3_res;
|
||||
|
||||
mult_state_d = MULL;
|
||||
mult_valid = 1'b1;
|
||||
end
|
||||
|
||||
default: begin
|
||||
mult_state_d = MULL;
|
||||
end
|
||||
|
||||
endcase // mult_state_q
|
||||
end
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (!rst_ni) begin
|
||||
mult_state_q <= MULL;
|
||||
end else begin
|
||||
if (mult_en_i) begin
|
||||
mult_state_q <= mult_state_d;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// States must be known/valid.
|
||||
`ASSERT_KNOWN(IbexMultStateKnown, mult_state_q)
|
||||
|
||||
// The fast multiplier uses one 17 bit multiplier to compute MUL instructions in 3 cycles
|
||||
// and MULH instructions in 4 cycles.
|
||||
end else begin : gen_multdiv_fast
|
||||
logic [15:0] mult_op_a;
|
||||
logic [15:0] mult_op_b;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
ALBL, ALBH, AHBL, AHBH
|
||||
} mult_fsm_e;
|
||||
mult_fsm_e mult_state_q, mult_state_d;
|
||||
|
||||
// The 2 MSBs of mac_res_ext (mac_res_ext[34:33]) are always equal since:
|
||||
// 1. The 2 MSBs of the multiplicands are always equal, and
|
||||
|
@ -119,11 +234,105 @@ module ibex_multdiv_fast (
|
|||
assign mac_res_ext = $unsigned(mac_res_signed);
|
||||
assign mac_res = mac_res_ext[33:0];
|
||||
|
||||
always_comb begin
|
||||
mult_op_a = op_a_i[`OP_L];
|
||||
mult_op_b = op_b_i[`OP_L];
|
||||
sign_a = 1'b0;
|
||||
sign_b = 1'b0;
|
||||
accum = mac_res_q;
|
||||
mac_res_d = mac_res;
|
||||
mult_state_d = mult_state_q;
|
||||
mult_valid = 1'b0;
|
||||
|
||||
unique case (mult_state_q)
|
||||
|
||||
ALBL: begin
|
||||
// al*bl
|
||||
mult_op_a = op_a_i[`OP_L];
|
||||
mult_op_b = op_b_i[`OP_L];
|
||||
sign_a = 1'b0;
|
||||
sign_b = 1'b0;
|
||||
accum = '0;
|
||||
mac_res_d = mac_res;
|
||||
mult_state_d = ALBH;
|
||||
end
|
||||
|
||||
ALBH: begin
|
||||
// al*bh<<16
|
||||
mult_op_a = op_a_i[`OP_L];
|
||||
mult_op_b = op_b_i[`OP_H];
|
||||
sign_a = 1'b0;
|
||||
sign_b = signed_mode_i[1] & op_b_i[31];
|
||||
// result of AL*BL (in mac_res_q) always unsigned with no carry, so carries_q always 00
|
||||
accum = {18'b0, mac_res_q[31:16]};
|
||||
if (operator_i == MD_OP_MULL) begin
|
||||
mac_res_d = {2'b0, mac_res[`OP_L], mac_res_q[`OP_L]};
|
||||
end else begin
|
||||
// MD_OP_MULH
|
||||
mac_res_d = mac_res;
|
||||
end
|
||||
mult_state_d = AHBL;
|
||||
end
|
||||
|
||||
AHBL: begin
|
||||
// ah*bl<<16
|
||||
mult_op_a = op_a_i[`OP_H];
|
||||
mult_op_b = op_b_i[`OP_L];
|
||||
sign_a = signed_mode_i[0] & op_a_i[31];
|
||||
sign_b = 1'b0;
|
||||
if (operator_i == MD_OP_MULL) begin
|
||||
accum = {18'b0, mac_res_q[31:16]};
|
||||
mac_res_d = {2'b0, mac_res[15:0], mac_res_q[15:0]};
|
||||
mult_valid = 1'b1;
|
||||
mult_state_d = ALBL;
|
||||
end else begin
|
||||
accum = mac_res_q;
|
||||
mac_res_d = mac_res;
|
||||
mult_state_d = AHBH;
|
||||
end
|
||||
end
|
||||
|
||||
AHBH: begin
|
||||
// only MD_OP_MULH here
|
||||
// ah*bh
|
||||
mult_op_a = op_a_i[`OP_H];
|
||||
mult_op_b = op_b_i[`OP_H];
|
||||
sign_a = signed_mode_i[0] & op_a_i[31];
|
||||
sign_b = signed_mode_i[1] & op_b_i[31];
|
||||
accum[17: 0] = mac_res_q[33:16];
|
||||
accum[33:18] = {16{signed_mult & mac_res_q[33]}};
|
||||
// result of AH*BL is not signed only if signed_mode_i == 2'b00
|
||||
mac_res_d = mac_res;
|
||||
mult_state_d = ALBL;
|
||||
mult_valid = 1'b1;
|
||||
end
|
||||
default: begin
|
||||
mult_state_d = ALBL;
|
||||
end
|
||||
endcase // mult_state_q
|
||||
end
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (!rst_ni) begin
|
||||
mult_state_q <= ALBL;
|
||||
end else begin
|
||||
if (mult_en_i) begin
|
||||
mult_state_q <= mult_state_d;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// States must be known/valid.
|
||||
`ASSERT_KNOWN(IbexMultStateKnown, mult_state_q)
|
||||
|
||||
end // gen_multdiv_fast
|
||||
|
||||
// Divider
|
||||
assign res_adder_h = alu_adder_ext_i[33:1];
|
||||
|
||||
assign next_remainder = is_greater_equal ? res_adder_h[31:0] : mac_res_q[31:0];
|
||||
assign next_quotient = is_greater_equal ? {1'b0,op_quotient_q} | {1'b0,one_shift} :
|
||||
{1'b0,op_quotient_q};
|
||||
assign next_quotient = is_greater_equal ? {1'b0, op_quotient_q} | {1'b0, one_shift} :
|
||||
{1'b0, op_quotient_q};
|
||||
|
||||
assign one_shift = {31'b0, 1'b1} << div_counter_q;
|
||||
|
||||
|
@ -144,13 +353,13 @@ module ibex_multdiv_fast (
|
|||
assign rem_change_sign = div_sign_a;
|
||||
|
||||
|
||||
always_comb begin : md_fsm
|
||||
div_counter_n = div_counter_q - 5'h1;
|
||||
op_remainder_n = mac_res_q;
|
||||
op_quotient_n = op_quotient_q;
|
||||
md_state_n = md_state_q;
|
||||
op_numerator_n = op_numerator_q;
|
||||
op_denominator_n = op_denominator_q;
|
||||
always_comb begin
|
||||
div_counter_d = div_counter_q - 5'h1;
|
||||
op_remainder_d = mac_res_q;
|
||||
op_quotient_d = op_quotient_q;
|
||||
md_state_d = md_state_q;
|
||||
op_numerator_d = op_numerator_q;
|
||||
op_denominator_d = op_denominator_q;
|
||||
alu_operand_a_o = {32'h0 , 1'b1};
|
||||
alu_operand_b_o = {~op_b_i, 1'b1};
|
||||
div_valid = 1'b0;
|
||||
|
@ -160,27 +369,27 @@ module ibex_multdiv_fast (
|
|||
if (operator_i == MD_OP_DIV) begin
|
||||
// Check if the Denominator is 0
|
||||
// quotient for division by 0
|
||||
op_remainder_n = '1;
|
||||
md_state_n = equal_to_zero ? MD_FINISH : MD_ABS_A;
|
||||
op_remainder_d = '1;
|
||||
md_state_d = equal_to_zero ? MD_FINISH : MD_ABS_A;
|
||||
end else begin
|
||||
// Check if the Denominator is 0
|
||||
// remainder for division by 0
|
||||
op_remainder_n = {2'b0, op_a_i};
|
||||
md_state_n = equal_to_zero ? MD_FINISH : MD_ABS_A;
|
||||
op_remainder_d = {2'b0, op_a_i};
|
||||
md_state_d = equal_to_zero ? MD_FINISH : MD_ABS_A;
|
||||
end
|
||||
// 0 - B = 0 iff B == 0
|
||||
alu_operand_a_o = {32'h0 , 1'b1};
|
||||
alu_operand_b_o = {~op_b_i, 1'b1};
|
||||
div_counter_n = 5'd31;
|
||||
div_counter_d = 5'd31;
|
||||
end
|
||||
|
||||
MD_ABS_A: begin
|
||||
// quotient
|
||||
op_quotient_n = '0;
|
||||
op_quotient_d = '0;
|
||||
// A abs value
|
||||
op_numerator_n = div_sign_a ? alu_adder_i : op_a_i;
|
||||
md_state_n = MD_ABS_B;
|
||||
div_counter_n = 5'd31;
|
||||
op_numerator_d = div_sign_a ? alu_adder_i : op_a_i;
|
||||
md_state_d = MD_ABS_B;
|
||||
div_counter_d = 5'd31;
|
||||
// ABS(A) = 0 - A
|
||||
alu_operand_a_o = {32'h0 , 1'b1};
|
||||
alu_operand_b_o = {~op_a_i, 1'b1};
|
||||
|
@ -188,20 +397,20 @@ module ibex_multdiv_fast (
|
|||
|
||||
MD_ABS_B: begin
|
||||
// remainder
|
||||
op_remainder_n = { 33'h0, op_numerator_q[31]};
|
||||
op_remainder_d = { 33'h0, op_numerator_q[31]};
|
||||
// B abs value
|
||||
op_denominator_n = div_sign_b ? alu_adder_i : op_b_i;
|
||||
md_state_n = MD_COMP;
|
||||
div_counter_n = 5'd31;
|
||||
op_denominator_d = div_sign_b ? alu_adder_i : op_b_i;
|
||||
md_state_d = MD_COMP;
|
||||
div_counter_d = 5'd31;
|
||||
// ABS(B) = 0 - B
|
||||
alu_operand_a_o = {32'h0 , 1'b1};
|
||||
alu_operand_b_o = {~op_b_i, 1'b1};
|
||||
end
|
||||
|
||||
MD_COMP: begin
|
||||
op_remainder_n = {1'b0, next_remainder[31:0], op_numerator_q[div_counter_n]};
|
||||
op_quotient_n = next_quotient[31:0];
|
||||
md_state_n = (div_counter_q == 5'd1) ? MD_LAST : MD_COMP;
|
||||
op_remainder_d = {1'b0, next_remainder[31:0], op_numerator_q[div_counter_d]};
|
||||
op_quotient_d = next_quotient[31:0];
|
||||
md_state_d = (div_counter_q == 5'd1) ? MD_LAST : MD_COMP;
|
||||
// Division
|
||||
alu_operand_a_o = {mac_res_q[31:0], 1'b1}; // it contains the remainder
|
||||
alu_operand_b_o = {~op_denominator_q[31:0], 1'b1}; // -denominator two's compliment
|
||||
|
@ -209,26 +418,26 @@ module ibex_multdiv_fast (
|
|||
|
||||
MD_LAST: begin
|
||||
if (operator_i == MD_OP_DIV) begin
|
||||
// this time we save the quotient in op_remainder_n (i.e. mac_res_q) since
|
||||
// this time we save the quotient in op_remainder_d (i.e. mac_res_q) since
|
||||
// we no longer need the remainder
|
||||
op_remainder_n = {1'b0, next_quotient};
|
||||
op_remainder_d = {1'b0, next_quotient};
|
||||
end else begin
|
||||
// this time we do not save the quotient anymore since we need only the remainder
|
||||
op_remainder_n = {2'b0, next_remainder[31:0]};
|
||||
op_remainder_d = {2'b0, next_remainder[31:0]};
|
||||
end
|
||||
// Division
|
||||
alu_operand_a_o = {mac_res_q[31:0], 1'b1}; // it contains the remainder
|
||||
alu_operand_b_o = {~op_denominator_q[31:0], 1'b1}; // -denominator two's compliment
|
||||
|
||||
md_state_n = MD_CHANGE_SIGN;
|
||||
md_state_d = MD_CHANGE_SIGN;
|
||||
end
|
||||
|
||||
MD_CHANGE_SIGN: begin
|
||||
md_state_n = MD_FINISH;
|
||||
md_state_d = MD_FINISH;
|
||||
if (operator_i == MD_OP_DIV) begin
|
||||
op_remainder_n = (div_change_sign) ? {2'h0,alu_adder_i} : mac_res_q;
|
||||
op_remainder_d = (div_change_sign) ? {2'h0, alu_adder_i} : mac_res_q;
|
||||
end else begin
|
||||
op_remainder_n = (rem_change_sign) ? {2'h0,alu_adder_i} : mac_res_q;
|
||||
op_remainder_d = (rem_change_sign) ? {2'h0, alu_adder_i} : mac_res_q;
|
||||
end
|
||||
// ABS(Quotient) = 0 - Quotient (or Remainder)
|
||||
alu_operand_a_o = {32'h0 , 1'b1};
|
||||
|
@ -236,99 +445,20 @@ module ibex_multdiv_fast (
|
|||
end
|
||||
|
||||
MD_FINISH: begin
|
||||
md_state_n = MD_IDLE;
|
||||
md_state_d = MD_IDLE;
|
||||
div_valid = 1'b1;
|
||||
end
|
||||
|
||||
default: begin
|
||||
md_state_n = MD_IDLE;
|
||||
md_state_d = MD_IDLE;
|
||||
end
|
||||
endcase // md_state_q
|
||||
end
|
||||
|
||||
assign valid_o = mult_valid | div_valid;
|
||||
|
||||
always_comb begin : mult_fsm
|
||||
mult_op_a = op_a_i[`OP_L];
|
||||
mult_op_b = op_b_i[`OP_L];
|
||||
sign_a = 1'b0;
|
||||
sign_b = 1'b0;
|
||||
accum = mac_res_q;
|
||||
mac_res_n = mac_res;
|
||||
mult_state_n = mult_state_q;
|
||||
mult_valid = 1'b0;
|
||||
|
||||
unique case (mult_state_q)
|
||||
|
||||
ALBL: begin
|
||||
// al*bl
|
||||
mult_op_a = op_a_i[`OP_L];
|
||||
mult_op_b = op_b_i[`OP_L];
|
||||
sign_a = 1'b0;
|
||||
sign_b = 1'b0;
|
||||
accum = '0;
|
||||
mac_res_n = mac_res;
|
||||
mult_state_n = ALBH;
|
||||
end
|
||||
|
||||
ALBH: begin
|
||||
// al*bh<<16
|
||||
mult_op_a = op_a_i[`OP_L];
|
||||
mult_op_b = op_b_i[`OP_H];
|
||||
sign_a = 1'b0;
|
||||
sign_b = signed_mode_i[1] & op_b_i[31];
|
||||
// result of AL*BL (in mac_res_q) always unsigned with no carry, so carries_q always 00
|
||||
accum = {18'b0,mac_res_q[31:16]};
|
||||
if (operator_i == MD_OP_MULL) begin
|
||||
mac_res_n = {2'b0,mac_res[`OP_L],mac_res_q[`OP_L]};
|
||||
end else begin
|
||||
// MD_OP_MULH
|
||||
mac_res_n = mac_res;
|
||||
end
|
||||
mult_state_n = AHBL;
|
||||
end
|
||||
|
||||
AHBL: begin
|
||||
// ah*bl<<16
|
||||
mult_op_a = op_a_i[`OP_H];
|
||||
mult_op_b = op_b_i[`OP_L];
|
||||
sign_a = signed_mode_i[0] & op_a_i[31];
|
||||
sign_b = 1'b0;
|
||||
if (operator_i == MD_OP_MULL) begin
|
||||
accum = {18'b0,mac_res_q[31:16]};
|
||||
mac_res_n = {2'b0,mac_res[15:0],mac_res_q[15:0]};
|
||||
mult_valid = 1'b1;
|
||||
mult_state_n = ALBL;
|
||||
end else begin
|
||||
accum = mac_res_q;
|
||||
mac_res_n = mac_res;
|
||||
mult_state_n = AHBH;
|
||||
end
|
||||
end
|
||||
|
||||
AHBH: begin
|
||||
// only MD_OP_MULH here
|
||||
// ah*bh
|
||||
mult_op_a = op_a_i[`OP_H];
|
||||
mult_op_b = op_b_i[`OP_H];
|
||||
sign_a = signed_mode_i[0] & op_a_i[31];
|
||||
sign_b = signed_mode_i[1] & op_b_i[31];
|
||||
accum[17: 0] = mac_res_q[33:16];
|
||||
accum[33:18] = {16{signed_mult & mac_res_q[33]}};
|
||||
// result of AH*BL is not signed only if signed_mode_i == 2'b00
|
||||
mac_res_n = mac_res;
|
||||
mult_state_n = ALBL;
|
||||
mult_valid = 1'b1;
|
||||
end
|
||||
default: begin
|
||||
mult_state_n = ALBL;
|
||||
end
|
||||
endcase // mult_state_q
|
||||
end
|
||||
|
||||
// States must be known/valid.
|
||||
`ASSERT(IbexMultDivStateValid, md_state_q inside {
|
||||
MD_IDLE, MD_ABS_A, MD_ABS_B, MD_COMP, MD_LAST, MD_CHANGE_SIGN, MD_FINISH})
|
||||
`ASSERT_KNOWN(IbexMultStateKnown, mult_state_q)
|
||||
|
||||
endmodule // ibex_mult
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue