mirror of
https://github.com/lowRISC/ibex.git
synced 2025-04-22 12:57:13 -04:00
[bitmanip] Add ZBE Instruction Group
This commit implements the Bit Manipulation Extension ZBE instruction group: bext (bit extract) and bdep (bit deposit). Architectural details: * bext/bdep: A new butterfly and inverse butterfly network is implemented. The generation of its control bits depends on a parallel prefix bitcount of the deposit / extract mask. * bitcounter: The path for bext / bdep instructions traverses the bit counter and the butterfly network, resulting in both a larger delay and area. To mitigate this, the bitcounter has been changed from a serial bit counter to a radix-2 tree structure. * grev/gorc: Zbp instructions general reverse and general or-combine have until now shared the shifter's reversal structure. It has proven beneficial to area and timing to reuse the novel butterfly network instead. The butterfly network itself consumes ~3.5kGE and ~1.1kGE for synthesis with tight and relaxed timing constraints respectively. Including the optimizations of the bitcounter and grev/gorc, the overall change in area consumption is +4.6kGE (+1.2kGE) and +3.3kGE (+1.1kGE) for synthesis with tight (relaxed) timing constraints for 2- and 3-stage configurations respectively. For tight timing constraints that is a growth by around ~10%, for relaxed ~5%. The impact on the maximum frequency is negligible. Signed-off-by: ganoam <gnoam@live.com>
This commit is contained in:
parent
dd12d97934
commit
0afd000a09
8 changed files with 555 additions and 279 deletions
|
@ -66,7 +66,7 @@ Other blocks use the ALU for the following tasks:
|
|||
|
||||
Support for the RISC-V Bitmanipulation Extension (Document Version 0.92, November 8, 2019) is enabled via the parameter ``RV32B``.
|
||||
This feature is *EXPERIMENTAL* and the details of its impact are not yet documented here.
|
||||
Currently the Zbb, Zbs, Zbp and Zbt sub-extensions are implemented.
|
||||
Currently the Zbb, Zbs, Zbp, Zbe and Zbt sub-extensions are implemented.
|
||||
All instructions are carried out in a single clock cycle.
|
||||
|
||||
.. _mult-div:
|
||||
|
|
|
@ -93,7 +93,8 @@ Parameters
|
|||
+------------------------------+-------------+------------+-----------------------------------------------------------------+
|
||||
| ``RV32B`` | bit | 0 | *EXPERIMENTAL* - B(itmanipulation) extension enable: |
|
||||
| | | | Currently supported Z-extensions: Zbb (base), Zbs (single-bit) |
|
||||
| | | | Zbp (bit permutation) and Zbt (ternary) |
|
||||
| | | | Zbp (bit permutation), Zbe (bit extract/deposit) and |
|
||||
| | | | Zbt (ternary) |
|
||||
+------------------------------+-------------+------------+-----------------------------------------------------------------+
|
||||
| ``BranchTargetALU`` | bit | 0 | *EXPERIMENTAL* - Enables branch target ALU removing a stall |
|
||||
| | | | cycle from taken branches |
|
||||
|
|
|
@ -595,5 +595,5 @@
|
|||
gen_test: riscv_rand_instr_test
|
||||
gen_opts: >
|
||||
+enable_b_extension=1
|
||||
+enable_bitmanip_groups=zbb,zbt,zbs,zbp
|
||||
+enable_bitmanip_groups=zbb,zbt,zbs,zbe,zbp
|
||||
rtl_test: core_ibex_base_test
|
||||
|
|
735
rtl/ibex_alu.sv
735
rtl/ibex_alu.sv
|
@ -89,192 +89,6 @@ module ibex_alu #(
|
|||
|
||||
assign adder_result_o = adder_result;
|
||||
|
||||
///////////
|
||||
// Shift //
|
||||
///////////
|
||||
|
||||
// The shifter structure consists of a 33-bit shifter: 32-bit operand + 1 bit extension for
|
||||
// arithmetic shifts and one-shift support.
|
||||
// Rotations and funnel shifts are implemented as multi-cycle instructions.
|
||||
// The shifter is also used for single-bit instructions as detailed below.
|
||||
//
|
||||
// Standard Shifts
|
||||
// ===============
|
||||
// For standard shift instructions, the direction of the shift is to the right by default. For
|
||||
// left shifts, the shift_left signal is set. If so, the operand is initially reversed,
|
||||
// shifted to the right by the specified amount and shifted back again. For arithmetic- and
|
||||
// one-shifts the 33rd bit of the shifter operand is set accordingly.
|
||||
//
|
||||
// Multicycle Shifts
|
||||
// =================
|
||||
//
|
||||
// Rotation
|
||||
// --------
|
||||
// For rotations, the operand signals operand_a_i and operand_b_i are kept constant to rs1 and
|
||||
// rs2 respectively.
|
||||
//
|
||||
// Rotation pseudocode:
|
||||
// shift_amt = rs2 & 31;
|
||||
// multicycle_result = (rs1 >> shift_amt) | (rs1 << (32 - shift_amt));
|
||||
// ^-- cycle 0 -----^ ^-- cycle 1 --------------^
|
||||
//
|
||||
// Funnel Shifts
|
||||
// -------------
|
||||
// For funnel shifts, operand_a_i is tied to rs1 in the first cycle and rs3 in the
|
||||
// second cycle. operand_b_i is always tied to rs2. The order of applying the shift amount or
|
||||
// its complement is determined by bit [5] of shift_amt.
|
||||
//
|
||||
// Funnel shift Pseudocode: (fsl)
|
||||
// shift_amt = rs2 & 63;
|
||||
// shift_amt_compl = 32 - shift_amt[4:0]
|
||||
// if (shift_amt >=33):
|
||||
// multicycle_result = (rs1 >> shift_amt_cmpl[4:0]) | (rs3 << shift_amt[4:0]);
|
||||
// ^-- cycle 0 ---------------^ ^-- cycle 1 ------------^
|
||||
// else if (shift_amt <= 31 && shift_amt > 0):
|
||||
// multicycle_result = (rs1 << shift_amt[4:0]) | (rs3 >> shift_amt_compl[4:0]);
|
||||
// ^-- cycle 0 ----------^ ^-- cycle 1 -------------------^
|
||||
// For shift_amt == 0, 32, both shift_amt[4:0] and shift_amt_compl[4:0] == '0.
|
||||
// these cases need to be handled separately outside the shifting structure:
|
||||
// else if (shift_amt == 32):
|
||||
// multicycle_result = rs3
|
||||
// else if (shift_amt == 0):
|
||||
// multicycle_result = rs1.
|
||||
//
|
||||
// Single-Bit Instructions
|
||||
// =======================
|
||||
// Single bit instructions operate on bit operand_b_i[4:0] of operand_a_i.
|
||||
|
||||
// The operations sbset, sbclr and sbinv are implemented by generation of a bit-mask using the
|
||||
// shifter structure. This is done by left-shifting the operand 32'h1 by the required amount.
|
||||
// The signal shift_sbmode multiplexes the shifter input and sets the signal shift_left.
|
||||
// Further processing is taken care of by a separate structure.
|
||||
//
|
||||
// For sbext, the bit defined by operand_b_i[4:0] is to be returned. This is done by simply
|
||||
// shifting operand_a_i to the right by the required amount and returning bit [0] of the result.
|
||||
//
|
||||
// Generalized Reverse and Or-Combine
|
||||
// ==================================
|
||||
// Grev and gorc instructions share the reversing structure used for left-shifts. The control
|
||||
// bits are the same for shifts and grev/gorc. Shift_amt can therefore be reused for activating
|
||||
// the respective reversal stages.
|
||||
|
||||
|
||||
logic shift_left;
|
||||
logic shift_ones;
|
||||
logic shift_arith;
|
||||
logic shift_funnel;
|
||||
logic shift_sbmode;
|
||||
logic [5:0] shift_amt;
|
||||
logic [5:0] shift_amt_compl; // complementary shift amount (32 - shift_amt)
|
||||
|
||||
// bit shift_amt[5]: word swap bit: only considered for FSL/FSR.
|
||||
// if set, reverse operations in first and second cycle.
|
||||
assign shift_amt[5] = operand_b_i[5] && shift_funnel;
|
||||
assign shift_amt_compl = 32 - operand_b_i[4:0];
|
||||
|
||||
assign shift_amt[4:0] = instr_first_cycle_i ?
|
||||
(operand_b_i[5] && shift_funnel ? shift_amt_compl[4:0] : operand_b_i[4:0]) :
|
||||
(operand_b_i[5] && shift_funnel ? operand_b_i[4:0] : shift_amt_compl[4:0]);
|
||||
|
||||
// single-bit mode: shift
|
||||
assign shift_sbmode = RV32B ? (operator_i == ALU_SBSET) || (operator_i == ALU_SBCLR) ||
|
||||
(operator_i == ALU_SBINV) :
|
||||
1'b0;
|
||||
|
||||
// left shift if this is:
|
||||
// * a standard left shift (slo, sll)
|
||||
// * a rol in the first cycle
|
||||
// * a ror in the second cycle
|
||||
// * fsl: without word-swap bit: first cycle, else: second cycle
|
||||
// * fsr: without word-swap bit: second cycle, else: first cycle
|
||||
// * a single-bit instruction: sbclr, sbset, sbinv (excluding sbext)
|
||||
always_comb begin
|
||||
unique case (operator_i)
|
||||
ALU_SLL: shift_left = 1'b1;
|
||||
ALU_SLO: shift_left = RV32B ? 1'b1 : 1'b0;
|
||||
ALU_ROL: shift_left = RV32B ? instr_first_cycle_i : 0;
|
||||
ALU_ROR: shift_left = RV32B ? !instr_first_cycle_i : 0;
|
||||
ALU_FSL: shift_left =
|
||||
RV32B ? (shift_amt[5] ? !instr_first_cycle_i : instr_first_cycle_i) : 1'b0;
|
||||
ALU_FSR: shift_left =
|
||||
RV32B ? (shift_amt[5] ? instr_first_cycle_i : !instr_first_cycle_i) : 1'b0;
|
||||
default: shift_left = 1'b0;
|
||||
endcase
|
||||
if (shift_sbmode) begin
|
||||
shift_left = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
assign shift_arith = (operator_i == ALU_SRA);
|
||||
assign shift_ones = RV32B ? (operator_i == ALU_SLO) || (operator_i == ALU_SRO) : 1'b0;
|
||||
assign shift_funnel = RV32B ? (operator_i == ALU_FSL) || (operator_i == ALU_FSR) : 1'b0;
|
||||
|
||||
logic [31:0] shift_result;
|
||||
logic [32:0] shift_result_ext;
|
||||
|
||||
// grev / gorc instructions
|
||||
logic grev_op;
|
||||
assign grev_op = RV32B ? (operator_i == ALU_GREV) : 1'b0;
|
||||
logic gorc_op;
|
||||
assign gorc_op = RV32B ? (operator_i == ALU_GORC) : 1'b0;
|
||||
|
||||
// combined shifter/ reverser structure.
|
||||
always_comb begin
|
||||
shift_result = operand_a_i;
|
||||
|
||||
// select bit reversed or normal input
|
||||
if (shift_left) begin
|
||||
shift_result = operand_a_rev;
|
||||
end
|
||||
|
||||
// if this is a single bit instruction: we left-shift 32'h1 by shift_amt.
|
||||
// the first reverse of the left-shift operation can be easily omitted, since we
|
||||
// know the result of rev(32'h1).
|
||||
if (shift_sbmode) begin
|
||||
shift_result = 32'h8000_0000;
|
||||
end
|
||||
|
||||
shift_result_ext = $signed({shift_ones || (shift_arith && shift_result[31]), shift_result})
|
||||
>>> shift_amt[4:0];
|
||||
|
||||
shift_result = shift_result_ext[31:0];
|
||||
|
||||
if (grev_op || gorc_op) begin
|
||||
shift_result = operand_a_i;
|
||||
end
|
||||
|
||||
// left shift always do the full reverse. Orc and rev do permutation as requested by shift_amt.
|
||||
if (shift_left || ((grev_op || gorc_op) & shift_amt[0])) begin
|
||||
shift_result = (gorc_op ? shift_result : 32'h0) |
|
||||
((shift_result & 32'h5555_5555) << 1)|
|
||||
((shift_result & 32'haaaa_aaaa) >> 1);
|
||||
end
|
||||
|
||||
if (shift_left || ((grev_op || gorc_op) & shift_amt[1])) begin
|
||||
shift_result = (gorc_op ? shift_result : 32'h0) |
|
||||
((shift_result & 32'h3333_3333) << 2)|
|
||||
((shift_result & 32'hcccc_cccc) >> 2);
|
||||
end
|
||||
|
||||
if (shift_left || ((grev_op || gorc_op) & shift_amt[2])) begin
|
||||
shift_result = (gorc_op ? shift_result : 32'h0) |
|
||||
((shift_result & 32'h0f0f_0f0f) << 4)|
|
||||
((shift_result & 32'hf0f0_f0f0) >> 4);
|
||||
end
|
||||
|
||||
if (shift_left || ((grev_op || gorc_op) & shift_amt[3])) begin
|
||||
shift_result = (gorc_op ? shift_result : 32'h0) |
|
||||
((shift_result & 32'h00ff_00ff) << 8) |
|
||||
((shift_result & 32'hff00_ff00) >> 8);
|
||||
end
|
||||
|
||||
if (shift_left || ((grev_op || gorc_op) & shift_amt[4])) begin
|
||||
shift_result = (gorc_op ? shift_result : 32'h0) |
|
||||
((shift_result & 32'h0000_ffff) << 16) |
|
||||
((shift_result & 32'hffff_0000) >> 16);
|
||||
end
|
||||
end
|
||||
|
||||
////////////////
|
||||
// Comparison //
|
||||
////////////////
|
||||
|
@ -339,12 +153,150 @@ module ibex_alu #(
|
|||
|
||||
assign comparison_result_o = cmp_result;
|
||||
|
||||
logic [31:0] minmax_result;
|
||||
logic [5:0] bitcnt_result;
|
||||
logic [31:0] bwlogic_result;
|
||||
logic [31:0] pack_result;
|
||||
logic [31:0] multicycle_result;
|
||||
logic [31:0] singlebit_result;
|
||||
///////////
|
||||
// Shift //
|
||||
///////////
|
||||
|
||||
// The shifter structure consists of a 33-bit shifter: 32-bit operand + 1 bit extension for
|
||||
// arithmetic shifts and one-shift support.
|
||||
// Rotations and funnel shifts are implemented as multi-cycle instructions.
|
||||
// The shifter is also used for single-bit instructions as detailed below.
|
||||
//
|
||||
// Standard Shifts
|
||||
// ===============
|
||||
// For standard shift instructions, the direction of the shift is to the right by default. For
|
||||
// left shifts, the shift_left signal is set. If so, the operand is initially reversed,
|
||||
// shifted to the right by the specified amount and shifted back again. For arithmetic- and
|
||||
// one-shifts the 33rd bit of the shifter operand is set accordingly.
|
||||
//
|
||||
// Multicycle Shifts
|
||||
// =================
|
||||
//
|
||||
// Rotation
|
||||
// --------
|
||||
// For rotations, the operand signals operand_a_i and operand_b_i are kept constant to rs1 and
|
||||
// rs2 respectively.
|
||||
//
|
||||
// Rotation pseudocode:
|
||||
// shift_amt = rs2 & 31;
|
||||
// multicycle_result = (rs1 >> shift_amt) | (rs1 << (32 - shift_amt));
|
||||
// ^-- cycle 0 -----^ ^-- cycle 1 --------------^
|
||||
//
|
||||
// Funnel Shifts
|
||||
// -------------
|
||||
// For funnel shifts, operand_a_i is tied to rs1 in the first cycle and rs3 in the
|
||||
// second cycle. operand_b_i is always tied to rs2. The order of applying the shift amount or
|
||||
// its complement is determined by bit [5] of shift_amt.
|
||||
//
|
||||
// Funnel shift Pseudocode: (fsl)
|
||||
// shift_amt = rs2 & 63;
|
||||
// shift_amt_compl = 32 - shift_amt[4:0]
|
||||
// if (shift_amt >=33):
|
||||
// multicycle_result = (rs1 >> shift_amt_cmpl[4:0]) | (rs3 << shift_amt[4:0]);
|
||||
// ^-- cycle 0 ---------------^ ^-- cycle 1 ------------^
|
||||
// else if (shift_amt <= 31 && shift_amt > 0):
|
||||
// multicycle_result = (rs1 << shift_amt[4:0]) | (rs3 >> shift_amt_compl[4:0]);
|
||||
// ^-- cycle 0 ----------^ ^-- cycle 1 -------------------^
|
||||
// For shift_amt == 0, 32, both shift_amt[4:0] and shift_amt_compl[4:0] == '0.
|
||||
// these cases need to be handled separately outside the shifting structure:
|
||||
// else if (shift_amt == 32):
|
||||
// multicycle_result = rs3
|
||||
// else if (shift_amt == 0):
|
||||
// multicycle_result = rs1.
|
||||
//
|
||||
// Single-Bit Instructions
|
||||
// =======================
|
||||
// Single bit instructions operate on bit operand_b_i[4:0] of operand_a_i.
|
||||
|
||||
// The operations sbset, sbclr and sbinv are implemented by generation of a bit-mask using the
|
||||
// shifter structure. This is done by left-shifting the operand 32'h1 by the required amount.
|
||||
// The signal shift_sbmode multiplexes the shifter input and sets the signal shift_left.
|
||||
// Further processing is taken care of by a separate structure.
|
||||
//
|
||||
// For sbext, the bit defined by operand_b_i[4:0] is to be returned. This is done by simply
|
||||
// shifting operand_a_i to the right by the required amount and returning bit [0] of the result.
|
||||
|
||||
logic shift_left;
|
||||
logic shift_ones;
|
||||
logic shift_arith;
|
||||
logic shift_funnel;
|
||||
logic shift_sbmode;
|
||||
logic [5:0] shift_amt;
|
||||
logic [5:0] shift_amt_compl; // complementary shift amount (32 - shift_amt)
|
||||
|
||||
logic [31:0] shift_result;
|
||||
logic [32:0] shift_result_ext;
|
||||
logic [31:0] shift_result_rev;
|
||||
|
||||
// bit shift_amt[5]: word swap bit: only considered for FSL/FSR.
|
||||
// if set, reverse operations in first and second cycle.
|
||||
assign shift_amt[5] = operand_b_i[5] & shift_funnel;
|
||||
assign shift_amt_compl = 32 - operand_b_i[4:0];
|
||||
|
||||
assign shift_amt[4:0] = instr_first_cycle_i ?
|
||||
(operand_b_i[5] && shift_funnel ? shift_amt_compl[4:0] : operand_b_i[4:0]) :
|
||||
(operand_b_i[5] && shift_funnel ? operand_b_i[4:0] : shift_amt_compl[4:0]);
|
||||
|
||||
// single-bit mode: shift
|
||||
assign shift_sbmode = RV32B ?
|
||||
(operator_i == ALU_SBSET) | (operator_i == ALU_SBCLR) | (operator_i == ALU_SBINV) : 1'b0;
|
||||
|
||||
// left shift if this is:
|
||||
// * a standard left shift (slo, sll)
|
||||
// * a rol in the first cycle
|
||||
// * a ror in the second cycle
|
||||
// * fsl: without word-swap bit: first cycle, else: second cycle
|
||||
// * fsr: without word-swap bit: second cycle, else: first cycle
|
||||
// * a single-bit instruction: sbclr, sbset, sbinv (excluding sbext)
|
||||
always_comb begin
|
||||
unique case (operator_i)
|
||||
ALU_SLL: shift_left = 1'b1;
|
||||
ALU_SLO: shift_left = RV32B ? 1'b1 : 1'b0;
|
||||
ALU_ROL: shift_left = RV32B ? instr_first_cycle_i : 0;
|
||||
ALU_ROR: shift_left = RV32B ? ~instr_first_cycle_i : 0;
|
||||
ALU_FSL: shift_left =
|
||||
RV32B ? (shift_amt[5] ? ~instr_first_cycle_i : instr_first_cycle_i) : 1'b0;
|
||||
ALU_FSR: shift_left =
|
||||
RV32B ? (shift_amt[5] ? instr_first_cycle_i : ~instr_first_cycle_i) : 1'b0;
|
||||
default: shift_left = 1'b0;
|
||||
endcase
|
||||
if (shift_sbmode) begin
|
||||
shift_left = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
assign shift_arith = (operator_i == ALU_SRA);
|
||||
assign shift_ones = RV32B ? (operator_i == ALU_SLO) | (operator_i == ALU_SRO) : 1'b0;
|
||||
assign shift_funnel = RV32B ? (operator_i == ALU_FSL) | (operator_i == ALU_FSR) : 1'b0;
|
||||
|
||||
// shifter structure.
|
||||
always_comb begin
|
||||
shift_result = operand_a_i;
|
||||
|
||||
// select bit reversed or normal input
|
||||
if (shift_left) begin
|
||||
shift_result = operand_a_rev;
|
||||
end
|
||||
|
||||
// if this is a single bit instruction: we left-shift 32'h1 by shift_amt.
|
||||
// the first reverse of the left-shift operation can be easily omitted, since we
|
||||
// know the result of rev(32'h1).
|
||||
if (shift_sbmode) begin
|
||||
shift_result = 32'h8000_0000;
|
||||
end
|
||||
|
||||
shift_result_ext =
|
||||
$signed({shift_ones | (shift_arith & shift_result[31]), shift_result}) >>> shift_amt[4:0];
|
||||
|
||||
shift_result = shift_result_ext[31:0];
|
||||
|
||||
for (int unsigned i=0; i<32; i++) begin
|
||||
shift_result_rev[i] = shift_result[31-i];
|
||||
end
|
||||
|
||||
shift_result = shift_left ? shift_result_rev : shift_result;
|
||||
|
||||
end
|
||||
|
||||
///////////////////
|
||||
// Bitwise Logic //
|
||||
|
@ -356,6 +308,7 @@ module ibex_alu #(
|
|||
logic [31:0] bwlogic_or_result;
|
||||
logic [31:0] bwlogic_and_result;
|
||||
logic [31:0] bwlogic_xor_result;
|
||||
logic [31:0] bwlogic_result;
|
||||
|
||||
logic bwlogic_op_b_negate;
|
||||
|
||||
|
@ -365,7 +318,7 @@ module ibex_alu #(
|
|||
ALU_XNOR,
|
||||
ALU_ORN,
|
||||
ALU_ANDN: bwlogic_op_b_negate = RV32B ? 1'b1 : 1'b0;
|
||||
ALU_CMIX: bwlogic_op_b_negate = RV32B ? !instr_first_cycle_i : 1'b0;
|
||||
ALU_CMIX: bwlogic_op_b_negate = RV32B ? ~instr_first_cycle_i : 1'b0;
|
||||
default: bwlogic_op_b_negate = 1'b0;
|
||||
endcase
|
||||
end
|
||||
|
@ -376,8 +329,8 @@ module ibex_alu #(
|
|||
assign bwlogic_and_result = operand_a_i & bwlogic_operand_b;
|
||||
assign bwlogic_xor_result = operand_a_i ^ bwlogic_operand_b;
|
||||
|
||||
assign bwlogic_or = (operator_i == ALU_OR) || (operator_i == ALU_ORN);
|
||||
assign bwlogic_and = (operator_i == ALU_AND) || (operator_i == ALU_ANDN);
|
||||
assign bwlogic_or = (operator_i == ALU_OR) | (operator_i == ALU_ORN);
|
||||
assign bwlogic_and = (operator_i == ALU_AND) | (operator_i == ALU_ANDN);
|
||||
|
||||
always_comb begin
|
||||
unique case (1'b1)
|
||||
|
@ -388,9 +341,334 @@ module ibex_alu #(
|
|||
end
|
||||
|
||||
logic [31:0] shuffle_result;
|
||||
logic [31:0] butterfly_result;
|
||||
logic [31:0] invbutterfly_result;
|
||||
|
||||
logic [31:0] minmax_result;
|
||||
logic [5:0] bitcnt_result;
|
||||
logic [31:0] pack_result;
|
||||
logic [31:0] multicycle_result;
|
||||
logic [31:0] singlebit_result;
|
||||
|
||||
if (RV32B) begin : g_alu_rvb
|
||||
|
||||
/////////////////
|
||||
// Bitcounting //
|
||||
/////////////////
|
||||
|
||||
// The bit-counter structure computes the number of set bits in its operand. Partial results
|
||||
// (from left to right) are needed to compute the control masks for computation of bext/bdep
|
||||
// by the butterfly network, if implemented.
|
||||
// For pcnt, clz and ctz, only the end result is used.
|
||||
|
||||
logic zbe_op;
|
||||
logic bitcnt_ctz;
|
||||
logic bitcnt_clz;
|
||||
logic bitcnt_cz;
|
||||
logic [31:0] bitcnt_bits;
|
||||
logic [31:0] bitcnt_mask_op;
|
||||
logic [31:0] bitcnt_bit_mask;
|
||||
logic [ 5:0] bitcnt_partial [32];
|
||||
|
||||
|
||||
assign bitcnt_ctz = operator_i == ALU_CTZ;
|
||||
assign bitcnt_clz = operator_i == ALU_CLZ;
|
||||
assign bitcnt_cz = bitcnt_ctz | bitcnt_clz;
|
||||
assign bitcnt_result = bitcnt_partial[31];
|
||||
|
||||
// Bit-mask generation for clz and ctz:
|
||||
// The bit mask is generated by spreading the lowest-order set bit in the operand to all
|
||||
// higher order bits. The resulting mask is inverted to cover the lowest order zeros. In order
|
||||
// to create the bit mask for leading zeros, the input operand needs to be reversed.
|
||||
assign bitcnt_mask_op = bitcnt_clz ? operand_a_rev : operand_a_i;
|
||||
|
||||
always_comb begin
|
||||
bitcnt_bit_mask = bitcnt_mask_op;
|
||||
bitcnt_bit_mask |= bitcnt_bit_mask << 1;
|
||||
bitcnt_bit_mask |= bitcnt_bit_mask << 2;
|
||||
bitcnt_bit_mask |= bitcnt_bit_mask << 4;
|
||||
bitcnt_bit_mask |= bitcnt_bit_mask << 8;
|
||||
bitcnt_bit_mask |= bitcnt_bit_mask << 16;
|
||||
bitcnt_bit_mask = ~bitcnt_bit_mask;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
case(1'b1)
|
||||
zbe_op: bitcnt_bits = operand_b_i;
|
||||
bitcnt_cz: bitcnt_bits = bitcnt_bit_mask & ~bitcnt_mask_op; // clz / ctz
|
||||
default: bitcnt_bits = operand_a_i; // pcnt
|
||||
endcase
|
||||
end
|
||||
|
||||
// The parallel prefix counter is of the structure of a Brent-Kung Adder. In the first
|
||||
// log2(width) stages, the sum of the n preceding bit lines is computed for the bit lines at
|
||||
// positions 2**n-1 (power-of-two positions) where n denotes the current stage.
|
||||
// In stage n=log2(width), the count for position width-1 (the MSB) is finished.
|
||||
// For the intermediate values, an inverse adder tree then computes the bit counts for the bit
|
||||
// lines at positions
|
||||
// m = 2**(n-1) + i*2**(n-2), where i = [1 ... width / 2**(n-1)-1] and n = [log2(width) ... 2].
|
||||
// Thus, at every subsequent stage the result of two previously unconnected sub-trees is
|
||||
// summed, starting at the node summing bits [width/2-1 : 0] and [3*width/4-1: width/2]
|
||||
// and moving to iteratively sum up all the sub-trees.
|
||||
// The inverse adder tree thus features log2(width) - 1 stages; the first of these stages is a
|
||||
// single addition at position 3*width/4 - 1. It does not interfere with the last
|
||||
// stage of the primary adder tree. These stages can thus be folded together, resulting in a
|
||||
// total of 2*log2(width)-2 stages.
|
||||
// For more details refer to R. Brent, H. T. Kung, "A Regular Layout for Parallel Adders",
|
||||
// (1982).
|
||||
// For a bitline at position p, only bits
|
||||
// bitcnt_partial[max(i, such that p % log2(i) == 0)-1 : 0] are needed for generation of the
|
||||
// butterfly network control signals. The adders in the intermediate value adder tree thus need
|
||||
// not be full 5-bit adders. We leave the optimization to the synthesis tools.
|
||||
//
|
||||
// Consider the following 8-bit example for illustration.
|
||||
//
|
||||
// let bitcnt_bits = 8'babcdefgh.
|
||||
//
|
||||
// a b c d e f g h
|
||||
// | /: | /: | /: | /:
|
||||
// |/ : |/ : |/ : |/ :
|
||||
// stage 1: + : + : + : + :
|
||||
// | : /: : | : /: :
|
||||
// |,--+ : : |,--+ : :
|
||||
// stage 2: + : : : + : : :
|
||||
// | : | : /: : : :
|
||||
// |,-----,--+ : : : : ^-primary adder tree
|
||||
// stage 3: + : + : : : : : -------------------------
|
||||
// : | /| /| /| /| /| : ,-intermediate adder tree
|
||||
// : |/ |/ |/ |/ |/ : :
|
||||
// stage 4 : + + + + + : :
|
||||
// : : : : : : : :
|
||||
// bitcnt_partial[i] 7 6 5 4 3 2 1 0
|
||||
|
||||
always_comb begin
|
||||
bitcnt_partial = '{default: '0};
|
||||
// stage 1
|
||||
for (int unsigned i=1; i<32; i+=2) begin
|
||||
bitcnt_partial[i] = {5'h0, bitcnt_bits[i]} + {5'h0, bitcnt_bits[i-1]};
|
||||
end
|
||||
// stage 2
|
||||
for (int unsigned i=3; i<32; i+=4) begin
|
||||
bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i];
|
||||
end
|
||||
// stage 3
|
||||
for (int unsigned i=7; i<32; i+=8) begin
|
||||
bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i];
|
||||
end
|
||||
// stage 4
|
||||
for (int unsigned i=15; i <32; i+=16) begin
|
||||
bitcnt_partial[i] = bitcnt_partial[i-8] + bitcnt_partial[i];
|
||||
end
|
||||
// stage 5
|
||||
bitcnt_partial[31] = bitcnt_partial[15] + bitcnt_partial[31];
|
||||
// ^- primary adder tree
|
||||
// -------------------------------
|
||||
// ,-intermediate value adder tree
|
||||
bitcnt_partial[23] = bitcnt_partial[15] + bitcnt_partial[23];
|
||||
|
||||
// stage 6
|
||||
for (int unsigned i=11; i<32; i+=8) begin
|
||||
bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i];
|
||||
end
|
||||
|
||||
// stage 7
|
||||
for (int unsigned i=5; i<32; i+=4) begin
|
||||
bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i];
|
||||
end
|
||||
// stage 8
|
||||
bitcnt_partial[0] = {5'h0, bitcnt_bits[0]};
|
||||
for (int unsigned i=2; i<32; i+=2) begin
|
||||
bitcnt_partial[i] = bitcnt_partial[i-1] + {5'h0, bitcnt_bits[i]};
|
||||
end
|
||||
end
|
||||
|
||||
///////////////
|
||||
// Butterfly //
|
||||
///////////////
|
||||
|
||||
// The butterfly / inverse butterfly network is shared between bext/bdep (zbe) instructions
|
||||
// respectively and grev / gorc instructions (zbp).
|
||||
// For bdep, the control bits mask of a local left region is generated by
|
||||
// the inverse of a n-bit left rotate and complement upon wrap (LROTC) operation by the number
|
||||
// of ones in the deposit bitmask to the right of the segment. n hereby denotes the width
|
||||
// of the according segment. The bitmask for a pertaining local right region is equal to the
|
||||
// corresponding local left region. Bext uses an analogous inverse process.
|
||||
// Consider the following 8-bit example. For details, see Hilewitz et al. "Fast Bit Gather,
|
||||
// Bit Scatter and Bit Permutation Instructions for Commodity Microprocessors", (2008).
|
||||
|
||||
// 8-bit example: (Hilewitz et al.)
|
||||
// Consider the instruction bdep operand_a_i deposit_mask
|
||||
// Let operand_a_i = 8'babcd_efgh
|
||||
// deposit_mask = 8'b1010_1101
|
||||
//
|
||||
// control bitmask for stage 1:
|
||||
// - number of ones in the right half of the deposit bitmask: 3
|
||||
// - width of the segment: 4
|
||||
// - control bitmask = ~LROTC(4'b0, 3)[3:0] = 4'b1000
|
||||
//
|
||||
// control bitmask: c3 c2 c1 c0 c3 c2 c1 c0
|
||||
// 1 0 0 0 1 0 0 0
|
||||
// <- L -----> <- R ----->
|
||||
// operand_a_i a b c d e f g h
|
||||
// :\ | | | /: | | |
|
||||
// : +|---|--|-+ : | | |
|
||||
// :/ | | | \: | | |
|
||||
// stage 1 e b c d a f g h
|
||||
// <L-> <R-> <L-> <R->
|
||||
// control bitmask: c3 c2 c3 c2 c1 c0 c1 c0
|
||||
// 1 1 1 1 1 0 1 0
|
||||
// :\ :\ /: /: :\ | /: |
|
||||
// : +:-+-:+ : : +|-+ : |
|
||||
// :/ :/ \: \: :/ | \: |
|
||||
// stage 2 c d e b g f a h
|
||||
// L R L R L R L R
|
||||
// control bitmask: c3 c3 c2 c2 c1 c1 c0 c0
|
||||
// 1 1 0 0 1 1 0 0
|
||||
// :\/: | | :\/: | |
|
||||
// : : | | : : | |
|
||||
// :/\: | | :/\: | |
|
||||
// stage 3 d c e b f g a h
|
||||
// & deposit bitmask: 1 0 1 0 1 1 0 1
|
||||
// result: d 0 e 0 f g 0 h
|
||||
|
||||
assign zbe_op = (operator_i == ALU_BEXT) | (operator_i == ALU_BDEP);
|
||||
|
||||
logic [31:0] butterfly_mask_l[5];
|
||||
logic [31:0] butterfly_mask_r[5];
|
||||
logic [31:0] butterfly_mask_not[5];
|
||||
logic [31:0] lrotc_stage [5]; // left rotate and complement upon wrap
|
||||
|
||||
// bext / bdep
|
||||
logic [31:0] butterfly_zbe_mask_l[5];
|
||||
logic [31:0] butterfly_zbe_mask_r[5];
|
||||
logic [31:0] butterfly_zbe_mask_not[5];
|
||||
|
||||
// grev / gorc
|
||||
logic [31:0] butterfly_zbp_mask_l[5];
|
||||
logic [31:0] butterfly_zbp_mask_r[5];
|
||||
logic [31:0] butterfly_zbp_mask_not[5];
|
||||
|
||||
logic grev_op;
|
||||
logic gorc_op;
|
||||
logic zbp_op;
|
||||
|
||||
// number of bits in local r = 32 / 2**(stage + 1) = 16/2**stage
|
||||
`define _N(stg) (16 >> stg)
|
||||
|
||||
// bext / bdep control bit generation
|
||||
for (genvar stg=0; stg<5; stg++) begin
|
||||
// number of segs: 2** stg
|
||||
for (genvar seg=0; seg<2**stg; seg++) begin
|
||||
|
||||
assign lrotc_stage[stg][2*`_N(stg)*(seg+1)-1 : 2*`_N(stg)*seg] =
|
||||
{{`_N(stg){1'b0}},{`_N(stg){1'b1}}} <<
|
||||
bitcnt_partial[`_N(stg)*(2*seg+1)-1][$clog2(`_N(stg)):0];
|
||||
|
||||
assign butterfly_zbe_mask_l[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)]
|
||||
= ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)];
|
||||
|
||||
assign butterfly_zbe_mask_r[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)]
|
||||
= ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)];
|
||||
|
||||
assign butterfly_zbe_mask_l[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)] = '0;
|
||||
assign butterfly_zbe_mask_r[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)] = '0;
|
||||
end
|
||||
end
|
||||
`undef _N
|
||||
|
||||
for (genvar stg=0; stg<5; stg++) begin
|
||||
assign butterfly_zbe_mask_not[stg] =
|
||||
~(butterfly_zbe_mask_l[stg] | butterfly_zbe_mask_r[stg]);
|
||||
end
|
||||
|
||||
// grev / gorc control bit generation
|
||||
assign butterfly_zbp_mask_l[0] = shift_amt[4] ? 32'hffff_0000 : 32'h0000_0000;
|
||||
assign butterfly_zbp_mask_r[0] = shift_amt[4] ? 32'h0000_ffff : 32'h0000_0000;
|
||||
assign butterfly_zbp_mask_not[0] =
|
||||
!shift_amt[4] || (shift_amt[4] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000;
|
||||
|
||||
assign butterfly_zbp_mask_l[1] = shift_amt[3] ? 32'hff00_ff00 : 32'h0000_0000;
|
||||
assign butterfly_zbp_mask_r[1] = shift_amt[3] ? 32'h00ff_00ff : 32'h0000_0000;
|
||||
assign butterfly_zbp_mask_not[1] =
|
||||
!shift_amt[3] || (shift_amt[3] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000;
|
||||
|
||||
assign butterfly_zbp_mask_l[2] = shift_amt[2] ? 32'hf0f0_f0f0 : 32'h0000_0000;
|
||||
assign butterfly_zbp_mask_r[2] = shift_amt[2] ? 32'h0f0f_0f0f : 32'h0000_0000;
|
||||
assign butterfly_zbp_mask_not[2] =
|
||||
!shift_amt[2] || (shift_amt[2] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000;
|
||||
|
||||
assign butterfly_zbp_mask_l[3] = shift_amt[1] ? 32'hcccc_cccc : 32'h0000_0000;
|
||||
assign butterfly_zbp_mask_r[3] = shift_amt[1] ? 32'h3333_3333 : 32'h0000_0000;
|
||||
assign butterfly_zbp_mask_not[3] =
|
||||
!shift_amt[1] || (shift_amt[1] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000;
|
||||
|
||||
assign butterfly_zbp_mask_l[4] = shift_amt[0] ? 32'haaaa_aaaa : 32'h0000_0000;
|
||||
assign butterfly_zbp_mask_r[4] = shift_amt[0] ? 32'h5555_5555 : 32'h0000_0000;
|
||||
assign butterfly_zbp_mask_not[4] =
|
||||
!shift_amt[0] || (shift_amt[0] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000;
|
||||
|
||||
// grev / gorc instructions
// grev_op / gorc_op flag the ALU_GREV / ALU_GORC operators and are tied to 0
// when the RV32B parameter is false. zbp_op is set for either of the two.
|
||||
assign grev_op = RV32B ? (operator_i == ALU_GREV) : 1'b0;
|
||||
assign gorc_op = RV32B ? (operator_i == ALU_GORC) : 1'b0;
|
||||
assign zbp_op = grev_op | gorc_op;
|
||||
|
||||
// select set of masks:
// the grev / gorc (zbp) masks drive the butterfly network while zbp_op is set;
// for every other operator the bext / bdep (zbe) masks are used.
|
||||
assign butterfly_mask_l = zbp_op ? butterfly_zbp_mask_l : butterfly_zbe_mask_l;
|
||||
assign butterfly_mask_r = zbp_op ? butterfly_zbp_mask_r : butterfly_zbe_mask_r;
|
||||
assign butterfly_mask_not = zbp_op ? butterfly_zbp_mask_not : butterfly_zbe_mask_not;
|
||||
|
||||
// Butterfly network (result used for ALU_BDEP, ALU_GREV and ALU_GORC).
// Starts from operand_a_i and applies five stages with distances 16, 8, 4, 2, 1
// using mask index 0..4. In every stage the bits selected by butterfly_mask_not
// stay in place, the bits selected by butterfly_mask_l are shifted right by the
// stage distance, the bits selected by butterfly_mask_r are shifted left by the
// same distance, and the three parts are OR-ed together.
// Each assignment reads the value produced by the previous one, so the statement
// order defines the stage order and must not be changed.
always_comb begin
|
||||
butterfly_result = operand_a_i;
|
||||
|
||||
butterfly_result = butterfly_result & butterfly_mask_not[0] |
|
||||
((butterfly_result & butterfly_mask_l[0]) >> 16)|
|
||||
((butterfly_result & butterfly_mask_r[0]) << 16);
|
||||
|
||||
butterfly_result = butterfly_result & butterfly_mask_not[1] |
|
||||
((butterfly_result & butterfly_mask_l[1]) >> 8)|
|
||||
((butterfly_result & butterfly_mask_r[1]) << 8);
|
||||
|
||||
butterfly_result = butterfly_result & butterfly_mask_not[2] |
|
||||
((butterfly_result & butterfly_mask_l[2]) >> 4)|
|
||||
((butterfly_result & butterfly_mask_r[2]) << 4);
|
||||
|
||||
butterfly_result = butterfly_result & butterfly_mask_not[3] |
|
||||
((butterfly_result & butterfly_mask_l[3]) >> 2)|
|
||||
((butterfly_result & butterfly_mask_r[3]) << 2);
|
||||
|
||||
butterfly_result = butterfly_result & butterfly_mask_not[4] |
|
||||
((butterfly_result & butterfly_mask_l[4]) >> 1)|
|
||||
((butterfly_result & butterfly_mask_r[4]) << 1);
|
||||
|
||||
// Outside of grev / gorc the permuted value is additionally masked with
// operand_b_i, so only positions set in operand_b_i can be non-zero.
if (!zbp_op) begin
|
||||
butterfly_result = butterfly_result & operand_b_i;
|
||||
end
|
||||
end
|
||||
|
||||
// Inverse butterfly network (result used for ALU_BEXT).
// Starts from operand_a_i masked with operand_b_i and applies the same five
// stages as the butterfly network in the opposite order: mask index 4..0, i.e.
// distances 1, 2, 4, 8, 16. Per stage, bits selected by butterfly_mask_not stay
// in place, bits selected by butterfly_mask_l are shifted right and bits selected
// by butterfly_mask_r are shifted left by the stage distance.
// Each assignment reads the value produced by the previous one, so the statement
// order defines the stage order and must not be changed.
always_comb begin
|
||||
invbutterfly_result = operand_a_i & operand_b_i;
|
||||
|
||||
invbutterfly_result = invbutterfly_result & butterfly_mask_not[4] |
|
||||
((invbutterfly_result & butterfly_mask_l[4]) >> 1)|
|
||||
((invbutterfly_result & butterfly_mask_r[4]) << 1);
|
||||
|
||||
invbutterfly_result = invbutterfly_result & butterfly_mask_not[3] |
|
||||
((invbutterfly_result & butterfly_mask_l[3]) >> 2)|
|
||||
((invbutterfly_result & butterfly_mask_r[3]) << 2);
|
||||
|
||||
invbutterfly_result = invbutterfly_result & butterfly_mask_not[2] |
|
||||
((invbutterfly_result & butterfly_mask_l[2]) >> 4)|
|
||||
((invbutterfly_result & butterfly_mask_r[2]) << 4);
|
||||
|
||||
invbutterfly_result = invbutterfly_result & butterfly_mask_not[1] |
|
||||
((invbutterfly_result & butterfly_mask_l[1]) >> 8)|
|
||||
((invbutterfly_result & butterfly_mask_r[1]) << 8);
|
||||
|
||||
invbutterfly_result = invbutterfly_result & butterfly_mask_not[0] |
|
||||
((invbutterfly_result & butterfly_mask_l[0]) >> 16)|
|
||||
((invbutterfly_result & butterfly_mask_r[0]) << 16);
|
||||
end
|
||||
|
||||
/////////////////////////
|
||||
// Shuffle / Unshuffle //
|
||||
/////////////////////////
|
||||
|
@ -429,8 +707,8 @@ module ibex_alu #(
|
|||
|
||||
if (shuffle_flip) begin
|
||||
shuffle_result = (shuffle_result & 32'h8822_4411) |
|
||||
((shuffle_result << 6) & FLIP_MASK_L[0]) | ((shuffle_result >> 6) & FLIP_MASK_R[0]) |
|
||||
((shuffle_result << 9) & FLIP_MASK_L[1]) | ((shuffle_result >> 9) & FLIP_MASK_R[1]) |
|
||||
((shuffle_result << 6) & FLIP_MASK_L[0]) | ((shuffle_result >> 6) & FLIP_MASK_R[0]) |
|
||||
((shuffle_result << 9) & FLIP_MASK_L[1]) | ((shuffle_result >> 9) & FLIP_MASK_R[1]) |
|
||||
((shuffle_result << 15) & FLIP_MASK_L[2]) | ((shuffle_result >> 15) & FLIP_MASK_R[2]) |
|
||||
((shuffle_result << 21) & FLIP_MASK_L[3]) | ((shuffle_result >> 21) & FLIP_MASK_R[3]);
|
||||
end
|
||||
|
@ -458,8 +736,8 @@ module ibex_alu #(
|
|||
|
||||
if (shuffle_flip) begin
|
||||
shuffle_result = (shuffle_result & 32'h8822_4411) |
|
||||
((shuffle_result << 6) & FLIP_MASK_L[0]) | ((shuffle_result >> 6) & FLIP_MASK_R[0]) |
|
||||
((shuffle_result << 9) & FLIP_MASK_L[1]) | ((shuffle_result >> 9) & FLIP_MASK_R[1]) |
|
||||
((shuffle_result << 6) & FLIP_MASK_L[0]) | ((shuffle_result >> 6) & FLIP_MASK_R[0]) |
|
||||
((shuffle_result << 9) & FLIP_MASK_L[1]) | ((shuffle_result >> 9) & FLIP_MASK_R[1]) |
|
||||
((shuffle_result << 15) & FLIP_MASK_L[2]) | ((shuffle_result >> 15) & FLIP_MASK_R[2]) |
|
||||
((shuffle_result << 21) & FLIP_MASK_L[3]) | ((shuffle_result >> 21) & FLIP_MASK_R[3]);
|
||||
end
|
||||
|
@ -534,33 +812,8 @@ module ibex_alu #(
|
|||
// Min / Max //
|
||||
///////////////
|
||||
|
||||
assign minmax_result = (cmp_result ? operand_a_i : operand_b_i);
|
||||
assign minmax_result = cmp_result ? operand_a_i : operand_b_i;
|
||||
|
||||
/////////////////
|
||||
// Bitcounting //
|
||||
/////////////////
|
||||
|
||||
logic bitcnt_ctz;
|
||||
logic bitcnt_pcnt;
|
||||
logic [31:0] bitcnt_bits;
|
||||
logic [32:0] bitcnt_bit_enable;
|
||||
|
||||
assign bitcnt_ctz = (operator_i == ALU_CTZ);
|
||||
assign bitcnt_pcnt = (operator_i == ALU_PCNT);
|
||||
|
||||
assign bitcnt_bits = bitcnt_pcnt ? operand_a_i : (bitcnt_ctz ? ~operand_a_i : ~operand_a_rev);
|
||||
|
||||
always_comb begin
|
||||
bitcnt_result = '0;
|
||||
bitcnt_bit_enable = {32'b0, 1'b1}; // bit 32 unused.
|
||||
for (int unsigned i=0; i<32; i++) begin : gen_bitcnt_adder
|
||||
// keep counting if all previous bits are 1
|
||||
bitcnt_bit_enable[i+1] = bitcnt_pcnt || (bitcnt_bit_enable[i] && bitcnt_bits[i]);
|
||||
if (bitcnt_bit_enable[i]) begin
|
||||
bitcnt_result[5:0] = bitcnt_result[5:0] + {5'h0, bitcnt_bits[i]};
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
//////////
|
||||
// Pack //
|
||||
|
@ -568,8 +821,8 @@ module ibex_alu #(
|
|||
|
||||
logic packu;
|
||||
logic packh;
|
||||
assign packu = (operator_i == ALU_PACKU);
|
||||
assign packh = (operator_i == ALU_PACKH);
|
||||
assign packu = operator_i == ALU_PACKU;
|
||||
assign packh = operator_i == ALU_PACKH;
|
||||
|
||||
always_comb begin
|
||||
unique case (1'b1)
|
||||
|
@ -580,15 +833,17 @@ module ibex_alu #(
|
|||
end
|
||||
end else begin : g_no_alu_rvb
|
||||
// RV32B result signals
|
||||
assign minmax_result = '0;
|
||||
assign bitcnt_result = '0;
|
||||
assign pack_result = '0;
|
||||
assign multicycle_result = '0;
|
||||
assign singlebit_result = '0;
|
||||
assign shuffle_result = '0;
|
||||
assign minmax_result = '0;
|
||||
assign bitcnt_result = '0;
|
||||
assign pack_result = '0;
|
||||
assign multicycle_result = '0;
|
||||
assign singlebit_result = '0;
|
||||
assign shuffle_result = '0;
|
||||
assign butterfly_result = '0;
|
||||
assign invbutterfly_result = '0;
|
||||
// RV32B support signals
|
||||
assign imd_val_d_o = '0;
|
||||
assign imd_val_we_o = '0;
|
||||
assign imd_val_d_o = '0;
|
||||
assign imd_val_we_o = '0;
|
||||
end
|
||||
|
||||
////////////////
|
||||
|
@ -611,8 +866,7 @@ module ibex_alu #(
|
|||
ALU_SLL, ALU_SRL,
|
||||
ALU_SRA,
|
||||
// RV32B Ops
|
||||
ALU_SLO, ALU_SRO,
|
||||
ALU_GREV, ALU_GORC: result_o = shift_result;
|
||||
ALU_SLO, ALU_SRO: result_o = shift_result;
|
||||
|
||||
// Shuffle Operations (RV32B Ops)
|
||||
ALU_SHFL, ALU_UNSHFL: result_o = shuffle_result;
|
||||
|
@ -644,6 +898,13 @@ module ibex_alu #(
|
|||
ALU_SBSET, ALU_SBCLR,
|
||||
ALU_SBINV, ALU_SBEXT: result_o = singlebit_result;
|
||||
|
||||
// Bit Extract / Deposit (RV32B Ops)
|
||||
ALU_BDEP: result_o = butterfly_result;
|
||||
ALU_BEXT: result_o = invbutterfly_result;
|
||||
|
||||
// General Reverse / Or-combine (RV32B Ops)
|
||||
ALU_GREV, ALU_GORC: result_o = butterfly_result;
|
||||
|
||||
default: ;
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -405,7 +405,7 @@ module ibex_decoder #(
|
|||
{7'b000_0000, 3'b101},
|
||||
{7'b010_0000, 3'b101}: illegal_insn = 1'b0;
|
||||
|
||||
// supported RV32B instructions (zbb)
|
||||
// RV32B zbb
|
||||
{7'b010_0000, 3'b111}, // andn
|
||||
{7'b010_0000, 3'b110}, // orn
|
||||
{7'b010_0000, 3'b100}, // xnor
|
||||
|
@ -420,11 +420,15 @@ module ibex_decoder #(
|
|||
{7'b000_0100, 3'b100}, // pack
|
||||
{7'b010_0100, 3'b100}, // packu
|
||||
{7'b000_0100, 3'b111}, // packh
|
||||
// RV32B instructions (zbs)
|
||||
// RV32B zbs
|
||||
{7'b010_0100, 3'b001}, // sbclr
|
||||
{7'b001_0100, 3'b001}, // sbset
|
||||
{7'b011_0100, 3'b001}, // sbinv
|
||||
{7'b010_0100, 3'b101}, // sbext
|
||||
// RV32B zbe
|
||||
{7'b010_0100, 3'b110}, // bdep
|
||||
{7'b000_0100, 3'b110}, // bext
|
||||
// RV32B zbp
|
||||
{7'b011_0100, 3'b101}, // grev
|
||||
{7'b001_0100, 3'b101}, // gorc
|
||||
{7'b000_0100, 3'b001}, // shfl
|
||||
|
@ -877,7 +881,7 @@ module ibex_decoder #(
|
|||
{7'b000_0000, 3'b101}: alu_operator_o = ALU_SRL; // Shift Right Logical
|
||||
{7'b010_0000, 3'b101}: alu_operator_o = ALU_SRA; // Shift Right Arithmetic
|
||||
|
||||
// RV32B ALU Operations
|
||||
// RV32B zbb
|
||||
{7'b001_0000, 3'b001}: if (RV32B) alu_operator_o = ALU_SLO; // Shift Left Ones
|
||||
{7'b001_0000, 3'b101}: if (RV32B) alu_operator_o = ALU_SRO; // Shift Right Ones
|
||||
{7'b011_0000, 3'b001}: begin
|
||||
|
@ -905,18 +909,21 @@ module ibex_decoder #(
|
|||
{7'b010_0000, 3'b100}: if (RV32B) alu_operator_o = ALU_XNOR; // Xnor
|
||||
{7'b010_0000, 3'b110}: if (RV32B) alu_operator_o = ALU_ORN; // Orn
|
||||
{7'b010_0000, 3'b111}: if (RV32B) alu_operator_o = ALU_ANDN; // Andn
|
||||
// RV32B zbp
|
||||
{7'b011_0100, 3'b101}: if (RV32B) alu_operator_o = ALU_GREV; // Grev
|
||||
{7'b001_0100, 3'b101}: if (RV32B) alu_operator_o = ALU_GORC; // Gorc
|
||||
{7'b000_0100, 3'b001}: if (RV32B) alu_operator_o = ALU_SHFL; // Shfl
|
||||
{7'b000_0100, 3'b101}: if (RV32B) alu_operator_o = ALU_UNSHFL; // Unshfl
|
||||
|
||||
|
||||
// RV32B ALU_Operations (zbs)
|
||||
// RV32B zbs
|
||||
{7'b010_0100, 3'b001}: if (RV32B) alu_operator_o = ALU_SBCLR; // sbclr
|
||||
{7'b001_0100, 3'b001}: if (RV32B) alu_operator_o = ALU_SBSET; // sbset
|
||||
{7'b011_0100, 3'b001}: if (RV32B) alu_operator_o = ALU_SBINV; // sbinv
|
||||
{7'b010_0100, 3'b101}: if (RV32B) alu_operator_o = ALU_SBEXT; // sbext
|
||||
|
||||
// RV32B zbe
|
||||
{7'b010_0100, 3'b110}: if (RV32B) alu_operator_o = ALU_BDEP; // bdep
|
||||
{7'b000_0100, 3'b110}: if (RV32B) alu_operator_o = ALU_BEXT; // bext
|
||||
// RV32M instructions, all use the same ALU operation
|
||||
{7'b000_0001, 3'b000}: begin // mul
|
||||
alu_operator_o = ALU_ADD;
|
||||
|
|
|
@ -101,7 +101,12 @@ typedef enum logic [5:0] {
|
|||
ALU_SBSET,
|
||||
ALU_SBCLR,
|
||||
ALU_SBINV,
|
||||
ALU_SBEXT
|
||||
ALU_SBEXT,
|
||||
|
||||
// Bit Extract / Deposit
|
||||
// RV32B
|
||||
ALU_BEXT,
|
||||
ALU_BDEP
|
||||
} alu_op_e;
|
||||
|
||||
typedef enum logic [1:0] {
|
||||
|
|
|
@ -905,6 +905,9 @@ module ibex_tracer (
|
|||
INSN_SBSET: decode_r_insn("sbset");
|
||||
INSN_SBINV: decode_r_insn("sbinv");
|
||||
INSN_SBEXT: decode_r_insn("sbext");
|
||||
// RV32B - ZBE
|
||||
INSN_BDEP: decode_r_insn("bdep");
|
||||
INSN_BEXT: decode_r_insn("bext");
|
||||
// RV32B - ZBP
|
||||
INSN_GREV: decode_r_insn("grev");
|
||||
INSN_GREVI: begin
|
||||
|
|
|
@ -72,7 +72,6 @@ parameter logic [31:0] INSN_PMULHSU = { 7'b0000001, 10'b?, 3'b010, 5'b?, {OPCODE
|
|||
parameter logic [31:0] INSN_PMULHU = { 7'b0000001, 10'b?, 3'b011, 5'b?, {OPCODE_OP} };
|
||||
|
||||
// RV32B
|
||||
// OPIMM
|
||||
// ZBB
|
||||
parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} };
|
||||
parameter logic [31:0] INSN_SROI = { 5'b00100 , 12'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} };
|
||||
|
@ -80,6 +79,33 @@ parameter logic [31:0] INSN_RORI = { 5'b01100 , 12'b?, 3'b101, 5'b?, {OPC
|
|||
parameter logic [31:0] INSN_CLZ = { 12'b011000000000, 5'b? , 3'b001, 5'b?, {OPCODE_OP_IMM} };
|
||||
parameter logic [31:0] INSN_CTZ = { 12'b011000000001, 5'b? , 3'b001, 5'b?, {OPCODE_OP_IMM} };
|
||||
parameter logic [31:0] INSN_PCNT = { 12'b011000000010, 5'b? , 3'b001, 5'b?, {OPCODE_OP_IMM} };
|
||||
|
||||
parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_ROL = { 7'b0110000, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_ROR = { 7'b0110000, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_MIN = { 7'b0000101, 10'b?, 3'b100, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_MAX = { 7'b0000101, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_MINU = { 7'b0000101, 10'b?, 3'b110, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_MAXU = { 7'b0000101, 10'b?, 3'b111, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_XNOR = { 7'b0100000, 10'b?, 3'b100, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_ORN = { 7'b0100000, 10'b?, 3'b110, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_ANDN = { 7'b0100000, 10'b?, 3'b111, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_PACK = { 7'b0000100, 10'b?, 3'b100, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_PACKU = { 7'b0100100, 10'b?, 3'b100, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_PACKH = { 7'b0000100, 10'b?, 3'b111, 5'b?, {OPCODE_OP} };
|
||||
|
||||
// ZBS
|
||||
parameter logic [31:0] INSN_SBCLRI = { 5'b01001, 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} };
|
||||
parameter logic [31:0] INSN_SBSETI = { 5'b00101, 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} };
|
||||
parameter logic [31:0] INSN_SBINVI = { 5'b01101, 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} };
|
||||
parameter logic [31:0] INSN_SBEXTI = { 5'b01001, 12'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} };
|
||||
|
||||
parameter logic [31:0] INSN_SBCLR = { 7'b0100100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SBSET = { 7'b0010100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SBINV = { 7'b0110100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SBEXT = { 7'b0100100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
|
||||
// ZBP
|
||||
// grevi
|
||||
parameter logic [31:0] INSN_GREVI = { 5'b01101, 12'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} };
|
||||
|
@ -194,45 +220,18 @@ parameter logic [31:0] INSN_UNZIP2 =
|
|||
parameter logic [31:0] INSN_UNZIP =
|
||||
{ 5'b00010, 3'b?, 4'b1111, 5'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} };
|
||||
|
||||
// ZBS
|
||||
parameter logic [31:0] INSN_SBCLRI = { 5'b01001 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} };
|
||||
parameter logic [31:0] INSN_SBSETI = { 5'b00101 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} };
|
||||
parameter logic [31:0] INSN_SBINVI = { 5'b01101 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} };
|
||||
parameter logic [31:0] INSN_SBEXTI = { 5'b01001 , 12'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} };
|
||||
|
||||
// ZBT
|
||||
parameter logic [31:0] INSN_FSRI = { 5'b?, 1'b1, 11'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} };
|
||||
|
||||
// OP
|
||||
// ZBB
|
||||
parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_ROL = { 7'b0110000, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_ROR = { 7'b0110000, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_MIN = { 7'b0000101, 10'b?, 3'b100, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_MAX = { 7'b0000101, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_MINU = { 7'b0000101, 10'b?, 3'b110, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_MAXU = { 7'b0000101, 10'b?, 3'b111, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_XNOR = { 7'b0100000, 10'b?, 3'b100, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_ORN = { 7'b0100000, 10'b?, 3'b110, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_ANDN = { 7'b0100000, 10'b?, 3'b111, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_PACK = { 7'b0000100, 10'b?, 3'b100, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_PACKU = { 7'b0100100, 10'b?, 3'b100, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_PACKH = { 7'b0000100, 10'b?, 3'b111, 5'b?, {OPCODE_OP} };
|
||||
|
||||
// ZBP
|
||||
parameter logic [31:0] INSN_GREV = { 7'b0110100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_GORC = { 7'b0010100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SHFL = { 7'b0000100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_UNSHFL = { 7'b0000100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
|
||||
// ZBS
|
||||
parameter logic [31:0] INSN_SBCLR = { 7'b0100100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SBSET = { 7'b0010100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SBINV = { 7'b0110100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SBEXT = { 7'b0100100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
// ZBE
|
||||
parameter logic [31:0] INSN_BDEP = {7'b0100100, 10'b?, 3'b110, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_BEXT = {7'b0000100, 10'b?, 3'b110, 5'b?, {OPCODE_OP} };
|
||||
|
||||
// ZBT
|
||||
parameter logic [31:0] INSN_FSRI = { 5'b?, 1'b1, 11'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} };
|
||||
|
||||
parameter logic [31:0] INSN_CMIX = {5'b?, 2'b11, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_CMOV = {5'b?, 2'b11, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_FSL = {5'b?, 2'b10, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue