diff --git a/doc/instruction_decode_execute.rst b/doc/instruction_decode_execute.rst index 9c699730..a32777d7 100644 --- a/doc/instruction_decode_execute.rst +++ b/doc/instruction_decode_execute.rst @@ -66,7 +66,7 @@ Other blocks use the ALU for the following tasks: Support for the RISC-V Bitmanipulation Extension (Document Version 0.92, November 8, 2019) is enabled via the parameter ``RV32B``. This feature is *EXPERIMENTAL* and the details of its impact are not yet documented here. -Currently the Zbb, Zbs, Zbp and Zbt sub-extensions are implemented. +Currently the Zbb, Zbs, Zbp, Zbe and Zbt sub-extensions are implemented. All instructions are carried out in a single clock cycle. .. _mult-div: diff --git a/doc/integration.rst b/doc/integration.rst index 788324c1..97c6160c 100644 --- a/doc/integration.rst +++ b/doc/integration.rst @@ -93,7 +93,8 @@ Parameters +------------------------------+-------------+------------+-----------------------------------------------------------------+ | ``RV32B`` | bit | 0 | *EXPERIMENTAL* - B(itmanipulation) extension enable: | | | | | Currently supported Z-extensions: Zbb (base), Zbs (single-bit) | -| | | | Zbp (bit permutation) and Zbt (ternary) | +| | | | Zbp (bit permutation), Zbe (bit extract/deposit) and | +| | | | Zbt (ternary) | +------------------------------+-------------+------------+-----------------------------------------------------------------+ | ``BranchTargetALU`` | bit | 0 | *EXPERIMENTAL* - Enables branch target ALU removing a stall | | | | | cycle from taken branches | diff --git a/dv/uvm/core_ibex/riscv_dv_extension/testlist.yaml b/dv/uvm/core_ibex/riscv_dv_extension/testlist.yaml index 9bb1dd79..d7611f3b 100644 --- a/dv/uvm/core_ibex/riscv_dv_extension/testlist.yaml +++ b/dv/uvm/core_ibex/riscv_dv_extension/testlist.yaml @@ -595,5 +595,5 @@ gen_test: riscv_rand_instr_test gen_opts: > +enable_b_extension=1 - +enable_bitmanip_groups=zbb,zbt,zbs,zbp + +enable_bitmanip_groups=zbb,zbt,zbs,zbe,zbp rtl_test: 
core_ibex_base_test diff --git a/rtl/ibex_alu.sv b/rtl/ibex_alu.sv index 6744ac4e..cf3d08e5 100644 --- a/rtl/ibex_alu.sv +++ b/rtl/ibex_alu.sv @@ -89,192 +89,6 @@ module ibex_alu #( assign adder_result_o = adder_result; - /////////// - // Shift // - /////////// - - // The shifter structure consists of a 33-bit shifter: 32-bit operand + 1 bit extension for - // arithmetic shifts and one-shift support. - // Rotations and funnel shifts are implemented as multi-cycle instructions. - // The shifter is also used for single-bit instructions as detailed below. - // - // Standard Shifts - // =============== - // For standard shift instructions, the direction of the shift is to the right by default. For - // left shifts, the signal shift_left signal is set. If so, the operand is initially reversed, - // shifted to the right by the specified amount and shifted back again. For arithmetic- and - // one-shifts the 33rd bit of the shifter operand can is set accordingly. - // - // Multicycle Shifts - // ================= - // - // Rotation - // -------- - // For rotations, the operand signals operand_a_i and operand_b_i are kept constant to rs1 and - // rs2 respectively. - // - // Rotation pseudocode: - // shift_amt = rs2 & 31; - // multicycle_result = (rs1 >> shift_amt) | (rs1 << (32 - shift_amt)); - // ^-- cycle 0 -----^ ^-- cycle 1 --------------^ - // - // Funnel Shifts - // ------------- - // For funnel shifs, operand_a_i is tied to rs1 in the first cycle and rs3 in the - // second cycle. operand_b_i is always tied to rs2. The order of applying the shift amount or - // its complement is determined by bit [5] of shift_amt. 
- // - // Funnel shift Pseudocode: (fsl) - // shift_amt = rs2 & 63; - // shift_amt_compl = 32 - shift_amt[4:0] - // if (shift_amt >=33): - // multicycle_result = (rs1 >> shift_amt_cmpl[4:0]) | (rs3 << shift_amt[4:0]); - // ^-- cycle 0 ---------------^ ^-- cycle 1 ------------^ - // else if (shift_amt <= 31 && shift_amt > 0): - // multicycle_result = (rs1 << shift_amt[4:0]) | (rs3 >> shift_amt_compl[4:0]); - // ^-- cycle 0 ----------^ ^-- cycle 1 -------------------^ - // For shift_amt == 0, 32, both shift_amt[4:0] and shift_amt_compl[4:0] == '0. - // these cases need to be handled separately outside the shifting structure: - // else if (shift_amt == 32): - // multicycle_result = rs3 - // else if (shift_amt == 0): - // multicycle_result = rs1. - // - // Single-Bit Instructions - // ======================= - // Single bit instructions operate on bit operand_b_i[4:0] of operand_a_i. - - // The operations sbset, sbclr and sbinv are implemented by generation of a bit-mask using the - // shifter structure. This is done by left-shifting the operand 32'h1 by the required amount. - // The signal shift_sbmode multiplexes the shifter input and sets the signal shift_left. - // Further processing is taken care of by a separate structure. - // - // For sbext, the bit defined by operand_b_i[4:0] is to be returned. This is done by simply - // shifting operand_a_i to the right by the required amount and returning bit [0] of the result. - // - // Generalized Reverse and Or-Combine - // ================================== - // Grev and gorc instructions share the reversing structure used for left-shifts. The control - // bits are the same for shifts and grev/gorc. Shift_amt can therefore be reused for activating - // the respective reversal stages. 
- - - logic shift_left; - logic shift_ones; - logic shift_arith; - logic shift_funnel; - logic shift_sbmode; - logic [5:0] shift_amt; - logic [5:0] shift_amt_compl; // complementary shift amount (32 - shift_amt) - - // bit shift_amt[5]: word swap bit: only considered for FSL/FSR. - // if set, reverse operations in first and second cycle. - assign shift_amt[5] = operand_b_i[5] && shift_funnel; - assign shift_amt_compl = 32 - operand_b_i[4:0]; - - assign shift_amt[4:0] = instr_first_cycle_i ? - (operand_b_i[5] && shift_funnel ? shift_amt_compl[4:0] : operand_b_i[4:0]) : - (operand_b_i[5] && shift_funnel ? operand_b_i[4:0] : shift_amt_compl[4:0]); - - // single-bit mode: shift - assign shift_sbmode = RV32B ? (operator_i == ALU_SBSET) || (operator_i == ALU_SBCLR) || - (operator_i == ALU_SBINV) : - 1'b0; - - // left shift if this is: - // * a standard left shift (slo, sll) - // * a rol in the first cycle - // * a ror in the second cycle - // * fsl: without word-swap bit: first cycle, else: second cycle - // * fsr: without word-swap bit: second cycle, else: first cycle - // * a single-bit instruction: sbclr, sbset, sbinv (excluding sbext) - always_comb begin - unique case (operator_i) - ALU_SLL: shift_left = 1'b1; - ALU_SLO: shift_left = RV32B ? 1'b1 : 1'b0; - ALU_ROL: shift_left = RV32B ? instr_first_cycle_i : 0; - ALU_ROR: shift_left = RV32B ? !instr_first_cycle_i : 0; - ALU_FSL: shift_left = - RV32B ? (shift_amt[5] ? !instr_first_cycle_i : instr_first_cycle_i) : 1'b0; - ALU_FSR: shift_left = - RV32B ? (shift_amt[5] ? instr_first_cycle_i : !instr_first_cycle_i) : 1'b0; - default: shift_left = 1'b0; - endcase - if (shift_sbmode) begin - shift_left = 1'b1; - end - end - - assign shift_arith = (operator_i == ALU_SRA); - assign shift_ones = RV32B ? (operator_i == ALU_SLO) || (operator_i == ALU_SRO) : 1'b0; - assign shift_funnel = RV32B ? 
(operator_i == ALU_FSL) || (operator_i == ALU_FSR) : 1'b0; - - logic [31:0] shift_result; - logic [32:0] shift_result_ext; - - // grev / gorc instructions - logic grev_op; - assign grev_op = RV32B ? (operator_i == ALU_GREV) : 1'b0; - logic gorc_op; - assign gorc_op = RV32B ? (operator_i == ALU_GORC) : 1'b0; - - // combined shifter/ reverser structure. - always_comb begin - shift_result = operand_a_i; - - // select bit reversed or normal input - if (shift_left) begin - shift_result = operand_a_rev; - end - - // if this is a single bit instruction: we left-shift 32'h1 by shift_amt. - // the first reverse of the left-shift operation can be easily omitted, since we - // know the result of rev(32'h1). - if (shift_sbmode) begin - shift_result = 32'h8000_0000; - end - - shift_result_ext = $signed({shift_ones || (shift_arith && shift_result[31]), shift_result}) - >>> shift_amt[4:0]; - - shift_result = shift_result_ext[31:0]; - - if (grev_op || gorc_op) begin - shift_result = operand_a_i; - end - - // left shift always do the full reverse. Orc and rev do permutation as requested by shift_amt. - if (shift_left || ((grev_op || gorc_op) & shift_amt[0])) begin - shift_result = (gorc_op ? shift_result : 32'h0) | - ((shift_result & 32'h5555_5555) << 1)| - ((shift_result & 32'haaaa_aaaa) >> 1); - end - - if (shift_left || ((grev_op || gorc_op) & shift_amt[1])) begin - shift_result = (gorc_op ? shift_result : 32'h0) | - ((shift_result & 32'h3333_3333) << 2)| - ((shift_result & 32'hcccc_cccc) >> 2); - end - - if (shift_left || ((grev_op || gorc_op) & shift_amt[2])) begin - shift_result = (gorc_op ? shift_result : 32'h0) | - ((shift_result & 32'h0f0f_0f0f) << 4)| - ((shift_result & 32'hf0f0_f0f0) >> 4); - end - - if (shift_left || ((grev_op || gorc_op) & shift_amt[3])) begin - shift_result = (gorc_op ? 
shift_result : 32'h0) | - ((shift_result & 32'h00ff_00ff) << 8) | - ((shift_result & 32'hff00_ff00) >> 8); - end - - if (shift_left || ((grev_op || gorc_op) & shift_amt[4])) begin - shift_result = (gorc_op ? shift_result : 32'h0) | - ((shift_result & 32'h0000_ffff) << 16) | - ((shift_result & 32'hffff_0000) >> 16); - end - end - //////////////// // Comparison // //////////////// @@ -339,12 +153,150 @@ module ibex_alu #( assign comparison_result_o = cmp_result; - logic [31:0] minmax_result; - logic [5:0] bitcnt_result; - logic [31:0] bwlogic_result; - logic [31:0] pack_result; - logic [31:0] multicycle_result; - logic [31:0] singlebit_result; + /////////// + // Shift // + /////////// + + // The shifter structure consists of a 33-bit shifter: 32-bit operand + 1 bit extension for + // arithmetic shifts and one-shift support. + // Rotations and funnel shifts are implemented as multi-cycle instructions. + // The shifter is also used for single-bit instructions as detailed below. + // + // Standard Shifts + // =============== + // For standard shift instructions, the direction of the shift is to the right by default. For + // left shifts, the shift_left signal is set. If so, the operand is initially reversed, + // shifted to the right by the specified amount and reversed back again. For arithmetic- and + // one-shifts the 33rd bit of the shifter operand is set accordingly. + // + // Multicycle Shifts + // ================= + // + // Rotation + // -------- + // For rotations, the operand signals operand_a_i and operand_b_i are kept constant to rs1 and + // rs2 respectively. + // + // Rotation pseudocode: + // shift_amt = rs2 & 31; + // multicycle_result = (rs1 >> shift_amt) | (rs1 << (32 - shift_amt)); + // ^-- cycle 0 -----^ ^-- cycle 1 --------------^ + // + // Funnel Shifts + // ------------- + // For funnel shifts, operand_a_i is tied to rs1 in the first cycle and rs3 in the + // second cycle. operand_b_i is always tied to rs2. 
The order of applying the shift amount or + // its complement is determined by bit [5] of shift_amt. + // + // Funnel shift Pseudocode: (fsl) + // shift_amt = rs2 & 63; + // shift_amt_compl = 32 - shift_amt[4:0] + // if (shift_amt >=33): + // multicycle_result = (rs1 >> shift_amt_cmpl[4:0]) | (rs3 << shift_amt[4:0]); + // ^-- cycle 0 ---------------^ ^-- cycle 1 ------------^ + // else if (shift_amt <= 31 && shift_amt > 0): + // multicycle_result = (rs1 << shift_amt[4:0]) | (rs3 >> shift_amt_compl[4:0]); + // ^-- cycle 0 ----------^ ^-- cycle 1 -------------------^ + // For shift_amt == 0, 32, both shift_amt[4:0] and shift_amt_compl[4:0] == '0. + // these cases need to be handled separately outside the shifting structure: + // else if (shift_amt == 32): + // multicycle_result = rs3 + // else if (shift_amt == 0): + // multicycle_result = rs1. + // + // Single-Bit Instructions + // ======================= + // Single bit instructions operate on bit operand_b_i[4:0] of operand_a_i. + + // The operations sbset, sbclr and sbinv are implemented by generation of a bit-mask using the + // shifter structure. This is done by left-shifting the operand 32'h1 by the required amount. + // The signal shift_sbmode multiplexes the shifter input and sets the signal shift_left. + // Further processing is taken care of by a separate structure. + // + // For sbext, the bit defined by operand_b_i[4:0] is to be returned. This is done by simply + // shifting operand_a_i to the right by the required amount and returning bit [0] of the result. + + logic shift_left; + logic shift_ones; + logic shift_arith; + logic shift_funnel; + logic shift_sbmode; + logic [5:0] shift_amt; + logic [5:0] shift_amt_compl; // complementary shift amount (32 - shift_amt) + + logic [31:0] shift_result; + logic [32:0] shift_result_ext; + logic [31:0] shift_result_rev; + + // bit shift_amt[5]: word swap bit: only considered for FSL/FSR. + // if set, reverse operations in first and second cycle. 
+ assign shift_amt[5] = operand_b_i[5] & shift_funnel; + assign shift_amt_compl = 32 - operand_b_i[4:0]; + + assign shift_amt[4:0] = instr_first_cycle_i ? + (operand_b_i[5] && shift_funnel ? shift_amt_compl[4:0] : operand_b_i[4:0]) : + (operand_b_i[5] && shift_funnel ? operand_b_i[4:0] : shift_amt_compl[4:0]); + + // single-bit mode: shift + assign shift_sbmode = RV32B ? + (operator_i == ALU_SBSET) | (operator_i == ALU_SBCLR) | (operator_i == ALU_SBINV) : 1'b0; + + // left shift if this is: + // * a standard left shift (slo, sll) + // * a rol in the first cycle + // * a ror in the second cycle + // * fsl: without word-swap bit: first cycle, else: second cycle + // * fsr: without word-swap bit: second cycle, else: first cycle + // * a single-bit instruction: sbclr, sbset, sbinv (excluding sbext) + always_comb begin + unique case (operator_i) + ALU_SLL: shift_left = 1'b1; + ALU_SLO: shift_left = RV32B ? 1'b1 : 1'b0; + ALU_ROL: shift_left = RV32B ? instr_first_cycle_i : 0; + ALU_ROR: shift_left = RV32B ? ~instr_first_cycle_i : 0; + ALU_FSL: shift_left = + RV32B ? (shift_amt[5] ? ~instr_first_cycle_i : instr_first_cycle_i) : 1'b0; + ALU_FSR: shift_left = + RV32B ? (shift_amt[5] ? instr_first_cycle_i : ~instr_first_cycle_i) : 1'b0; + default: shift_left = 1'b0; + endcase + if (shift_sbmode) begin + shift_left = 1'b1; + end + end + + assign shift_arith = (operator_i == ALU_SRA); + assign shift_ones = RV32B ? (operator_i == ALU_SLO) | (operator_i == ALU_SRO) : 1'b0; + assign shift_funnel = RV32B ? (operator_i == ALU_FSL) | (operator_i == ALU_FSR) : 1'b0; + + // shifter structure. + always_comb begin + shift_result = operand_a_i; + + // select bit reversed or normal input + if (shift_left) begin + shift_result = operand_a_rev; + end + + // if this is a single bit instruction: we left-shift 32'h1 by shift_amt. + // the first reverse of the left-shift operation can be easily omitted, since we + // know the result of rev(32'h1). 
+ if (shift_sbmode) begin + shift_result = 32'h8000_0000; + end + + shift_result_ext = + $signed({shift_ones | (shift_arith & shift_result[31]), shift_result}) >>> shift_amt[4:0]; + + shift_result = shift_result_ext[31:0]; + + for (int unsigned i=0; i<32; i++) begin + shift_result_rev[i] = shift_result[31-i]; + end + + shift_result = shift_left ? shift_result_rev : shift_result; + + end /////////////////// // Bitwise Logic // @@ -356,6 +308,7 @@ module ibex_alu #( logic [31:0] bwlogic_or_result; logic [31:0] bwlogic_and_result; logic [31:0] bwlogic_xor_result; + logic [31:0] bwlogic_result; logic bwlogic_op_b_negate; @@ -365,7 +318,7 @@ module ibex_alu #( ALU_XNOR, ALU_ORN, ALU_ANDN: bwlogic_op_b_negate = RV32B ? 1'b1 : 1'b0; - ALU_CMIX: bwlogic_op_b_negate = RV32B ? !instr_first_cycle_i : 1'b0; + ALU_CMIX: bwlogic_op_b_negate = RV32B ? ~instr_first_cycle_i : 1'b0; default: bwlogic_op_b_negate = 1'b0; endcase end @@ -376,8 +329,8 @@ module ibex_alu #( assign bwlogic_and_result = operand_a_i & bwlogic_operand_b; assign bwlogic_xor_result = operand_a_i ^ bwlogic_operand_b; - assign bwlogic_or = (operator_i == ALU_OR) || (operator_i == ALU_ORN); - assign bwlogic_and = (operator_i == ALU_AND) || (operator_i == ALU_ANDN); + assign bwlogic_or = (operator_i == ALU_OR) | (operator_i == ALU_ORN); + assign bwlogic_and = (operator_i == ALU_AND) | (operator_i == ALU_ANDN); always_comb begin unique case (1'b1) @@ -388,9 +341,334 @@ module ibex_alu #( end logic [31:0] shuffle_result; + logic [31:0] butterfly_result; + logic [31:0] invbutterfly_result; + + logic [31:0] minmax_result; + logic [5:0] bitcnt_result; + logic [31:0] pack_result; + logic [31:0] multicycle_result; + logic [31:0] singlebit_result; if (RV32B) begin : g_alu_rvb + ///////////////// + // Bitcounting // + ///////////////// + + // The bit-counter structure computes the number of set bits in its operand. 
Partial results + // (from left to right) are needed to compute the control masks for computation of bext/bdep + // by the butterfly network, if implemented. + // For pcnt, clz and ctz, only the end result is used. + + logic zbe_op; + logic bitcnt_ctz; + logic bitcnt_clz; + logic bitcnt_cz; + logic [31:0] bitcnt_bits; + logic [31:0] bitcnt_mask_op; + logic [31:0] bitcnt_bit_mask; + logic [ 5:0] bitcnt_partial [32]; + + + assign bitcnt_ctz = operator_i == ALU_CTZ; + assign bitcnt_clz = operator_i == ALU_CLZ; + assign bitcnt_cz = bitcnt_ctz | bitcnt_clz; + assign bitcnt_result = bitcnt_partial[31]; + + // Bit-mask generation for clz and ctz: + // The bit mask is generated by spreading the lowest-order set bit in the operand to all + // higher order bits. The resulting mask is inverted to cover the lowest order zeros. In order + // to create the bit mask for leading zeros, the input operand needs to be reversed. + assign bitcnt_mask_op = bitcnt_clz ? operand_a_rev : operand_a_i; + + always_comb begin + bitcnt_bit_mask = bitcnt_mask_op; + bitcnt_bit_mask |= bitcnt_bit_mask << 1; + bitcnt_bit_mask |= bitcnt_bit_mask << 2; + bitcnt_bit_mask |= bitcnt_bit_mask << 4; + bitcnt_bit_mask |= bitcnt_bit_mask << 8; + bitcnt_bit_mask |= bitcnt_bit_mask << 16; + bitcnt_bit_mask = ~bitcnt_bit_mask; + end + + always_comb begin + case(1'b1) + zbe_op: bitcnt_bits = operand_b_i; + bitcnt_cz: bitcnt_bits = bitcnt_bit_mask & ~bitcnt_mask_op; // clz / ctz + default: bitcnt_bits = operand_a_i; // pcnt + endcase + end + + // The parallel prefix counter is of the structure of a Brent-Kung Adder. In the first + // log2(width) stages, the sum of the n preceding bit lines is computed for the bit lines at + // positions 2**n-1 (power-of-two positions) where n denotes the current stage. + // In stage n=log2(width), the count for position width-1 (the MSB) is finished. 
+ // For the intermediate values, an inverse adder tree then computes the bit counts for the bit + // lines at positions + // m = 2**(n-1) + i*2**(n-2), where i = [1 ... width / 2**(n-1)-1] and n = [log2(width) ... 2]. + // Thus, at every subsequent stage the result of two previously unconnected sub-trees is + // summed, starting at the node summing bits [width/2-1 : 0] and [3*width/4-1: width/2] + // and moving to iteratively sum up all the sub-trees. + // The inverse adder tree thus features log2(width) - 1 stages. The first of these stages is a + // single addition at position 3*width/4 - 1. It does not interfere with the last + // stage of the primary adder tree. These stages can thus be folded together, resulting in a + // total of 2*log2(width)-2 stages. + // For more details refer to R. Brent, H. T. Kung, "A Regular Layout for Parallel Adders", + // (1982). + // For a bitline at position p, only bits + // bitcnt_partial[max(i, such that p % log2(i) == 0)-1 : 0] are needed for generation of the + // butterfly network control signals. The adders in the intermediate value adder tree thus need + // not be full 5-bit adders. We leave the optimization to the synthesis tools. + // + // Consider the following 8-bit example for illustration. + // + // let bitcnt_bits = 8'babcdefgh. 
+ // + // a b c d e f g h + // | /: | /: | /: | /: + // |/ : |/ : |/ : |/ : + // stage 1: + : + : + : + : + // | : /: : | : /: : + // |,--+ : : |,--+ : : + // stage 2: + : : : + : : : + // | : | : /: : : : + // |,-----,--+ : : : : ^-primary adder tree + // stage 3: + : + : : : : : ------------------------- + // : | /| /| /| /| /| : ,-intermediate adder tree + // : |/ |/ |/ |/ |/ : : + // stage 4 : + + + + + : : + // : : : : : : : : + // bitcnt_partial[i] 7 6 5 4 3 2 1 0 + + always_comb begin + bitcnt_partial = '{default: '0}; + // stage 1 + for (int unsigned i=1; i<32; i+=2) begin + bitcnt_partial[i] = {5'h0, bitcnt_bits[i]} + {5'h0, bitcnt_bits[i-1]}; + end + // stage 2 + for (int unsigned i=3; i<32; i+=4) begin + bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i]; + end + // stage 3 + for (int unsigned i=7; i<32; i+=8) begin + bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i]; + end + // stage 4 + for (int unsigned i=15; i <32; i+=16) begin + bitcnt_partial[i] = bitcnt_partial[i-8] + bitcnt_partial[i]; + end + // stage 5 + bitcnt_partial[31] = bitcnt_partial[15] + bitcnt_partial[31]; + // ^- primary adder tree + // ------------------------------- + // ,-intermediate value adder tree + bitcnt_partial[23] = bitcnt_partial[15] + bitcnt_partial[23]; + + // stage 6 + for (int unsigned i=11; i<32; i+=8) begin + bitcnt_partial[i] = bitcnt_partial[i-4] + bitcnt_partial[i]; + end + + // stage 7 + for (int unsigned i=5; i<32; i+=4) begin + bitcnt_partial[i] = bitcnt_partial[i-2] + bitcnt_partial[i]; + end + // stage 8 + bitcnt_partial[0] = {5'h0, bitcnt_bits[0]}; + for (int unsigned i=2; i<32; i+=2) begin + bitcnt_partial[i] = bitcnt_partial[i-1] + {5'h0, bitcnt_bits[i]}; + end + end + + /////////////// + // Butterfly // + /////////////// + + // The butterfly / inverse butterfly network is shared between bext/bdep (zbe)instructions + // respectively and grev / gorc instructions (zbp). 
+ // For bdep, the control bits mask of a local left region is generated by + // the inverse of an n-bit left rotate and complement upon wrap (LROTC) operation by the number + // of ones in the deposit bitmask to the right of the segment. n hereby denotes the width + // of the according segment. The bitmask for a pertaining local right region is equal to the + // corresponding local left region. Bext uses an analogous inverse process. + // Consider the following 8-bit example. For details, see Hilewitz et al. "Fast Bit Gather, + // Bit Scatter and Bit Permutation Instructions for Commodity Microprocessors", (2008). + + // 8-bit example: (Hilewitz et al.) + // Consider the instruction bdep operand_a_i deposit_mask + // Let operand_a_i = 8'babcd_efgh + // deposit_mask = 8'b1010_1101 + // + // control bitmask for stage 1: + // - number of ones in the right half of the deposit bitmask: 3 + // - width of the segment: 4 + // - control bitmask = ~LROTC(4'b0, 3)[3:0] = 4'b1000 + // + // control bitmask: c3 c2 c1 c0 c3 c2 c1 c0 + // 1 0 0 0 1 0 0 0 + // <- L -----> <- R -----> + // operand_a_i a b c d e f g h + // :\ | | | /: | | | + // : +|---|--|-+ : | | | + // :/ | | | \: | | | + // stage 1 e b c d a f g h + // + // control bitmask: c3 c2 c3 c2 c1 c0 c1 c0 + // 1 1 1 1 1 0 1 0 + // :\ :\ /: /: :\ | /: | + // : +:-+-:+ : : +|-+ : | + // :/ :/ \: \: :/ | \: | + // stage 2 c d e b g f a h + // L R L R L R L R + // control bitmask: c3 c3 c2 c2 c1 c1 c0 c0 + // 1 1 0 0 1 1 0 0 + // :\/: | | :\/: | | + // : : | | : : | | + // :/\: | | :/\: | | + // stage 3 d c e b f g a h + // & deposit bitmask: 1 0 1 0 1 1 0 1 + // result: d 0 e 0 f g 0 h + + assign zbe_op = (operator_i == ALU_BEXT) | (operator_i == ALU_BDEP); + + logic [31:0] butterfly_mask_l[5]; + logic [31:0] butterfly_mask_r[5]; + logic [31:0] butterfly_mask_not[5]; + logic [31:0] lrotc_stage [5]; // left rotate and complement upon wrap + + // bext / bdep + logic [31:0] butterfly_zbe_mask_l[5]; + logic [31:0] 
butterfly_zbe_mask_r[5]; + logic [31:0] butterfly_zbe_mask_not[5]; + + // grev / gorc + logic [31:0] butterfly_zbp_mask_l[5]; + logic [31:0] butterfly_zbp_mask_r[5]; + logic [31:0] butterfly_zbp_mask_not[5]; + + logic grev_op; + logic gorc_op; + logic zbp_op; + + // number of bits in local r = 32 / 2**(stage + 1) = 16/2**stage + `define _N(stg) (16 >> stg) + + // bext / bdep control bit generation + for (genvar stg=0; stg<5; stg++) begin + // number of segs: 2** stg + for (genvar seg=0; seg<2**stg; seg++) begin + + assign lrotc_stage[stg][2*`_N(stg)*(seg+1)-1 : 2*`_N(stg)*seg] = + {{`_N(stg){1'b0}},{`_N(stg){1'b1}}} << + bitcnt_partial[`_N(stg)*(2*seg+1)-1][$clog2(`_N(stg)):0]; + + assign butterfly_zbe_mask_l[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)] + = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)]; + + assign butterfly_zbe_mask_r[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)] + = ~lrotc_stage[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)]; + + assign butterfly_zbe_mask_l[stg][`_N(stg)*(2*seg+1)-1 : `_N(stg)*(2*seg)] = '0; + assign butterfly_zbe_mask_r[stg][`_N(stg)*(2*seg+2)-1 : `_N(stg)*(2*seg+1)] = '0; + end + end + `undef _N + + for (genvar stg=0; stg<5; stg++) begin + assign butterfly_zbe_mask_not[stg] = + ~(butterfly_zbe_mask_l[stg] | butterfly_zbe_mask_r[stg]); + end + + // grev / gorc control bit generation + assign butterfly_zbp_mask_l[0] = shift_amt[4] ? 32'hffff_0000 : 32'h0000_0000; + assign butterfly_zbp_mask_r[0] = shift_amt[4] ? 32'h0000_ffff : 32'h0000_0000; + assign butterfly_zbp_mask_not[0] = + !shift_amt[4] || (shift_amt[4] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000; + + assign butterfly_zbp_mask_l[1] = shift_amt[3] ? 32'hff00_ff00 : 32'h0000_0000; + assign butterfly_zbp_mask_r[1] = shift_amt[3] ? 32'h00ff_00ff : 32'h0000_0000; + assign butterfly_zbp_mask_not[1] = + !shift_amt[3] || (shift_amt[3] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000; + + assign butterfly_zbp_mask_l[2] = shift_amt[2] ? 
32'hf0f0_f0f0 : 32'h0000_0000; + assign butterfly_zbp_mask_r[2] = shift_amt[2] ? 32'h0f0f_0f0f : 32'h0000_0000; + assign butterfly_zbp_mask_not[2] = + !shift_amt[2] || (shift_amt[2] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000; + + assign butterfly_zbp_mask_l[3] = shift_amt[1] ? 32'hcccc_cccc : 32'h0000_0000; + assign butterfly_zbp_mask_r[3] = shift_amt[1] ? 32'h3333_3333 : 32'h0000_0000; + assign butterfly_zbp_mask_not[3] = + !shift_amt[1] || (shift_amt[1] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000; + + assign butterfly_zbp_mask_l[4] = shift_amt[0] ? 32'haaaa_aaaa : 32'h0000_0000; + assign butterfly_zbp_mask_r[4] = shift_amt[0] ? 32'h5555_5555 : 32'h0000_0000; + assign butterfly_zbp_mask_not[4] = + !shift_amt[0] || (shift_amt[0] && gorc_op) ? 32'hffff_ffff : 32'h0000_0000; + + // grev / gorc instructions + assign grev_op = RV32B ? (operator_i == ALU_GREV) : 1'b0; + assign gorc_op = RV32B ? (operator_i == ALU_GORC) : 1'b0; + assign zbp_op = grev_op | gorc_op; + + // select set of masks: + assign butterfly_mask_l = zbp_op ? butterfly_zbp_mask_l : butterfly_zbe_mask_l; + assign butterfly_mask_r = zbp_op ? butterfly_zbp_mask_r : butterfly_zbe_mask_r; + assign butterfly_mask_not = zbp_op ? 
butterfly_zbp_mask_not : butterfly_zbe_mask_not; + + always_comb begin + butterfly_result = operand_a_i; + + butterfly_result = butterfly_result & butterfly_mask_not[0] | + ((butterfly_result & butterfly_mask_l[0]) >> 16)| + ((butterfly_result & butterfly_mask_r[0]) << 16); + + butterfly_result = butterfly_result & butterfly_mask_not[1] | + ((butterfly_result & butterfly_mask_l[1]) >> 8)| + ((butterfly_result & butterfly_mask_r[1]) << 8); + + butterfly_result = butterfly_result & butterfly_mask_not[2] | + ((butterfly_result & butterfly_mask_l[2]) >> 4)| + ((butterfly_result & butterfly_mask_r[2]) << 4); + + butterfly_result = butterfly_result & butterfly_mask_not[3] | + ((butterfly_result & butterfly_mask_l[3]) >> 2)| + ((butterfly_result & butterfly_mask_r[3]) << 2); + + butterfly_result = butterfly_result & butterfly_mask_not[4] | + ((butterfly_result & butterfly_mask_l[4]) >> 1)| + ((butterfly_result & butterfly_mask_r[4]) << 1); + + if (!zbp_op) begin + butterfly_result = butterfly_result & operand_b_i; + end + end + + always_comb begin + invbutterfly_result = operand_a_i & operand_b_i; + + invbutterfly_result = invbutterfly_result & butterfly_mask_not[4] | + ((invbutterfly_result & butterfly_mask_l[4]) >> 1)| + ((invbutterfly_result & butterfly_mask_r[4]) << 1); + + invbutterfly_result = invbutterfly_result & butterfly_mask_not[3] | + ((invbutterfly_result & butterfly_mask_l[3]) >> 2)| + ((invbutterfly_result & butterfly_mask_r[3]) << 2); + + invbutterfly_result = invbutterfly_result & butterfly_mask_not[2] | + ((invbutterfly_result & butterfly_mask_l[2]) >> 4)| + ((invbutterfly_result & butterfly_mask_r[2]) << 4); + + invbutterfly_result = invbutterfly_result & butterfly_mask_not[1] | + ((invbutterfly_result & butterfly_mask_l[1]) >> 8)| + ((invbutterfly_result & butterfly_mask_r[1]) << 8); + + invbutterfly_result = invbutterfly_result & butterfly_mask_not[0] | + ((invbutterfly_result & butterfly_mask_l[0]) >> 16)| + ((invbutterfly_result & 
butterfly_mask_r[0]) << 16); + end + ///////////////////////// // Shuffle / Unshuffle // ///////////////////////// @@ -429,8 +707,8 @@ module ibex_alu #( if (shuffle_flip) begin shuffle_result = (shuffle_result & 32'h8822_4411) | - ((shuffle_result << 6) & FLIP_MASK_L[0]) | ((shuffle_result >> 6) & FLIP_MASK_R[0]) | - ((shuffle_result << 9) & FLIP_MASK_L[1]) | ((shuffle_result >> 9) & FLIP_MASK_R[1]) | + ((shuffle_result << 6) & FLIP_MASK_L[0]) | ((shuffle_result >> 6) & FLIP_MASK_R[0]) | + ((shuffle_result << 9) & FLIP_MASK_L[1]) | ((shuffle_result >> 9) & FLIP_MASK_R[1]) | ((shuffle_result << 15) & FLIP_MASK_L[2]) | ((shuffle_result >> 15) & FLIP_MASK_R[2]) | ((shuffle_result << 21) & FLIP_MASK_L[3]) | ((shuffle_result >> 21) & FLIP_MASK_R[3]); end @@ -458,8 +736,8 @@ module ibex_alu #( if (shuffle_flip) begin shuffle_result = (shuffle_result & 32'h8822_4411) | - ((shuffle_result << 6) & FLIP_MASK_L[0]) | ((shuffle_result >> 6) & FLIP_MASK_R[0]) | - ((shuffle_result << 9) & FLIP_MASK_L[1]) | ((shuffle_result >> 9) & FLIP_MASK_R[1]) | + ((shuffle_result << 6) & FLIP_MASK_L[0]) | ((shuffle_result >> 6) & FLIP_MASK_R[0]) | + ((shuffle_result << 9) & FLIP_MASK_L[1]) | ((shuffle_result >> 9) & FLIP_MASK_R[1]) | ((shuffle_result << 15) & FLIP_MASK_L[2]) | ((shuffle_result >> 15) & FLIP_MASK_R[2]) | ((shuffle_result << 21) & FLIP_MASK_L[3]) | ((shuffle_result >> 21) & FLIP_MASK_R[3]); end @@ -534,33 +812,8 @@ module ibex_alu #( // Min / Max // /////////////// - assign minmax_result = (cmp_result ? operand_a_i : operand_b_i); + assign minmax_result = cmp_result ? operand_a_i : operand_b_i; - ///////////////// - // Bitcounting // - ///////////////// - - logic bitcnt_ctz; - logic bitcnt_pcnt; - logic [31:0] bitcnt_bits; - logic [32:0] bitcnt_bit_enable; - - assign bitcnt_ctz = (operator_i == ALU_CTZ); - assign bitcnt_pcnt = (operator_i == ALU_PCNT); - - assign bitcnt_bits = bitcnt_pcnt ? operand_a_i : (bitcnt_ctz ? 
~operand_a_i : ~operand_a_rev); - - always_comb begin - bitcnt_result = '0; - bitcnt_bit_enable = {32'b0, 1'b1}; // bit 32 unused. - for (int unsigned i=0; i<32; i++) begin : gen_bitcnt_adder - // keep counting if all previous bits are 1 - bitcnt_bit_enable[i+1] = bitcnt_pcnt || (bitcnt_bit_enable[i] && bitcnt_bits[i]); - if (bitcnt_bit_enable[i]) begin - bitcnt_result[5:0] = bitcnt_result[5:0] + {5'h0, bitcnt_bits[i]}; - end - end - end ////////// // Pack // @@ -568,8 +821,8 @@ module ibex_alu #( logic packu; logic packh; - assign packu = (operator_i == ALU_PACKU); - assign packh = (operator_i == ALU_PACKH); + assign packu = operator_i == ALU_PACKU; + assign packh = operator_i == ALU_PACKH; always_comb begin unique case (1'b1) @@ -580,15 +833,17 @@ module ibex_alu #( end end else begin : g_no_alu_rvb // RV32B result signals - assign minmax_result = '0; - assign bitcnt_result = '0; - assign pack_result = '0; - assign multicycle_result = '0; - assign singlebit_result = '0; - assign shuffle_result = '0; + assign minmax_result = '0; + assign bitcnt_result = '0; + assign pack_result = '0; + assign multicycle_result = '0; + assign singlebit_result = '0; + assign shuffle_result = '0; + assign butterfly_result = '0; + assign invbutterfly_result = '0; // RV32B support signals - assign imd_val_d_o = '0; - assign imd_val_we_o = '0; + assign imd_val_d_o = '0; + assign imd_val_we_o = '0; end //////////////// @@ -611,8 +866,7 @@ module ibex_alu #( ALU_SLL, ALU_SRL, ALU_SRA, // RV32B Ops - ALU_SLO, ALU_SRO, - ALU_GREV, ALU_GORC: result_o = shift_result; + ALU_SLO, ALU_SRO: result_o = shift_result; // Shuffle Operations (RV32B Ops) ALU_SHFL, ALU_UNSHFL: result_o = shuffle_result; @@ -644,6 +898,13 @@ module ibex_alu #( ALU_SBSET, ALU_SBCLR, ALU_SBINV, ALU_SBEXT: result_o = singlebit_result; + // Bit Extract / Deposit (RV32B Ops) + ALU_BDEP: result_o = butterfly_result; + ALU_BEXT: result_o = invbutterfly_result; + + // General Reverse / Or-combine (RV32B Ops) + ALU_GREV, 
ALU_GORC: result_o = butterfly_result; + default: ; endcase end diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv index 80e6d546..56f7bfd9 100644 --- a/rtl/ibex_decoder.sv +++ b/rtl/ibex_decoder.sv @@ -405,7 +405,7 @@ module ibex_decoder #( {7'b000_0000, 3'b101}, {7'b010_0000, 3'b101}: illegal_insn = 1'b0; - // supported RV32B instructions (zbb) + // RV32B zbb {7'b010_0000, 3'b111}, // andn {7'b010_0000, 3'b110}, // orn {7'b010_0000, 3'b100}, // xnor @@ -420,11 +420,15 @@ module ibex_decoder #( {7'b000_0100, 3'b100}, // pack {7'b010_0100, 3'b100}, // packu {7'b000_0100, 3'b111}, // packh - // RV32B instructions (zbs) + // RV32B zbs {7'b010_0100, 3'b001}, // sbclr {7'b001_0100, 3'b001}, // sbset {7'b011_0100, 3'b001}, // sbinv {7'b010_0100, 3'b101}, // sbext + // RV32B zbe + {7'b010_0100, 3'b110}, // bdep + {7'b000_0100, 3'b110}, // bext + // RV32B zbp {7'b011_0100, 3'b101}, // grev {7'b001_0100, 3'b101}, // gorc {7'b000_0100, 3'b001}, // shfl @@ -877,7 +881,7 @@ module ibex_decoder #( {7'b000_0000, 3'b101}: alu_operator_o = ALU_SRL; // Shift Right Logical {7'b010_0000, 3'b101}: alu_operator_o = ALU_SRA; // Shift Right Arithmetic - // RV32B ALU Operations + // RV32B zbb {7'b001_0000, 3'b001}: if (RV32B) alu_operator_o = ALU_SLO; // Shift Left Ones {7'b001_0000, 3'b101}: if (RV32B) alu_operator_o = ALU_SRO; // Shift Right Ones {7'b011_0000, 3'b001}: begin @@ -905,18 +909,21 @@ module ibex_decoder #( {7'b010_0000, 3'b100}: if (RV32B) alu_operator_o = ALU_XNOR; // Xnor {7'b010_0000, 3'b110}: if (RV32B) alu_operator_o = ALU_ORN; // Orn {7'b010_0000, 3'b111}: if (RV32B) alu_operator_o = ALU_ANDN; // Andn + // RV32B zbp {7'b011_0100, 3'b101}: if (RV32B) alu_operator_o = ALU_GREV; // Grev {7'b001_0100, 3'b101}: if (RV32B) alu_operator_o = ALU_GORC; // Grev {7'b000_0100, 3'b001}: if (RV32B) alu_operator_o = ALU_SHFL; // Shfl {7'b000_0100, 3'b101}: if (RV32B) alu_operator_o = ALU_UNSHFL; // Unshfl - - // RV32B ALU_Operations (zbs) + // RV32B zbs {7'b010_0100, 3'b001}: 
if (RV32B) alu_operator_o = ALU_SBCLR; // sbclr {7'b001_0100, 3'b001}: if (RV32B) alu_operator_o = ALU_SBSET; // sbset {7'b011_0100, 3'b001}: if (RV32B) alu_operator_o = ALU_SBINV; // sbinv {7'b010_0100, 3'b101}: if (RV32B) alu_operator_o = ALU_SBEXT; // sbext + // RV32B zbe + {7'b010_0100, 3'b110}: if (RV32B) alu_operator_o = ALU_BDEP; // bdep + {7'b000_0100, 3'b110}: if (RV32B) alu_operator_o = ALU_BEXT; // bext // RV32M instructions, all use the same ALU operation {7'b000_0001, 3'b000}: begin // mul alu_operator_o = ALU_ADD; diff --git a/rtl/ibex_pkg.sv b/rtl/ibex_pkg.sv index 1d4972b3..4d917bab 100644 --- a/rtl/ibex_pkg.sv +++ b/rtl/ibex_pkg.sv @@ -101,7 +101,12 @@ typedef enum logic [5:0] { ALU_SBSET, ALU_SBCLR, ALU_SBINV, - ALU_SBEXT + ALU_SBEXT, + + // Bit Extract / Deposit + // RV32B + ALU_BEXT, + ALU_BDEP } alu_op_e; typedef enum logic [1:0] { diff --git a/rtl/ibex_tracer.sv b/rtl/ibex_tracer.sv index 85710eed..cdd3c9c1 100644 --- a/rtl/ibex_tracer.sv +++ b/rtl/ibex_tracer.sv @@ -905,6 +905,9 @@ module ibex_tracer ( INSN_SBSET: decode_r_insn("sbset"); INSN_SBINV: decode_r_insn("sbinv"); INSN_SBEXT: decode_r_insn("sbext"); + // RV32B - ZBE + INSN_BDEP: decode_r_insn("bdep"); + INSN_BEXT: decode_r_insn("bext"); // RV32B - ZBP INSN_GREV: decode_r_insn("grev"); INSN_GREVI: begin diff --git a/rtl/ibex_tracer_pkg.sv b/rtl/ibex_tracer_pkg.sv index 730213c3..f3873056 100644 --- a/rtl/ibex_tracer_pkg.sv +++ b/rtl/ibex_tracer_pkg.sv @@ -72,7 +72,6 @@ parameter logic [31:0] INSN_PMULHSU = { 7'b0000001, 10'b?, 3'b010, 5'b?, {OPCODE parameter logic [31:0] INSN_PMULHU = { 7'b0000001, 10'b?, 3'b011, 5'b?, {OPCODE_OP} }; // RV32B -// OPIMM // ZBB parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} }; parameter logic [31:0] INSN_SROI = { 5'b00100 , 12'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} }; @@ -80,6 +79,33 @@ parameter logic [31:0] INSN_RORI = { 5'b01100 , 12'b?, 3'b101, 5'b?, {OPC parameter logic [31:0] INSN_CLZ = { 12'b011000000000, 5'b? 
, 3'b001, 5'b?, {OPCODE_OP_IMM} }; parameter logic [31:0] INSN_CTZ = { 12'b011000000001, 5'b? , 3'b001, 5'b?, {OPCODE_OP_IMM} }; parameter logic [31:0] INSN_PCNT = { 12'b011000000010, 5'b? , 3'b001, 5'b?, {OPCODE_OP_IMM} }; + +parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_ROL = { 7'b0110000, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_ROR = { 7'b0110000, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_MIN = { 7'b0000101, 10'b?, 3'b100, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_MAX = { 7'b0000101, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_MINU = { 7'b0000101, 10'b?, 3'b110, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_MAXU = { 7'b0000101, 10'b?, 3'b111, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_XNOR = { 7'b0100000, 10'b?, 3'b100, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_ORN = { 7'b0100000, 10'b?, 3'b110, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_ANDN = { 7'b0100000, 10'b?, 3'b111, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_PACK = { 7'b0000100, 10'b?, 3'b100, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_PACKU = { 7'b0100100, 10'b?, 3'b100, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_PACKH = { 7'b0000100, 10'b?, 3'b111, 5'b?, {OPCODE_OP} }; + +// ZBS +parameter logic [31:0] INSN_SBCLRI = { 5'b01001, 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} }; +parameter logic [31:0] INSN_SBSETI = { 5'b00101, 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} }; +parameter logic [31:0] INSN_SBINVI = { 5'b01101, 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} }; +parameter logic [31:0] INSN_SBEXTI = { 5'b01001, 12'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} }; + +parameter logic [31:0] INSN_SBCLR = { 7'b0100100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_SBSET = { 7'b0010100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] 
INSN_SBINV = { 7'b0110100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_SBEXT = { 7'b0100100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; + // ZBP // grevi parameter logic [31:0] INSN_GREVI = { 5'b01101, 12'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} }; @@ -194,45 +220,18 @@ parameter logic [31:0] INSN_UNZIP2 = parameter logic [31:0] INSN_UNZIP = { 5'b00010, 3'b?, 4'b1111, 5'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} }; -// ZBS -parameter logic [31:0] INSN_SBCLRI = { 5'b01001 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} }; -parameter logic [31:0] INSN_SBSETI = { 5'b00101 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} }; -parameter logic [31:0] INSN_SBINVI = { 5'b01101 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} }; -parameter logic [31:0] INSN_SBEXTI = { 5'b01001 , 12'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} }; - -// ZBT -parameter logic [31:0] INSN_FSRI = { 5'b?, 1'b1, 11'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} }; - -// OP -// ZBB -parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_ROL = { 7'b0110000, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_ROR = { 7'b0110000, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_MIN = { 7'b0000101, 10'b?, 3'b100, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_MAX = { 7'b0000101, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_MINU = { 7'b0000101, 10'b?, 3'b110, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_MAXU = { 7'b0000101, 10'b?, 3'b111, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_XNOR = { 7'b0100000, 10'b?, 3'b100, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_ORN = { 7'b0100000, 10'b?, 3'b110, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_ANDN = { 7'b0100000, 10'b?, 3'b111, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_PACK = { 7'b0000100, 10'b?, 3'b100, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_PACKU = { 7'b0100100, 10'b?, 3'b100, 
5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_PACKH = { 7'b0000100, 10'b?, 3'b111, 5'b?, {OPCODE_OP} }; - -// ZBP parameter logic [31:0] INSN_GREV = { 7'b0110100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; parameter logic [31:0] INSN_GORC = { 7'b0010100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; parameter logic [31:0] INSN_SHFL = { 7'b0000100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; parameter logic [31:0] INSN_UNSHFL = { 7'b0000100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; -// ZBS -parameter logic [31:0] INSN_SBCLR = { 7'b0100100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_SBSET = { 7'b0010100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_SBINV = { 7'b0110100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; -parameter logic [31:0] INSN_SBEXT = { 7'b0100100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; +// ZBE +parameter logic [31:0] INSN_BDEP = {7'b0100100, 10'b?, 3'b110, 5'b?, {OPCODE_OP} }; +parameter logic [31:0] INSN_BEXT = {7'b0000100, 10'b?, 3'b110, 5'b?, {OPCODE_OP} }; // ZBT +parameter logic [31:0] INSN_FSRI = { 5'b?, 1'b1, 11'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_CMIX = {5'b?, 2'b11, 10'b?, 3'b001, 5'b?, {OPCODE_OP} }; parameter logic [31:0] INSN_CMOV = {5'b?, 2'b11, 10'b?, 3'b101, 5'b?, {OPCODE_OP} }; parameter logic [31:0] INSN_FSL = {5'b?, 2'b10, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };