diff --git a/doc/03_reference/instruction_decode_execute.rst b/doc/03_reference/instruction_decode_execute.rst index 8c306fce..3a57e49b 100644 --- a/doc/03_reference/instruction_decode_execute.rst +++ b/doc/03_reference/instruction_decode_execute.rst @@ -97,8 +97,6 @@ Bit Manipulation Extension +---------------------------------+---------------+--------------------------+ | Zbt (Ternary) | Balanced/Full | All | +---------------------------------+---------------+--------------------------+ - | Zb_tmp (Temporary) [#B_zb_tmp]_ | Balanced/Full | None | - +---------------------------------+---------------+--------------------------+ The implementation of the B-extension comes with an area overhead of 1.8 to 3.0 kGE for the balanced version and 6.0 to 8.7 kGE for the full version. That corresponds to an approximate percentage increase in area of 9 to 14 % and 25 to 30 % for the balanced and full versions respectively. @@ -178,6 +176,3 @@ See :ref:`load-store-unit` for more details. Ibex will be updated to match future versions of the specification. Prior to ratification this may involve backwards incompatible changes. Additionally, neither GCC or Clang have committed to maintaining support upstream for unratified versions of the specification. - -.. [#B_zb_tmp] The sign-extend instructions `sext.b/sext.h` are defined but not unambiguously categorized in draft version 0.92 of the extension. - Temporarily, they have been assigned a separate Z-extension (Zb_tmp) both in Ibex and the RISCV-DV random instruction generator used to verify the bit manipulation instructions in Ibex. diff --git a/rtl/ibex_alu.sv b/rtl/ibex_alu.sv index 18b49d40..6ce70471 100644 --- a/rtl/ibex_alu.sv +++ b/rtl/ibex_alu.sv @@ -304,7 +304,7 @@ module ibex_alu #( always_comb begin unique case (operator_i) ALU_SLL: shift_left = 1'b1; - ALU_SLO, + ALU_SLO: shift_left = (RV32B == RV32BFull) ? 1'b1 : 1'b0; ALU_BFP: shift_left = (RV32B != RV32BNone) ? 1'b1 : 1'b0; ALU_ROL: shift_left = (RV32B != RV32BNone) ? instr_first_cycle_i : 0; ALU_ROR: shift_left = (RV32B != RV32BNone) ? ~instr_first_cycle_i : 0; @@ -321,7 +321,7 @@ module ibex_alu #( assign shift_arith = (operator_i == ALU_SRA); assign shift_ones = - (RV32B != RV32BNone) ? (operator_i == ALU_SLO) | (operator_i == ALU_SRO) : 1'b0; + (RV32B == RV32BFull) ? (operator_i == ALU_SLO) | (operator_i == ALU_SRO) : 1'b0; assign shift_funnel = (RV32B != RV32BNone) ? (operator_i == ALU_FSL) | (operator_i == ALU_FSR) : 1'b0; @@ -417,7 +417,7 @@ module ibex_alu #( // The bit-counter structure computes the number of set bits in its operand. Partial results // (from left to right) are needed to compute the control masks for computation of // bcompress/bdecompress by the butterfly network, if implemented. - // For pcnt, clz and ctz, only the end result is used. + // For cpop, clz and ctz, only the end result is used. logic zbe_op; logic bitcnt_ctz; @@ -458,7 +458,7 @@ module ibex_alu #( case (1'b1) zbe_op: bitcnt_bits = operand_b_i; bitcnt_cz: bitcnt_bits = bitcnt_bit_mask & ~bitcnt_mask_op; // clz / ctz - default: bitcnt_bits = operand_a_i; // pcnt + default: bitcnt_bits = operand_a_i; // cpop endcase end @@ -591,16 +591,16 @@ module ibex_alu #( // General Reverse and Or-combine // //////////////////////////////////// - // Only a subset of the General reverse and or-combine instructions are implemented in the - // balanced version of the B extension. Currently rev, rev8 and orc.b are supported in the - // base extension. + // Only a subset of the general reverse and or-combine instructions are implemented in the + // balanced version of the B extension. Currently rev8 (shift_amt = 5'b11000) and orc.b + // (shift_amt = 5'b00111) are supported in the base extension. logic [4:0] zbp_shift_amt; logic gorc_op; assign gorc_op = (operator_i == ALU_GORC); - assign zbp_shift_amt[2:0] = (RV32B == RV32BFull) ? shift_amt[2:0] : {3{&shift_amt[2:0]}}; - assign zbp_shift_amt[4:3] = (RV32B == RV32BFull) ? shift_amt[4:3] : {2{&shift_amt[4:3]}}; + assign zbp_shift_amt[2:0] = (RV32B == RV32BFull) ? shift_amt[2:0] : {3{shift_amt[0]}}; + assign zbp_shift_amt[4:3] = (RV32B == RV32BFull) ? shift_amt[4:3] : {2{shift_amt[3]}}; always_comb begin rev_result = operand_a_i; @@ -1250,7 +1250,7 @@ module ibex_alu #( // Bitcount Operations (RV32B) ALU_CLZ, ALU_CTZ, - ALU_PCNT: result_o = {26'h0, bitcnt_result}; + ALU_CPOP: result_o = {26'h0, bitcnt_result}; // Pack Operations (RV32B) ALU_PACK, ALU_PACKH, diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv index 4a254c73..692d1413 100644 --- a/rtl/ibex_decoder.sv +++ b/rtl/ibex_decoder.sv @@ -362,7 +362,7 @@ module ibex_decoder #( 3'b001: begin unique case (instr[31:27]) 5'b0_0000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1; // slli - 5'b0_0100, // sloi + 5'b0_0100: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // sloi 5'b0_1001, // bclri 5'b0_0101, // bseti 5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // binvi @@ -375,7 +375,7 @@ module ibex_decoder #( unique case(instr[26:20]) 7'b000_0000, // clz 7'b000_0001, // ctz - 7'b000_0010, // pcnt + 7'b000_0010, // cpop 7'b000_0100, // sext.b 7'b000_0101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // sext.h 7'b001_0000, // crc32.b @@ -400,20 +400,17 @@ module ibex_decoder #( 5'b0_0000, // srli 5'b0_1000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1; // srai - 5'b0_0100, // sroi + 5'b0_0100: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // sroi 5'b0_1100, // rori 5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // bexti 5'b0_1101: begin if ((RV32B == RV32BFull)) begin illegal_insn = 1'b0; // grevi + end else if (RV32B == RV32BBalanced) begin + illegal_insn = (instr[24:20] == 5'b11000) ? 1'b0 : 1'b1; // rev8 end else begin - unique case (instr[24:20]) - 5'b11111, // rev - 5'b11000: illegal_insn = (RV32B == RV32BBalanced) ? 1'b0 : 1'b1; // rev8 - - default: illegal_insn = 1'b1; - endcase + illegal_insn = 1'b1; end end 5'b0_0101: begin @@ -470,13 +467,11 @@ module ibex_decoder #( {7'b010_0000, 3'b111}, // andn {7'b010_0000, 3'b110}, // orn {7'b010_0000, 3'b100}, // xnor - {7'b001_0000, 3'b001}, // slo - {7'b001_0000, 3'b101}, // sro {7'b011_0000, 3'b001}, // rol {7'b011_0000, 3'b101}, // ror {7'b000_0101, 3'b100}, // min - {7'b000_0101, 3'b101}, // max - {7'b000_0101, 3'b110}, // minu + {7'b000_0101, 3'b110}, // max + {7'b000_0101, 3'b101}, // minu {7'b000_0101, 3'b111}, // maxu {7'b000_0100, 3'b100}, // pack {7'b010_0100, 3'b100}, // packu @@ -496,6 +491,8 @@ module ibex_decoder #( {7'b001_0100, 3'b101}, // gorc {7'b000_0100, 3'b001}, // shfl {7'b000_0100, 3'b101}, // unshfl + {7'b001_0000, 3'b001}, // slo + {7'b001_0000, 3'b101}, // sro // RV32B zbc {7'b000_0101, 3'b001}, // clmul {7'b000_0101, 3'b010}, // clmulr @@ -823,7 +820,8 @@ module ibex_decoder #( if (RV32B != RV32BNone) begin unique case (instr_alu[31:27]) 5'b0_0000: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate - 5'b0_0100: alu_operator_o = ALU_SLO; // Shift Left Ones by Immediate + // Shift Left Ones by Immediate + 5'b0_0100: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO; 5'b0_1001: alu_operator_o = ALU_BCLR; // Clear bit specified by immediate 5'b0_0101: alu_operator_o = ALU_BSET; // Set bit specified by immediate 5'b0_1101: alu_operator_o = ALU_BINV; // Invert bit specified by immediate. @@ -833,7 +831,7 @@ module ibex_decoder #( unique case (instr_alu[26:20]) 7'b000_0000: alu_operator_o = ALU_CLZ; // clz 7'b000_0001: alu_operator_o = ALU_CTZ; // ctz - 7'b000_0010: alu_operator_o = ALU_PCNT; // pcnt + 7'b000_0010: alu_operator_o = ALU_CPOP; // cpop 7'b000_0100: alu_operator_o = ALU_SEXTB; // sext.b 7'b000_0101: alu_operator_o = ALU_SEXTH; // sext.h 7'b001_0000: begin @@ -897,7 +895,8 @@ module ibex_decoder #( unique case (instr_alu[31:27]) 5'b0_0000: alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate 5'b0_1000: alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate - 5'b0_0100: alu_operator_o = ALU_SRO; // Shift Right Ones by Immediate + // Shift Right Ones by Immediate + 5'b0_0100: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO; 5'b0_1001: alu_operator_o = ALU_BEXT; // Extract bit specified by immediate. 5'b0_1100: begin alu_operator_o = ALU_ROR; // Rotate Right by Immediate @@ -989,8 +988,6 @@ module ibex_decoder #( {7'b010_0000, 3'b101}: alu_operator_o = ALU_SRA; // Shift Right Arithmetic // RV32B ALU Operations - {7'b001_0000, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_SLO; // slo - {7'b001_0000, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_SRO; // sro {7'b011_0000, 3'b001}: begin if (RV32B != RV32BNone) begin alu_operator_o = ALU_ROL; // rol @@ -1005,8 +1002,8 @@ module ibex_decoder #( end {7'b000_0101, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_MIN; // min - {7'b000_0101, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAX; // max - {7'b000_0101, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_MINU; // minu + {7'b000_0101, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAX; // max + {7'b000_0101, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_MINU; // minu {7'b000_0101, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAXU; // maxu {7'b000_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACK; // pack @@ -1036,6 +1033,8 @@ module ibex_decoder #( {7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC; // grev {7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL; // shfl {7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; // unshfl + {7'b001_0000, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO; // slo + {7'b001_0000, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO; // sro // RV32B zbc {7'b000_0101, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMUL; // clmul diff --git a/rtl/ibex_pkg.sv b/rtl/ibex_pkg.sv index 59255861..58a8404f 100644 --- a/rtl/ibex_pkg.sv +++ b/rtl/ibex_pkg.sv @@ -135,7 +135,7 @@ package ibex_pkg; // RV32B ALU_CLZ, ALU_CTZ, - ALU_PCNT, + ALU_CPOP, // Set lower than ALU_SLT, diff --git a/rtl/ibex_tracer.sv b/rtl/ibex_tracer.sv index bfe228ba..bcfb87cc 100644 --- a/rtl/ibex_tracer.sv +++ b/rtl/ibex_tracer.sv @@ -912,11 +912,7 @@ module ibex_tracer ( INSN_SH2ADD: decode_r_insn("sh2add"); INSN_SH3ADD: decode_r_insn("sh3add"); // RV32B - ZBB - INSN_SLOI: decode_i_shift_insn("sloi"); - INSN_SROI: decode_i_shift_insn("sroi"); INSN_RORI: decode_i_shift_insn("rori"); - INSN_SLO: decode_r_insn("slo"); - INSN_SRO: decode_r_insn("sro"); INSN_ROL: decode_r_insn("rol"); INSN_ROR: decode_r_insn("ror"); INSN_MIN: decode_r_insn("min"); @@ -940,7 +936,7 @@ module ibex_tracer ( INSN_PACKU: decode_r_insn("packu"); INSN_CLZ: decode_r1_insn("clz"); INSN_CTZ: decode_r1_insn("ctz"); - INSN_PCNT: decode_r1_insn("pcnt"); + INSN_CPOP: decode_r1_insn("cpop"); INSN_SEXTB: decode_r1_insn("sext.b"); INSN_SEXTH: decode_r1_insn("sext.h"); // RV32B - ZBS @@ -1030,6 +1026,10 @@ module ibex_tracer ( default: decode_i_insn("unshfli"); endcase end + INSN_SLO: decode_r_insn("slo"); + INSN_SRO: decode_r_insn("sro"); + INSN_SLOI: decode_i_shift_insn("sloi"); + INSN_SROI: decode_i_shift_insn("sroi"); // RV32B - ZBT INSN_CMIX: decode_r_cmixcmov_insn("cmix"); diff --git a/rtl/ibex_tracer_pkg.sv b/rtl/ibex_tracer_pkg.sv index 9ee48324..0970bc80 100644 --- a/rtl/ibex_tracer_pkg.sv +++ b/rtl/ibex_tracer_pkg.sv @@ -77,15 +77,13 @@ package ibex_tracer_pkg; parameter logic [31:0] INSN_SH3ADD = { 7'b0010000, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; // ZBB - parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in // instr[24:20] are effectively used. Whenever instr[26] is set, sroi/rori is instead decoded as // fsri. - parameter logic [31:0] INSN_SROI = { 5'b00100 , 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; parameter logic [31:0] INSN_RORI = { 5'b01100 , 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; parameter logic [31:0] INSN_CLZ = { 12'b011000000000, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; parameter logic [31:0] INSN_CTZ = { 12'b011000000001, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; - parameter logic [31:0] INSN_PCNT = { 12'b011000000010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + parameter logic [31:0] INSN_CPOP = { 12'b011000000010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; parameter logic [31:0] INSN_SEXTB = { 12'b011000000100, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; parameter logic [31:0] INSN_SEXTH = { 12'b011000000101, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; // The ZEXT.B and ZEXT.H pseudo-instructions are currently not emitted by the tracer due to a lack @@ -96,13 +94,11 @@ package ibex_tracer_pkg; // sext -- pseudoinstruction: pack rd, rs zero // parameter logic [31:0] INSN_ZEXTH = { 7'b0000100, 5'b00000, 5'h?, 3'b100, 5'h?, {OPCODE_OP} }; - parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; - parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; parameter logic [31:0] INSN_ROL = { 7'b0110000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; parameter logic [31:0] INSN_ROR = { 7'b0110000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; parameter logic [31:0] INSN_MIN = { 7'b0000101, 10'h?, 3'b100, 5'h?, {OPCODE_OP} }; - parameter logic [31:0] INSN_MAX = { 7'b0000101, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; - parameter logic [31:0] INSN_MINU = { 7'b0000101, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_MAX = { 7'b0000101, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_MINU = { 7'b0000101, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; parameter logic [31:0] INSN_MAXU = { 7'b0000101, 10'h?, 3'b111, 5'h?, {OPCODE_OP} }; parameter logic [31:0] INSN_XNOR = { 7'b0100000, 10'h?, 3'b100, 5'h?, {OPCODE_OP} }; parameter logic [31:0] INSN_ORN = { 7'b0100000, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; @@ -247,6 +243,14 @@ package ibex_tracer_pkg; parameter logic [31:0] INSN_SHFL = { 7'b0000100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; parameter logic [31:0] INSN_UNSHFL = { 7'b0000100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} }; + parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} }; + // Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in + // instr[24:20] are effectively used. Whenever instr[26] is set, sroi/rori is instead decoded as + // fsri. + parameter logic [31:0] INSN_SROI = { 5'b00100 , 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} }; + // ZBE parameter logic [31:0] INSN_BDECOMPRESS = {7'b0100100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} }; parameter logic [31:0] INSN_BCOMPRESS = {7'b0000100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };