[rtl, bitmanip] Align Zbb implementation with draft v.0.93 and v.1.0.0

This involves the following changes:
- Rename pcnt to cpop
- Switch encoding of max and minu
- Remove rev from Balanced version, only available in Full version via
  grev (Zbp)
- Include sext.b/h (previously in Zb_tmp)
- Remove slo[i] and sro[i] from Balanced version, only available in Full
  version (Zbp)

Signed-off-by: Pirmin Vogel <vogelpi@lowrisc.org>
This commit is contained in:
Pirmin Vogel 2021-11-26 14:23:05 +01:00
parent e765b4dfec
commit 16d6f5ea2b
6 changed files with 46 additions and 48 deletions

View file

@ -97,8 +97,6 @@ Bit Manipulation Extension
+---------------------------------+---------------+--------------------------+
| Zbt (Ternary) | Balanced/Full | All |
+---------------------------------+---------------+--------------------------+
| Zb_tmp (Temporary) [#B_zb_tmp]_ | Balanced/Full | None |
+---------------------------------+---------------+--------------------------+
The implementation of the B-extension comes with an area overhead of 1.8 to 3.0 kGE for the balanced version and 6.0 to 8.7 kGE for the full version.
That corresponds to an approximate percentage increase in area of 9 to 14 % and 25 to 30 % for the balanced and full versions respectively.
@ -178,6 +176,3 @@ See :ref:`load-store-unit` for more details.
Ibex will be updated to match future versions of the specification.
Prior to ratification this may involve backwards incompatible changes.
Additionally, neither GCC nor Clang has committed to maintaining support upstream for unratified versions of the specification.
.. [#B_zb_tmp] The sign-extend instructions `sext.b/sext.h` are defined but not unambiguously categorized in draft version 0.92 of the extension.
Temporarily, they have been assigned a separate Z-extension (Zb_tmp) both in Ibex and the RISCV-DV random instruction generator used to verify the bit manipulation instructions in Ibex.

View file

@ -304,7 +304,7 @@ module ibex_alu #(
always_comb begin
unique case (operator_i)
ALU_SLL: shift_left = 1'b1;
ALU_SLO,
ALU_SLO: shift_left = (RV32B == RV32BFull) ? 1'b1 : 1'b0;
ALU_BFP: shift_left = (RV32B != RV32BNone) ? 1'b1 : 1'b0;
ALU_ROL: shift_left = (RV32B != RV32BNone) ? instr_first_cycle_i : 0;
ALU_ROR: shift_left = (RV32B != RV32BNone) ? ~instr_first_cycle_i : 0;
@ -321,7 +321,7 @@ module ibex_alu #(
assign shift_arith = (operator_i == ALU_SRA);
assign shift_ones =
(RV32B != RV32BNone) ? (operator_i == ALU_SLO) | (operator_i == ALU_SRO) : 1'b0;
(RV32B == RV32BFull) ? (operator_i == ALU_SLO) | (operator_i == ALU_SRO) : 1'b0;
assign shift_funnel =
(RV32B != RV32BNone) ? (operator_i == ALU_FSL) | (operator_i == ALU_FSR) : 1'b0;
@ -417,7 +417,7 @@ module ibex_alu #(
// The bit-counter structure computes the number of set bits in its operand. Partial results
// (from left to right) are needed to compute the control masks for computation of
// bcompress/bdecompress by the butterfly network, if implemented.
// For pcnt, clz and ctz, only the end result is used.
// For cpop, clz and ctz, only the end result is used.
logic zbe_op;
logic bitcnt_ctz;
@ -458,7 +458,7 @@ module ibex_alu #(
case (1'b1)
zbe_op: bitcnt_bits = operand_b_i;
bitcnt_cz: bitcnt_bits = bitcnt_bit_mask & ~bitcnt_mask_op; // clz / ctz
default: bitcnt_bits = operand_a_i; // pcnt
default: bitcnt_bits = operand_a_i; // cpop
endcase
end
@ -591,16 +591,16 @@ module ibex_alu #(
// General Reverse and Or-combine //
////////////////////////////////////
// Only a subset of the General reverse and or-combine instructions are implemented in the
// balanced version of the B extension. Currently rev, rev8 and orc.b are supported in the
// base extension.
// Only a subset of the general reverse and or-combine instructions are implemented in the
// balanced version of the B extension. Currently rev8 (shift_amt = 5'b11000) and orc.b
// (shift_amt = 5'b00111) are supported in the base extension.
logic [4:0] zbp_shift_amt;
logic gorc_op;
assign gorc_op = (operator_i == ALU_GORC);
assign zbp_shift_amt[2:0] = (RV32B == RV32BFull) ? shift_amt[2:0] : {3{&shift_amt[2:0]}};
assign zbp_shift_amt[4:3] = (RV32B == RV32BFull) ? shift_amt[4:3] : {2{&shift_amt[4:3]}};
assign zbp_shift_amt[2:0] = (RV32B == RV32BFull) ? shift_amt[2:0] : {3{shift_amt[0]}};
assign zbp_shift_amt[4:3] = (RV32B == RV32BFull) ? shift_amt[4:3] : {2{shift_amt[3]}};
always_comb begin
rev_result = operand_a_i;
@ -1250,7 +1250,7 @@ module ibex_alu #(
// Bitcount Operations (RV32B)
ALU_CLZ, ALU_CTZ,
ALU_PCNT: result_o = {26'h0, bitcnt_result};
ALU_CPOP: result_o = {26'h0, bitcnt_result};
// Pack Operations (RV32B)
ALU_PACK, ALU_PACKH,

View file

@ -362,7 +362,7 @@ module ibex_decoder #(
3'b001: begin
unique case (instr[31:27])
5'b0_0000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1; // slli
5'b0_0100, // sloi
5'b0_0100: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // sloi
5'b0_1001, // bclri
5'b0_0101, // bseti
5'b0_1101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // binvi
@ -375,7 +375,7 @@ module ibex_decoder #(
unique case(instr[26:20])
7'b000_0000, // clz
7'b000_0001, // ctz
7'b000_0010, // pcnt
7'b000_0010, // cpop
7'b000_0100, // sext.b
7'b000_0101: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // sext.h
7'b001_0000, // crc32.b
@ -400,20 +400,17 @@ module ibex_decoder #(
5'b0_0000, // srli
5'b0_1000: illegal_insn = (instr[26:25] == 2'b00) ? 1'b0 : 1'b1; // srai
5'b0_0100, // sroi
5'b0_0100: illegal_insn = (RV32B == RV32BFull) ? 1'b0 : 1'b1; // sroi
5'b0_1100, // rori
5'b0_1001: illegal_insn = (RV32B != RV32BNone) ? 1'b0 : 1'b1; // bexti
5'b0_1101: begin
if ((RV32B == RV32BFull)) begin
illegal_insn = 1'b0; // grevi
end else if (RV32B == RV32BBalanced) begin
illegal_insn = (instr[24:20] == 5'b11000) ? 1'b0 : 1'b1; // rev8
end else begin
unique case (instr[24:20])
5'b11111, // rev
5'b11000: illegal_insn = (RV32B == RV32BBalanced) ? 1'b0 : 1'b1; // rev8
default: illegal_insn = 1'b1;
endcase
illegal_insn = 1'b1;
end
end
5'b0_0101: begin
@ -470,13 +467,11 @@ module ibex_decoder #(
{7'b010_0000, 3'b111}, // andn
{7'b010_0000, 3'b110}, // orn
{7'b010_0000, 3'b100}, // xnor
{7'b001_0000, 3'b001}, // slo
{7'b001_0000, 3'b101}, // sro
{7'b011_0000, 3'b001}, // rol
{7'b011_0000, 3'b101}, // ror
{7'b000_0101, 3'b100}, // min
{7'b000_0101, 3'b101}, // max
{7'b000_0101, 3'b110}, // minu
{7'b000_0101, 3'b110}, // max
{7'b000_0101, 3'b101}, // minu
{7'b000_0101, 3'b111}, // maxu
{7'b000_0100, 3'b100}, // pack
{7'b010_0100, 3'b100}, // packu
@ -496,6 +491,8 @@ module ibex_decoder #(
{7'b001_0100, 3'b101}, // gorc
{7'b000_0100, 3'b001}, // shfl
{7'b000_0100, 3'b101}, // unshfl
{7'b001_0000, 3'b001}, // slo
{7'b001_0000, 3'b101}, // sro
// RV32B zbc
{7'b000_0101, 3'b001}, // clmul
{7'b000_0101, 3'b010}, // clmulr
@ -823,7 +820,8 @@ module ibex_decoder #(
if (RV32B != RV32BNone) begin
unique case (instr_alu[31:27])
5'b0_0000: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
5'b0_0100: alu_operator_o = ALU_SLO; // Shift Left Ones by Immediate
// Shift Left Ones by Immediate
5'b0_0100: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO;
5'b0_1001: alu_operator_o = ALU_BCLR; // Clear bit specified by immediate
5'b0_0101: alu_operator_o = ALU_BSET; // Set bit specified by immediate
5'b0_1101: alu_operator_o = ALU_BINV; // Invert bit specified by immediate.
@ -833,7 +831,7 @@ module ibex_decoder #(
unique case (instr_alu[26:20])
7'b000_0000: alu_operator_o = ALU_CLZ; // clz
7'b000_0001: alu_operator_o = ALU_CTZ; // ctz
7'b000_0010: alu_operator_o = ALU_PCNT; // pcnt
7'b000_0010: alu_operator_o = ALU_CPOP; // cpop
7'b000_0100: alu_operator_o = ALU_SEXTB; // sext.b
7'b000_0101: alu_operator_o = ALU_SEXTH; // sext.h
7'b001_0000: begin
@ -897,7 +895,8 @@ module ibex_decoder #(
unique case (instr_alu[31:27])
5'b0_0000: alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate
5'b0_1000: alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate
5'b0_0100: alu_operator_o = ALU_SRO; // Shift Right Ones by Immediate
// Shift Right Ones by Immediate
5'b0_0100: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO;
5'b0_1001: alu_operator_o = ALU_BEXT; // Extract bit specified by immediate.
5'b0_1100: begin
alu_operator_o = ALU_ROR; // Rotate Right by Immediate
@ -989,8 +988,6 @@ module ibex_decoder #(
{7'b010_0000, 3'b101}: alu_operator_o = ALU_SRA; // Shift Right Arithmetic
// RV32B ALU Operations
{7'b001_0000, 3'b001}: if (RV32B != RV32BNone) alu_operator_o = ALU_SLO; // slo
{7'b001_0000, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_SRO; // sro
{7'b011_0000, 3'b001}: begin
if (RV32B != RV32BNone) begin
alu_operator_o = ALU_ROL; // rol
@ -1005,8 +1002,8 @@ module ibex_decoder #(
end
{7'b000_0101, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_MIN; // min
{7'b000_0101, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAX; // max
{7'b000_0101, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_MINU; // minu
{7'b000_0101, 3'b110}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAX; // max
{7'b000_0101, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_MINU; // minu
{7'b000_0101, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_MAXU; // maxu
{7'b000_0100, 3'b100}: if (RV32B != RV32BNone) alu_operator_o = ALU_PACK; // pack
@ -1036,6 +1033,8 @@ module ibex_decoder #(
{7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC; // grev
{7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL; // shfl
{7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; // unshfl
{7'b001_0000, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO; // slo
{7'b001_0000, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO; // sro
// RV32B zbc
{7'b000_0101, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMUL; // clmul

View file

@ -135,7 +135,7 @@ package ibex_pkg;
// RV32B
ALU_CLZ,
ALU_CTZ,
ALU_PCNT,
ALU_CPOP,
// Set lower than
ALU_SLT,

View file

@ -912,11 +912,7 @@ module ibex_tracer (
INSN_SH2ADD: decode_r_insn("sh2add");
INSN_SH3ADD: decode_r_insn("sh3add");
// RV32B - ZBB
INSN_SLOI: decode_i_shift_insn("sloi");
INSN_SROI: decode_i_shift_insn("sroi");
INSN_RORI: decode_i_shift_insn("rori");
INSN_SLO: decode_r_insn("slo");
INSN_SRO: decode_r_insn("sro");
INSN_ROL: decode_r_insn("rol");
INSN_ROR: decode_r_insn("ror");
INSN_MIN: decode_r_insn("min");
@ -940,7 +936,7 @@ module ibex_tracer (
INSN_PACKU: decode_r_insn("packu");
INSN_CLZ: decode_r1_insn("clz");
INSN_CTZ: decode_r1_insn("ctz");
INSN_PCNT: decode_r1_insn("pcnt");
INSN_CPOP: decode_r1_insn("cpop");
INSN_SEXTB: decode_r1_insn("sext.b");
INSN_SEXTH: decode_r1_insn("sext.h");
// RV32B - ZBS
@ -1030,6 +1026,10 @@ module ibex_tracer (
default: decode_i_insn("unshfli");
endcase
end
INSN_SLO: decode_r_insn("slo");
INSN_SRO: decode_r_insn("sro");
INSN_SLOI: decode_i_shift_insn("sloi");
INSN_SROI: decode_i_shift_insn("sroi");
// RV32B - ZBT
INSN_CMIX: decode_r_cmixcmov_insn("cmix");

View file

@ -77,15 +77,13 @@ package ibex_tracer_pkg;
parameter logic [31:0] INSN_SH3ADD = { 7'b0010000, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
// ZBB
parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
// Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in
// instr[24:20] are effectively used. Whenever instr[26] is set, sroi/rori is instead decoded as
// fsri.
parameter logic [31:0] INSN_SROI = { 5'b00100 , 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_RORI = { 5'b01100 , 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_CLZ = { 12'b011000000000, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_CTZ = { 12'b011000000001, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_PCNT = { 12'b011000000010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_CPOP = { 12'b011000000010, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_SEXTB = { 12'b011000000100, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_SEXTH = { 12'b011000000101, 5'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
// The ZEXT.B and ZEXT.H pseudo-instructions are currently not emitted by the tracer due to a lack
@ -96,13 +94,11 @@ package ibex_tracer_pkg;
// zext.h -- pseudoinstruction: pack rd, rs, zero
// parameter logic [31:0] INSN_ZEXTH = { 7'b0000100, 5'b00000, 5'h?, 3'b100, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_ROL = { 7'b0110000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_ROR = { 7'b0110000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_MIN = { 7'b0000101, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_MAX = { 7'b0000101, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_MINU = { 7'b0000101, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_MAX = { 7'b0000101, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_MINU = { 7'b0000101, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_MAXU = { 7'b0000101, 10'h?, 3'b111, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_XNOR = { 7'b0100000, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_ORN = { 7'b0100000, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
@ -247,6 +243,14 @@ package ibex_tracer_pkg;
parameter logic [31:0] INSN_SHFL = { 7'b0000100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_UNSHFL = { 7'b0000100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
// Only log2(XLEN) bits of the immediate are used. For RV32, this means only the bits in
// instr[24:20] are effectively used. Whenever instr[26] is set, sroi/rori is instead decoded as
// fsri.
parameter logic [31:0] INSN_SROI = { 5'b00100 , 1'b0, 11'h?, 3'b101, 5'h?, {OPCODE_OP_IMM} };
// ZBE
parameter logic [31:0] INSN_BDECOMPRESS = {7'b0100100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_BCOMPRESS = {7'b0000100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };