[bitmanip] Add ZBS Instruction Group

This commit implements the Bit Manipulation Extension ZBS instruction
group: sbset[i], sbclr[i], sbinv[i] and sbext[i]. These instructions
set, clear, invert or extract bit rs1[rs2] or rs1[imm] for reg-reg and
reg-imm instructions respectively.

Architectural details:
        * A multiplexer is added to the shifter structure in order to
          choose between 32'h1, used for the single-bit instructions as
          summarized below, and regular operand_b input.

        * Dedicated bitwise-logic blocks are introduced for multicycle
          shifts and cmix instructions (fsr, fsl, ror, rol),
          single-bit instructions (sbset, sbclr, sbinv, sbext), and
          standard-ALU and zbb instructions (or, and, xor, orn, andn,
          xnor).

Instruction details: All of the zbs instructions rely on sharing the
        existing shifter structure. The instructions are carried out in
        one cycle.

        * sbset, sbclr, sbinv:
                shift_result = 32'h1 << rs2[4:0];
                singlebit_result = rs1 [|, ^ , &~] shift_result;

        * sbext:
                shift_result = rs1 >> rs2[4:0];
                singlebit_result = {31'h0, shift_result[0]};

Signed-off-by: ganoam <gnoam@live.com>
This commit is contained in:
ganoam 2020-04-20 16:12:04 +02:00 committed by Pirmin Vogel
parent 321a3d267f
commit 133fef2c2f
9 changed files with 146 additions and 36 deletions

View file

@ -66,7 +66,7 @@ Other blocks use the ALU for the following tasks:
Support for the RISC-V Bitmanipulation Extension (Document Version 0.92, November 8, 2019) is enabled via the parameter ``RV32B``.
This feature is *EXPERIMENTAL* and the details of its impact are not yet documented here.
Currently the Zbb and Zbt sub-extensions are implemented.
Currently the Zbb, Zbs and Zbt sub-extensions are implemented.
All instructions are carried out in a single clock cycle.
.. _mult-div:

View file

@ -92,7 +92,8 @@ Parameters
| ``RV32M`` | bit | 1 | M(ultiply) extension enable |
+------------------------------+-------------+------------+-----------------------------------------------------------------+
| ``RV32B`` | bit | 0 | *EXPERIMENTAL* - B(itmanipulation) extension enable: |
| | | | Currently supported Z-extensions: Zbb (base) |
| | | | Currently supported Z-extensions: Zbb (base), Zbs (single-bit) |
| | | | and Zbt (ternary) |
+------------------------------+-------------+------------+-----------------------------------------------------------------+
| ``BranchTargetALU`` | bit | 0 | *EXPERIMENTAL* - Enables branch target ALU removing a stall |
| | | | cycle from taken branches |

View file

@ -36,6 +36,12 @@ lint_off -rule UNUSED -file "*/rtl/ibex_alu.sv" -match "*'shift_amt_compl'[5]*"
// cleaner to write all bits even if not all are used
lint_off -rule UNUSED -file "*/rtl/ibex_alu.sv" -match "*'shift_result_ext'[32]*"
// Signal is not used for RV32B == 0: imd_val_q_i
//
// No ALU multicycle instructions exist to use the intermediate value register,
// if bitmanipulation extension is not enabled.
lint_off -rule UNUSED -file "*/rtl/ibex_alu.sv" -match "*'imd_val_q_i'"
// Bits of signal are not used: fetch_addr_n[0]
// cleaner to write all bits even if not all are used
lint_off -rule UNUSED -file "*/rtl/ibex_if_stage.sv" -match "*'fetch_addr_n'[0]*"

View file

@ -96,16 +96,34 @@ module ibex_alu #(
// The shifter structure consists of a 33-bit shifter: 32-bit operand + 1 bit extension for
// arithmetic shifts and one-shift support.
// Rotations and funnel shifts are implemented as multi-cycle instructions.
// For funnel shifts, operand_a_i is tied to rs1 in the first cycle and rs3 in the
// second cycle. operand_b_i is always tied to rs2.
// The shifter is also used for single-bit instructions as detailed below.
//
// Standard Shifts
// ===============
// For standard shift instructions, the direction of the shift is to the right by default. For
// left shifts, the signal shift_left is set. If so, the operand is initially reversed,
// shifted to the right by the specified amount and reversed back again. For arithmetic- and
// one-shifts the 33rd bit of the shifter operand is set accordingly.
//
// Multicycle Shifts
// =================
//
// Rotation
// --------
// For rotations, the operand signals operand_a_i and operand_b_i are kept constant to rs1 and
// rs2 respectively.
//
// Rotation pseudocode:
// shift_amt = rs2 & 31;
// multicycle_result = (rs1 >> shift_amt) | (rs1 << (32 - shift_amt));
// ^-- cycle 0 -----^ ^-- cycle 1 --------------^
//
// For funnel shifts, the order of applying the shift amount or its complement is determined by
// bit [5] of shift_amt.
// Funnel Shifts
// -------------
// For funnel shifts, operand_a_i is tied to rs1 in the first cycle and rs3 in the
// second cycle. operand_b_i is always tied to rs2. The order of applying the shift amount or
// its complement is determined by bit [5] of shift_amt.
//
// Funnel shift Pseudocode: (fsl)
// shift_amt = rs2 & 63;
// shift_amt_compl = 32 - shift_amt[4:0]
@ -121,19 +139,30 @@ module ibex_alu #(
// multicycle_result = rs3
// else if (shift_amt == 0):
// multicycle_result = rs1.
//
// Single-Bit Instructions
// =======================
// Single bit instructions operate on bit operand_b_i[4:0] of operand_a_i.
// The operations sbset, sbclr and sbinv are implemented by generation of a bit-mask using the
// shifter structure. This is done by left-shifting the operand 32'h1 by the required amount.
// The signal shift_sbmode multiplexes the shifter input and sets the signal shift_left.
// Further processing is taken care of by a separate structure.
//
// For sbext, the bit defined by operand_b_i[4:0] is to be returned. This is done by simply
// shifting operand_a_i to the right by the required amount and returning bit [0] of the result.
logic shift_left;
logic shift_ones;
logic shift_arith;
logic shift_rot;
logic shift_funnel;
logic shift_sbmode;
logic shift_none;
logic shift_op_rev;
logic shift_op_rev8;
logic shift_op_orc_b;
logic [5:0] shift_amt;
logic [5:0] shift_amt_compl; // complementary shift amount (32 - shift_amt)
logic shift_multicycle;
// bit shift_amt[5]: word swap bit: only considered for FSL/FSR.
// if set, reverse operations in first and second cycle.
@ -144,12 +173,18 @@ module ibex_alu #(
(operand_b_i[5] && shift_funnel ? shift_amt_compl[4:0] : operand_b_i[4:0]) :
(operand_b_i[5] && shift_funnel ? operand_b_i[4:0] : shift_amt_compl[4:0]);
// single-bit mode: shift
assign shift_sbmode = RV32B ? (operator_i == ALU_SBSET) || (operator_i == ALU_SBCLR) ||
(operator_i == ALU_SBINV) :
1'b0;
// left shift if this is:
// * a standard left shift (slo, sll)
// * a rol in the first cycle
// * a ror in the second cycle
// * fsl: without word-swap bit: first cycle, else: second cycle
// * fsr: without word-swap bit: second cycle, else: first cycle
// * a single-bit instruction: sbclr, sbset, sbinv (excluding sbext)
always_comb begin
unique case (operator_i)
ALU_SLL: shift_left = 1'b1;
@ -162,21 +197,22 @@ module ibex_alu #(
RV32B ? (shift_amt[5] ? instr_first_cycle_i : !instr_first_cycle_i) : 1'b0;
default: shift_left = 1'b0;
endcase
if (shift_sbmode) begin
shift_left = 1'b1;
end
end
assign shift_ones = RV32B ? (operator_i == ALU_SLO) || (operator_i == ALU_SRO) : 1'b0;
assign shift_arith = (operator_i == ALU_SRA);
assign shift_rot = RV32B ? (operator_i == ALU_ROL) || (operator_i == ALU_ROR) : 1'b0;
assign shift_funnel = RV32B ? (operator_i == ALU_FSL) || (operator_i == ALU_FSR) : 1'b0;
assign shift_multicycle = shift_funnel || shift_rot;
assign shift_arith = (operator_i == ALU_SRA);
assign shift_ones = RV32B ? (operator_i == ALU_SLO) || (operator_i == ALU_SRO) : 1'b0;
assign shift_funnel = RV32B ? (operator_i == ALU_FSL) || (operator_i == ALU_FSR) : 1'b0;
assign shift_none = RV32B ? (operator_i == ALU_REV) || (operator_i == ALU_REV8) ||
(operator_i == ALU_ORCB) :
1'b0;
assign shift_none = RV32B ? (operator_i == ALU_REV) || (operator_i == ALU_REV8) ||
(operator_i == ALU_ORCB) :
1'b0;
assign shift_op_rev = RV32B ? (operator_i == ALU_REV) : 1'b0;
assign shift_op_rev8 = RV32B ? (operator_i == ALU_REV8) : 1'b0;
assign shift_op_orc_b = RV32B ? (operator_i == ALU_ORCB) : 1'b0;
assign shift_op_rev = RV32B ? (operator_i == ALU_REV) : 1'b0;
assign shift_op_rev8 = RV32B ? (operator_i == ALU_REV8) : 1'b0;
assign shift_op_orc_b = RV32B ? (operator_i == ALU_ORCB) : 1'b0;
logic [31:0] shift_result;
logic [32:0] shift_result_ext;
@ -189,6 +225,13 @@ module ibex_alu #(
shift_result = operand_a_rev;
end
// if this is a single bit instruction: we left-shift 32'h1 by shift_amt.
// the first reverse of the left-shift operation can be easily omitted, since we
// know the result of rev(32'h1).
if (shift_sbmode) begin
shift_result = 32'h8000_0000;
end
shift_result_ext = $signed({shift_ones || (shift_arith && shift_result[31]), shift_result})
>>> shift_amt[4:0];
@ -292,6 +335,7 @@ module ibex_alu #(
logic [31:0] bwlogic_result;
logic [31:0] pack_result;
logic [31:0] multicycle_result;
logic [31:0] singlebit_result;
///////////////////
// Bitwise Logic //
@ -300,8 +344,6 @@ module ibex_alu #(
logic bwlogic_or;
logic bwlogic_and;
logic [31:0] bwlogic_operand_b;
logic [31:0] bwlogic_or_op_a;
logic [31:0] bwlogic_or_op_b;
logic [31:0] bwlogic_or_result;
logic [31:0] bwlogic_and_result;
logic [31:0] bwlogic_xor_result;
@ -315,18 +357,13 @@ module ibex_alu #(
ALU_ORN,
ALU_ANDN: bwlogic_op_b_negate = RV32B ? 1'b1 : 1'b0;
ALU_CMIX: bwlogic_op_b_negate = RV32B ? !instr_first_cycle_i : 1'b0;
default: bwlogic_op_b_negate = 1'b0;
default: bwlogic_op_b_negate = 1'b0;
endcase
end
assign bwlogic_operand_b = bwlogic_op_b_negate ? operand_b_neg[32:1] : operand_b_i;
assign bwlogic_or_op_a = ((operator_i == ALU_CMIX) || shift_multicycle) ?
imd_val_q_i : operand_a_i;
assign bwlogic_or_op_b = (operator_i == ALU_CMIX) ? bwlogic_and_result :
shift_multicycle ? shift_result : bwlogic_operand_b;
assign bwlogic_or_result = bwlogic_or_op_a | bwlogic_or_op_b;
assign bwlogic_or_result = operand_a_i | bwlogic_operand_b;
assign bwlogic_and_result = operand_a_i & bwlogic_operand_b;
assign bwlogic_xor_result = operand_a_i ^ bwlogic_operand_b;
@ -363,7 +400,7 @@ module ibex_alu #(
end
ALU_CMIX: begin
multicycle_result = bwlogic_or_result;
multicycle_result = imd_val_q_i | bwlogic_and_result;
imd_val_d_o = bwlogic_and_result;
if (instr_first_cycle_i) begin
imd_val_we_o = 1'b1;
@ -377,7 +414,7 @@ module ibex_alu #(
if (shift_amt[4:0] == 5'h0) begin
multicycle_result = shift_amt[5] ? operand_a_i : imd_val_q_i;
end else begin
multicycle_result = bwlogic_or_result;
multicycle_result = imd_val_q_i | shift_result;
end
imd_val_d_o = shift_result;
if (instr_first_cycle_i) begin
@ -394,6 +431,19 @@ module ibex_alu #(
endcase
end
/////////////////////////////
// Single-bit Instructions //
/////////////////////////////
// Derive the single-bit instruction result from the shared shifter output.
//
// For sbset/sbclr/sbinv the shifter is fed 32'h1 and shifts left (shift_sbmode), so
// shift_result is used here as the bit-mask for the bit selected by the shift amount:
//   sbset: OR the mask into operand_a_i       (set the bit)
//   sbclr: AND operand_a_i with the inverse   (clear the bit)
//   sbinv: XOR the mask with operand_a_i      (invert the bit)
//
// For sbext the shifter right-shifts operand_a_i, so the requested bit ends up in
// shift_result[0] and is returned zero-extended to 32 bits.
always_comb begin
unique case (operator_i)
ALU_SBSET: singlebit_result = operand_a_i | shift_result;
ALU_SBCLR: singlebit_result = operand_a_i & ~shift_result;
ALU_SBINV: singlebit_result = operand_a_i ^ shift_result;
// The default arm covers ALU_SBEXT; it is also taken for every other operator, but
// singlebit_result is only forwarded to result_o for the four single-bit operators.
default: singlebit_result = {31'h0, shift_result[0]}; // ALU_SBEXT
endcase
end
///////////////
// Min / Max //
///////////////
@ -448,6 +498,7 @@ module ibex_alu #(
assign bitcnt_result = '0;
assign pack_result = '0;
assign multicycle_result = '0;
assign singlebit_result = '0;
// RV32B support signals
assign imd_val_d_o = '0;
assign imd_val_we_o = '0;
@ -500,6 +551,10 @@ module ibex_alu #(
ALU_FSL, ALU_FSR,
ALU_ROL, ALU_ROR: result_o = multicycle_result;
// Single-Bit Bitmanip Operations (RV32B Ops)
ALU_SBSET, ALU_SBCLR,
ALU_SBINV, ALU_SBEXT: result_o = singlebit_result;
default: ;
endcase
end

View file

@ -568,6 +568,7 @@ module ibex_core #(
.ready_wb_i ( ready_wb ),
.outstanding_load_wb_i ( outstanding_load_wb ),
.outstanding_store_wb_i ( outstanding_store_wb ),
// Performance Counters
.perf_jump_o ( perf_jump ),
.perf_branch_o ( perf_branch ),

View file

@ -337,7 +337,10 @@ module ibex_decoder #(
3'b001: begin
unique case (instr[31:27])
5'b0_0000: illegal_insn = 1'b0; // slli
5'b0_0100: illegal_insn = RV32B ? 1'b0 : 1'b1; // sloi
5'b0_0100, // sloi
5'b0_1001, // sbclri
5'b0_0101, // sbseti
5'b0_1101: illegal_insn = RV32B ? 1'b0 : 1'b1; // sbinvi
5'b0_1100: begin
unique case(instr[26:20])
7'b00_00000, // clz
@ -359,7 +362,8 @@ module ibex_decoder #(
5'b0_1000: illegal_insn = 1'b0; // srai
5'b0_0100, // sroi
5'b0_1100: illegal_insn = RV32B ? 1'b0 : 1'b1; // rori
5'b0_1100, // rori
5'b0_1001: illegal_insn = RV32B ? 1'b0 : 1'b1; // sbexti
5'b0_1101: begin
unique case(instr[24:20])
@ -418,7 +422,12 @@ module ibex_decoder #(
{7'b000_0101, 3'b111}, // maxu
{7'b000_0100, 3'b100}, // pack
{7'b010_0100, 3'b100}, // packu
{7'b000_0100, 3'b111}: illegal_insn = RV32B ? 1'b0 : 1'b1; // packh
{7'b000_0100, 3'b111}, // packh
// RV32B instructions (zbs)
{7'b010_0100, 3'b001}, // sbclr
{7'b001_0100, 3'b001}, // sbset
{7'b011_0100, 3'b001}, // sbinv
{7'b010_0100, 3'b101}: illegal_insn = RV32B ? 1'b0 : 1'b1; // sbext
// supported RV32M instructions
{7'b000_0001, 3'b000}: begin // mul
@ -754,6 +763,9 @@ module ibex_decoder #(
unique case (instr[31:27])
5'b0_0000: alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate
5'b0_0100: alu_operator_o = ALU_SLO; // Shift Left Ones by Immediate
5'b0_1001: alu_operator_o = ALU_SBCLR; // Clear bit specified by immediate
5'b0_0101: alu_operator_o = ALU_SBSET; // Set bit specified by immediate
5'b0_1101: alu_operator_o = ALU_SBINV; // Invert bit specified by immediate.
5'b0_1100: begin
unique case (instr[26:20])
7'b000_0000: alu_operator_o = ALU_CLZ; // Count Leading Zeros
@ -785,6 +797,7 @@ module ibex_decoder #(
5'b0_0000: alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate
5'b0_1000: alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate
5'b0_0100: alu_operator_o = ALU_SRO; // Shift Right Ones by Immediate
5'b0_1001: alu_operator_o = ALU_SBEXT; // Extract bit specified by immediate.
5'b0_1100: begin
alu_operator_o = ALU_ROR; // Rotate Right by Immediate
alu_multicycle_o = 1'b1;
@ -907,6 +920,12 @@ module ibex_decoder #(
{7'b010_0000, 3'b110}: if (RV32B) alu_operator_o = ALU_ORN; // Orn
{7'b010_0000, 3'b111}: if (RV32B) alu_operator_o = ALU_ANDN; // Andn
// RV32B ALU_Operations (zbs)
{7'b010_0100, 3'b001}: if (RV32B) alu_operator_o = ALU_SBCLR; // sbclr
{7'b001_0100, 3'b001}: if (RV32B) alu_operator_o = ALU_SBSET; // sbset
{7'b011_0100, 3'b001}: if (RV32B) alu_operator_o = ALU_SBINV; // sbinv
{7'b010_0100, 3'b101}: if (RV32B) alu_operator_o = ALU_SBEXT; // sbext
// supported RV32M instructions, all use the same ALU operation
{7'b000_0001, 3'b000}, // mul
{7'b000_0001, 3'b001}, // mulh

View file

@ -93,7 +93,14 @@ typedef enum logic [5:0] {
ALU_CMOV,
ALU_CMIX,
ALU_FSL,
ALU_FSR
ALU_FSR,
// Single-Bit Operations
// RV32B
ALU_SBSET,
ALU_SBCLR,
ALU_SBINV,
ALU_SBEXT
} alu_op_e;
typedef enum logic [1:0] {

View file

@ -875,7 +875,7 @@ module ibex_tracer (
// MISC-MEM
INSN_FENCE: decode_fence();
INSN_FENCEI: decode_mnemonic("fence.i");
// RV32B
// RV32B - ZBB
INSN_SLOI: decode_i_shift_insn("sloi");
INSN_SROI: decode_i_shift_insn("sroi");
INSN_RORI: decode_i_shift_insn("rori");
@ -899,7 +899,16 @@ module ibex_tracer (
INSN_PCNT: decode_r1_insn("pcnt");
INSN_REV: decode_r1_insn("rev");
INSN_REV8: decode_r1_insn("rev8");
// TERNARY BITMABIP INSTR
// RV32B - ZBS
INSN_SBCLRI: decode_i_insn("sbclri");
INSN_SBSETI: decode_i_insn("sbseti");
INSN_SBINVI: decode_i_insn("sbinvi");
INSN_SBEXTI: decode_i_insn("sbexti");
INSN_SBCLR: decode_r_insn("sbclr");
INSN_SBSET: decode_r_insn("sbset");
INSN_SBINV: decode_r_insn("sbinv");
INSN_SBEXT: decode_r_insn("sbext");
// RV32B - ZBT
INSN_CMIX: decode_r_cmixcmov_insn("cmix");
INSN_CMOV: decode_r_cmixcmov_insn("cmov");
INSN_FSR: decode_r_funnelshift_insn("fsr");

View file

@ -86,6 +86,12 @@ parameter logic [31:0] INSN_REV8 =
{ 5'b01101, 2'b?, 5'b11000, 5'b? , 3'b101, 5'b?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_ORCB =
{ 5'b00101, 2'b?, 5'b00111, 5'b? , 3'b101, 5'b?, {OPCODE_OP_IMM} };
// ZBS
parameter logic [31:0] INSN_SBCLRI = { 5'b01001 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_SBSETI = { 5'b00101 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_SBINVI = { 5'b01101 , 12'b?, 3'b001, 5'b?, {OPCODE_OP_IMM} };
parameter logic [31:0] INSN_SBEXTI = { 5'b01001 , 12'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} };
// ZBT
parameter logic [31:0] INSN_FSRI = { 5'b?, 1'b1, 11'b?, 3'b101, 5'b?, {OPCODE_OP_IMM} };
@ -105,6 +111,12 @@ parameter logic [31:0] INSN_ANDN = { 7'b0100000, 10'b?, 3'b111, 5'b?, {OPCODE_O
parameter logic [31:0] INSN_PACK = { 7'b0000100, 10'b?, 3'b100, 5'b?, {OPCODE_OP} };
parameter logic [31:0] INSN_PACKU = { 7'b0100100, 10'b?, 3'b100, 5'b?, {OPCODE_OP} };
parameter logic [31:0] INSN_PACKH = { 7'b0000100, 10'b?, 3'b111, 5'b?, {OPCODE_OP} };
// ZBS
parameter logic [31:0] INSN_SBCLR = { 7'b0100100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
parameter logic [31:0] INSN_SBSET = { 7'b0010100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
parameter logic [31:0] INSN_SBINV = { 7'b0110100, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
parameter logic [31:0] INSN_SBEXT = { 7'b0100100, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };
// ZBT
parameter logic [31:0] INSN_CMIX = {5'b?, 2'b11, 10'b?, 3'b001, 5'b?, {OPCODE_OP} };
parameter logic [31:0] INSN_CMOV = {5'b?, 2'b11, 10'b?, 3'b101, 5'b?, {OPCODE_OP} };