diff --git a/rtl/ibex_alu.sv b/rtl/ibex_alu.sv
index 6ce70471..ad796e21 100644
--- a/rtl/ibex_alu.sv
+++ b/rtl/ibex_alu.sv
@@ -403,6 +403,7 @@ module ibex_alu #(
   logic [31:0] singlebit_result;
   logic [31:0] rev_result;
   logic [31:0] shuffle_result;
+  logic [31:0] xperm_result;
   logic [31:0] butterfly_result;
   logic [31:0] invbutterfly_result;
   logic [31:0] clmul_result;
@@ -724,6 +725,96 @@ module ibex_alu #(
         end
       end
 
+      //////////////
+      // Crossbar //
+      //////////////
+      // The crossbar permutation instructions xperm.[nbh] (Zbp) can be implemented using 8
+      // parallel 4-bit-wide, 8-input crossbars. Basically, we permute the 8 nibbles of operand_a_i
+      // based on operand_b_i.
+
+      // Generate selector indices and valid signals.
+      // - sel_n[x] indicates which nibble of operand_a_i is selected for output nibble x.
+      // - vld_n[x] indicates if the selection is valid (otherwise output nibble x is zero).
+      logic [7:0][2:0] sel_n; // nibbles
+      logic [7:0] vld_n; // nibbles
+      logic [3:0][1:0] sel_b; // bytes
+      logic [3:0] vld_b; // bytes
+      logic [1:0][0:0] sel_h; // half words
+      logic [1:0] vld_h; // half words
+
+      // Per nibble, 3 bits are needed for the selection. Other bits must be zero.
+      // sel_n bit mask: 32'b0111_0111_0111_0111_0111_0111_0111_0111
+      // vld_n bit mask: 32'b1000_1000_1000_1000_1000_1000_1000_1000
+      for (genvar i = 0; i < 8; i++) begin : gen_sel_vld_n
+        assign sel_n[i] = operand_b_i[i*4 +: 3];
+        assign vld_n[i] = ~|operand_b_i[i*4 + 3 +: 1];
+      end
+
+      // Per byte, 2 bits are needed for the selection. Other bits must be zero.
+      // sel_b bit mask: 32'b0000_0011_0000_0011_0000_0011_0000_0011
+      // vld_b bit mask: 32'b1111_1100_1111_1100_1111_1100_1111_1100
+      for (genvar i = 0; i < 4; i++) begin : gen_sel_vld_b
+        assign sel_b[i] = operand_b_i[i*8 +: 2];
+        assign vld_b[i] = ~|operand_b_i[i*8 + 2 +: 6];
+      end
+
+      // Per half word, only 1 bit is needed for the selection. All other bits must be zero.
+      // sel_h bit mask: 32'b0000_0000_0000_0001_0000_0000_0000_0001
+      // vld_h bit mask: 32'b1111_1111_1111_1110_1111_1111_1111_1110
+      for (genvar i = 0; i < 2; i++) begin : gen_sel_vld_h
+        assign sel_h[i] = operand_b_i[i*16 +: 1];
+        assign vld_h[i] = ~|operand_b_i[i*16 + 1 +: 15];
+      end
+
+      // Convert selector indices and valid signals to control the nibble-based
+      // crossbar logic.
+      logic [7:0][2:0] sel;
+      logic [7:0] vld;
+      always_comb begin
+        unique case (operator_i)
+          ALU_XPERM_N: begin
+            // No conversion needed: operand_b_i already provides nibble indices.
+            sel = sel_n;
+            vld = vld_n;
+          end
+
+          ALU_XPERM_B: begin
+            // Convert byte to nibble indices.
+            for (int b = 0; b < 4; b++) begin
+              sel[b*2 + 0] = {sel_b[b], 1'b0};
+              sel[b*2 + 1] = {sel_b[b], 1'b1};
+              vld[b*2 +: 2] = {2{vld_b[b]}};
+            end
+          end
+
+          ALU_XPERM_H: begin
+            // Convert half-word to nibble indices.
+            for (int h = 0; h < 2; h++) begin
+              sel[h*4 + 0] = {sel_h[h], 2'b00};
+              sel[h*4 + 1] = {sel_h[h], 2'b01};
+              sel[h*4 + 2] = {sel_h[h], 2'b10};
+              sel[h*4 + 3] = {sel_h[h], 2'b11};
+              vld[h*4 +: 4] = {4{vld_h[h]}};
+            end
+          end
+
+          default: begin
+            // Tie valid to zero so the crossbar outputs all zeros unless we need it.
+            sel = sel_n;
+            vld = '0;
+          end
+        endcase
+      end
+
+      // The actual nibble-based crossbar logic.
+      logic [7:0][3:0] val_n;
+      logic [7:0][3:0] xperm_n;
+      assign val_n = operand_a_i;
+      for (genvar i = 0; i < 8; i++) begin : gen_xperm_n
+        assign xperm_n[i] = vld[i] ? val_n[sel[i]] : '0;
+      end
+      assign xperm_result = xperm_n;
+
       ///////////////
       // Butterfly //
       ///////////////
@@ -1083,6 +1174,7 @@ module ibex_alu #(
       logic [31:0] unused_imd_val_q_1;
       assign unused_imd_val_q_1 = imd_val_q_i[1];
       assign shuffle_result = '0;
+      assign xperm_result = '0;
       assign butterfly_result = '0;
       assign invbutterfly_result = '0;
       assign clmul_result = '0;
@@ -1201,6 +1293,7 @@ module ibex_alu #(
     assign singlebit_result = '0;
     assign rev_result = '0;
     assign shuffle_result = '0;
+    assign xperm_result = '0;
     assign butterfly_result = '0;
     assign invbutterfly_result = '0;
     assign clmul_result = '0;
@@ -1238,6 +1331,9 @@ module ibex_alu #(
       // Shuffle Operations (RV32B)
       ALU_SHFL, ALU_UNSHFL: result_o = shuffle_result;
 
+      // Crossbar Permutation Operations (RV32B)
+      ALU_XPERM_N, ALU_XPERM_B, ALU_XPERM_H: result_o = xperm_result;
+
       // Comparison Operations
       ALU_EQ, ALU_NE,
       ALU_GE, ALU_GEU,
diff --git a/rtl/ibex_decoder.sv b/rtl/ibex_decoder.sv
index f3be0e19..fbc754a1 100644
--- a/rtl/ibex_decoder.sv
+++ b/rtl/ibex_decoder.sv
@@ -491,6 +491,9 @@ module ibex_decoder #(
             {7'b001_0100, 3'b101}, // gorc
             {7'b000_0100, 3'b001}, // shfl
             {7'b000_0100, 3'b101}, // unshfl
+            {7'b001_0100, 3'b010}, // xperm.n
+            {7'b001_0100, 3'b100}, // xperm.b
+            {7'b001_0100, 3'b110}, // xperm.h
             {7'b001_0000, 3'b001}, // slo
             {7'b001_0000, 3'b101}, // sro
             // RV32B zbc
@@ -1029,12 +1032,15 @@ module ibex_decoder #(
           {7'b010_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_BFP; // bfp
 
           // RV32B zbp
-          {7'b011_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GREV;   // grev
-          {7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC;   // gorc
-          {7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL;   // shfl
-          {7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; // unshfl
-          {7'b001_0000, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO;    // slo
-          {7'b001_0000, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO;    // sro
+          {7'b011_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GREV;    // grev
+          {7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC;    // gorc
+          {7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL;    // shfl
+          {7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL;  // unshfl
+          {7'b001_0100, 3'b010}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_N; // xperm.n
+          {7'b001_0100, 3'b100}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_B; // xperm.b
+          {7'b001_0100, 3'b110}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_H; // xperm.h
+          {7'b001_0000, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO;     // slo
+          {7'b001_0000, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO;     // sro
 
           // RV32B zbc
           {7'b000_0101, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMUL; // clmul
diff --git a/rtl/ibex_pkg.sv b/rtl/ibex_pkg.sv
index 58a8404f..c0d427e4 100644
--- a/rtl/ibex_pkg.sv
+++ b/rtl/ibex_pkg.sv
@@ -73,7 +73,7 @@ package ibex_pkg;
   // ALU operations //
   ////////////////////
 
-  typedef enum logic [5:0] {
+  typedef enum logic [6:0] {
     // Arithmetics
     ALU_ADD,
     ALU_SUB,
@@ -100,6 +100,9 @@ package ibex_pkg;
     ALU_GORC,
     ALU_SHFL,
     ALU_UNSHFL,
+    ALU_XPERM_N,
+    ALU_XPERM_B,
+    ALU_XPERM_H,
 
     // Address Calculations
     // RV32B
diff --git a/rtl/ibex_tracer.sv b/rtl/ibex_tracer.sv
index 22169d35..4a084465 100644
--- a/rtl/ibex_tracer.sv
+++ b/rtl/ibex_tracer.sv
@@ -1031,6 +1031,9 @@ module ibex_tracer (
             default: decode_i_insn("unshfli");
           endcase
         end
+        INSN_XPERM_N: decode_r_insn("xperm_n");
+        INSN_XPERM_B: decode_r_insn("xperm_b");
+        INSN_XPERM_H: decode_r_insn("xperm_h");
         INSN_SLO: decode_r_insn("slo");
         INSN_SRO: decode_r_insn("sro");
         INSN_SLOI: decode_i_shift_insn("sloi");
diff --git a/rtl/ibex_tracer_pkg.sv b/rtl/ibex_tracer_pkg.sv
index 20ead32e..6dbbfc90 100644
--- a/rtl/ibex_tracer_pkg.sv
+++ b/rtl/ibex_tracer_pkg.sv
@@ -246,6 +246,10 @@ package ibex_tracer_pkg;
   parameter logic [31:0] INSN_SHFL = { 7'b0000100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
   parameter logic [31:0] INSN_UNSHFL = { 7'b0000100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
 
+  parameter logic [31:0] INSN_XPERM_N = { 7'b0010100, 10'h?, 3'b010, 5'h?, {OPCODE_OP} };
+  parameter logic [31:0] INSN_XPERM_B = { 7'b0010100, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
+  parameter logic [31:0] INSN_XPERM_H = { 7'b0010100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
+
   parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
   parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
   parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };