[rtl, bitmanip] Add xperm.[nbh] instruction (Zbp, draft v.0.93)

Signed-off-by: Pirmin Vogel <vogelpi@lowrisc.org>
This commit is contained in:
Pirmin Vogel 2021-12-02 21:50:20 +01:00
parent 40dab87448
commit c78acac8cc
5 changed files with 119 additions and 7 deletions

View file

@ -403,6 +403,7 @@ module ibex_alu #(
logic [31:0] singlebit_result;
logic [31:0] rev_result;
logic [31:0] shuffle_result;
logic [31:0] xperm_result;
logic [31:0] butterfly_result;
logic [31:0] invbutterfly_result;
logic [31:0] clmul_result;
@ -724,6 +725,96 @@ module ibex_alu #(
end
end
//////////////
// Crossbar //
//////////////
// The crossbar permutation instructions xperm.[nbh] (Zbp) can be implemented using 8
// parallel 4-bit-wide, 8-input crossbars. Basically, we permute the 8 nibbles of operand_a_i
// based on operand_b_i.
// Decode operand_b_i into source indices (sel_*) and validity flags (vld_*), once per
// supported element width. A selection is valid only if every bit of the element above
// its index field is zero.

// Nibble granularity: the low 3 bits of each nibble are the index, the top bit must be 0.
//   index bits:        32'b0111_0111_0111_0111_0111_0111_0111_0111
//   must-be-zero bits: 32'b1000_1000_1000_1000_1000_1000_1000_1000
logic [7:0][2:0] sel_n; // nibbles
logic [7:0] vld_n; // nibbles
for (genvar nib = 0; nib < 8; nib++) begin : gen_sel_vld_n
  assign sel_n[nib] = operand_b_i[4*nib + 2 -: 3];
  assign vld_n[nib] = ~operand_b_i[4*nib + 3];
end

// Byte granularity: the low 2 bits of each byte are the index, the upper 6 bits must be 0.
//   index bits:        32'b0000_0011_0000_0011_0000_0011_0000_0011
//   must-be-zero bits: 32'b1111_1100_1111_1100_1111_1100_1111_1100
logic [3:0][1:0] sel_b; // bytes
logic [3:0] vld_b; // bytes
for (genvar byt = 0; byt < 4; byt++) begin : gen_sel_vld_b
  assign sel_b[byt] = operand_b_i[8*byt + 1 -: 2];
  assign vld_b[byt] = ~|operand_b_i[8*byt + 7 -: 6];
end

// Half-word granularity: bit 0 of each half word is the index, the upper 15 bits must be 0.
//   index bits:        32'b0000_0000_0000_0001_0000_0000_0000_0001
//   must-be-zero bits: 32'b1111_1111_1111_1110_1111_1111_1111_1110
logic [1:0][0:0] sel_h; // half words
logic [1:0] vld_h; // half words
for (genvar hw = 0; hw < 2; hw++) begin : gen_sel_vld_h
  assign sel_h[hw] = operand_b_i[16*hw];
  assign vld_h[hw] = ~|operand_b_i[16*hw + 15 -: 15];
end
// Translate the per-element selectors into the controls of the nibble-based crossbar:
// sel[x] picks the source nibble and vld[x] enables output nibble x.
logic [7:0][2:0] sel;
logic [7:0] vld;
always_comb begin
  // Defaults: pass the nibble selectors through and keep every output nibble invalid, so
  // the crossbar is disabled unless one of the xperm operators is active.
  sel = sel_n;
  vld = '0;

  unique case (operator_i)
    ALU_XPERM_N: begin
      // Nibble selectors are used as is; only the valid bits need to be forwarded.
      vld = vld_n;
    end

    ALU_XPERM_B: begin
      // Each byte index expands into the indices of its low and high nibble.
      for (int byte_idx = 0; byte_idx < 4; byte_idx++) begin
        sel[2*byte_idx]     = {sel_b[byte_idx], 1'b0};
        sel[2*byte_idx + 1] = {sel_b[byte_idx], 1'b1};
        vld[2*byte_idx]     = vld_b[byte_idx];
        vld[2*byte_idx + 1] = vld_b[byte_idx];
      end
    end

    ALU_XPERM_H: begin
      // Each half-word index expands into the indices of its four nibbles.
      for (int half_idx = 0; half_idx < 2; half_idx++) begin
        sel[4*half_idx]     = {sel_h[half_idx], 2'b00};
        sel[4*half_idx + 1] = {sel_h[half_idx], 2'b01};
        sel[4*half_idx + 2] = {sel_h[half_idx], 2'b10};
        sel[4*half_idx + 3] = {sel_h[half_idx], 2'b11};
        vld[4*half_idx]     = vld_h[half_idx];
        vld[4*half_idx + 1] = vld_h[half_idx];
        vld[4*half_idx + 2] = vld_h[half_idx];
        vld[4*half_idx + 3] = vld_h[half_idx];
      end
    end

    default: ; // keep the defaults assigned above
  endcase
end
// Nibble crossbar: output nibble x is the nibble of operand_a_i addressed by sel[x],
// forced to zero whenever vld[x] is low.
logic [7:0][3:0] val_n;
logic [7:0][3:0] xperm_n;
assign val_n = operand_a_i;
for (genvar nib = 0; nib < 8; nib++) begin : gen_xperm_n
  // Gate the selected source nibble with the replicated valid bit.
  assign xperm_n[nib] = val_n[sel[nib]] & {4{vld[nib]}};
end
assign xperm_result = xperm_n;
///////////////
// Butterfly //
///////////////
@ -1083,6 +1174,7 @@ module ibex_alu #(
logic [31:0] unused_imd_val_q_1;
assign unused_imd_val_q_1 = imd_val_q_i[1];
assign shuffle_result = '0;
assign xperm_result = '0;
assign butterfly_result = '0;
assign invbutterfly_result = '0;
assign clmul_result = '0;
@ -1201,6 +1293,7 @@ module ibex_alu #(
assign singlebit_result = '0;
assign rev_result = '0;
assign shuffle_result = '0;
assign xperm_result = '0;
assign butterfly_result = '0;
assign invbutterfly_result = '0;
assign clmul_result = '0;
@ -1238,6 +1331,9 @@ module ibex_alu #(
// Shuffle Operations (RV32B)
ALU_SHFL, ALU_UNSHFL: result_o = shuffle_result;
// Crossbar Permutation Operations (RV32B)
ALU_XPERM_N, ALU_XPERM_B, ALU_XPERM_H: result_o = xperm_result;
// Comparison Operations
ALU_EQ, ALU_NE,
ALU_GE, ALU_GEU,

View file

@ -491,6 +491,9 @@ module ibex_decoder #(
{7'b001_0100, 3'b101}, // gorc
{7'b000_0100, 3'b001}, // shfl
{7'b000_0100, 3'b101}, // unshfl
{7'b001_0100, 3'b010}, // xperm.n
{7'b001_0100, 3'b100}, // xperm.b
{7'b001_0100, 3'b110}, // xperm.h
{7'b001_0000, 3'b001}, // slo
{7'b001_0000, 3'b101}, // sro
// RV32B zbc
@ -1029,12 +1032,15 @@ module ibex_decoder #(
{7'b010_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_BFP; // bfp
// RV32B zbp
{7'b011_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GREV; // grev
{7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC; // gorc
{7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL; // shfl
{7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; // unshfl
{7'b001_0000, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO; // slo
{7'b001_0000, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO; // sro
{7'b011_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GREV; // grev
{7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC; // gorc
{7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL; // shfl
{7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; // unshfl
{7'b001_0100, 3'b010}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_N; // xperm.n
{7'b001_0100, 3'b100}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_B; // xperm.b
{7'b001_0100, 3'b110}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_H; // xperm.h
{7'b001_0000, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO; // slo
{7'b001_0000, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO; // sro
// RV32B zbc
{7'b000_0101, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMUL; // clmul

View file

@ -73,7 +73,7 @@ package ibex_pkg;
// ALU operations //
////////////////////
typedef enum logic [5:0] {
typedef enum logic [6:0] {
// Arithmetics
ALU_ADD,
ALU_SUB,
@ -100,6 +100,9 @@ package ibex_pkg;
ALU_GORC,
ALU_SHFL,
ALU_UNSHFL,
ALU_XPERM_N,
ALU_XPERM_B,
ALU_XPERM_H,
// Address Calculations
// RV32B

View file

@ -1031,6 +1031,9 @@ module ibex_tracer (
default: decode_i_insn("unshfli");
endcase
end
INSN_XPERM_N: decode_r_insn("xperm_n");
INSN_XPERM_B: decode_r_insn("xperm_b");
INSN_XPERM_H: decode_r_insn("xperm_h");
INSN_SLO: decode_r_insn("slo");
INSN_SRO: decode_r_insn("sro");
INSN_SLOI: decode_i_shift_insn("sloi");

View file

@ -246,6 +246,10 @@ package ibex_tracer_pkg;
parameter logic [31:0] INSN_SHFL = { 7'b0000100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_UNSHFL = { 7'b0000100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
// Crossbar permutation instructions xperm.[nbh]: all three patterns share the 7-bit field
// 0010100 and differ only in the 3-bit field (010 = nibble, 100 = byte, 110 = half word).
parameter logic [31:0] INSN_XPERM_N = { 7'b0010100, 10'h?, 3'b010, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_XPERM_B = { 7'b0010100, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_XPERM_H = { 7'b0010100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };