mirror of
https://github.com/lowRISC/ibex.git
synced 2025-06-27 17:00:41 -04:00
[rtl, bitmanip] Add xperm.[nbh] instruction (Zbp, draft v.0.93)
Signed-off-by: Pirmin Vogel <vogelpi@lowrisc.org>
This commit is contained in:
parent
40dab87448
commit
c78acac8cc
5 changed files with 119 additions and 7 deletions
|
@ -403,6 +403,7 @@ module ibex_alu #(
|
|||
logic [31:0] singlebit_result;
|
||||
logic [31:0] rev_result;
|
||||
logic [31:0] shuffle_result;
|
||||
logic [31:0] xperm_result;
|
||||
logic [31:0] butterfly_result;
|
||||
logic [31:0] invbutterfly_result;
|
||||
logic [31:0] clmul_result;
|
||||
|
@ -724,6 +725,96 @@ module ibex_alu #(
|
|||
end
|
||||
end
|
||||
|
||||
//////////////
|
||||
// Crossbar //
|
||||
//////////////
|
||||
// The crossbar permutation instructions xperm.[nbh] (Zbp) can be implemented using 8
|
||||
// parallel 4-bit-wide, 8-input crossbars. Basically, we permute the 8 nibbles of operand_a_i
|
||||
// based on operand_b_i.
|
||||
|
||||
// Generate selector indices and valid signals.
|
||||
// - sel_n[x] indicates which nibble of operand_a_i is selected for output nibble x.
|
||||
// - vld_n[x] indicates if the selection is valid.
|
||||
logic [7:0][2:0] sel_n; // nibbles
|
||||
logic [7:0] vld_n; // nibbles
|
||||
logic [3:0][1:0] sel_b; // bytes
|
||||
logic [3:0] vld_b; // bytes
|
||||
logic [1:0][0:0] sel_h; // half words
|
||||
logic [1:0] vld_h; // half words
|
||||
|
||||
// Per nibble, 3 bits are needed for the selection. Other bits must be zero.
|
||||
// sel_n bit mask: 32'b0111_0111_0111_0111_0111_0111_0111_0111
|
||||
// vld_n bit mask: 32'b1000_1000_1000_1000_1000_1000_1000_1000
|
||||
// One selector/valid pair per 4-bit field (nibble) of operand_b_i.
for (genvar i = 0; i < 8; i++) begin : gen_sel_vld_n
|
||||
// The lower 3 bits of nibble i are the index of the source nibble.
assign sel_n[i] = operand_b_i[i*4 +: 3];
|
||||
// The selection is valid only if the top bit of nibble i is zero.
assign vld_n[i] = ~|operand_b_i[i*4 + 3 +: 1];
|
||||
end
|
||||
|
||||
// Per byte, 2 bits are needed for the selection. Other bits must be zero.
|
||||
// sel_b bit mask: 32'b0000_0011_0000_0011_0000_0011_0000_0011
|
||||
// vld_b bit mask: 32'b1111_1100_1111_1100_1111_1100_1111_1100
|
||||
// One selector/valid pair per 8-bit field (byte) of operand_b_i.
for (genvar i = 0; i < 4; i++) begin : gen_sel_vld_b
|
||||
// The lower 2 bits of byte i are the index of the source byte.
assign sel_b[i] = operand_b_i[i*8 +: 2];
|
||||
// The selection is valid only if the upper 6 bits of byte i are all zero.
assign vld_b[i] = ~|operand_b_i[i*8 + 2 +: 6];
|
||||
end
|
||||
|
||||
// Per half word, only 1 bit is needed for the selection. All other bits must be zero.
|
||||
// sel_h bit mask: 32'b0000_0000_0000_0001_0000_0000_0000_0001
|
||||
// vld_h bit mask: 32'b1111_1111_1111_1110_1111_1111_1111_1110
|
||||
// One selector/valid pair per 16-bit field (half word) of operand_b_i.
for (genvar i = 0; i < 2; i++) begin : gen_sel_vld_h
|
||||
// The least significant bit of half word i is the index of the source half word.
assign sel_h[i] = operand_b_i[i*16 +: 1];
|
||||
// The selection is valid only if the upper 15 bits of half word i are all zero.
assign vld_h[i] = ~|operand_b_i[i*16 + 1 +: 15];
|
||||
end
|
||||
|
||||
// Convert selector indices and valid signals to control the nibble-based
|
||||
// crossbar logic.
|
||||
logic [7:0][2:0] sel;
|
||||
logic [7:0] vld;
|
||||
// Pick the per-nibble controls (sel, vld) according to operator_i. Byte and half-word
// selections are expanded to nibble granularity: the upper bits of each nibble index
// come from the byte/half-word selector, the lower bits enumerate the nibbles inside
// the selected byte/half word.
always_comb begin
|
||||
unique case (operator_i)
|
||||
ALU_XPERM_N: begin
|
||||
// No conversion needed, the selection is already per nibble.
|
||||
sel = sel_n;
|
||||
vld = vld_n;
|
||||
end
|
||||
|
||||
ALU_XPERM_B: begin
|
||||
// Convert byte to nibble indices.
// Output byte b drives output nibbles 2b and 2b+1. They take the low (LSB = 0) and
// high (LSB = 1) nibble of the selected source byte and share the byte's valid bit.
|
||||
for (int b = 0; b < 4; b++) begin
|
||||
sel[b*2 + 0] = {sel_b[b], 1'b0};
|
||||
sel[b*2 + 1] = {sel_b[b], 1'b1};
|
||||
vld[b*2 +: 2] = {2{vld_b[b]}};
|
||||
end
|
||||
end
|
||||
|
||||
ALU_XPERM_H: begin
|
||||
// Convert half-word to nibble indices.
// Output half word h drives output nibbles 4h to 4h+3. The two LSBs enumerate the
// nibbles of the selected source half word; all four share the half word's valid bit.
|
||||
for (int h = 0; h < 2; h++) begin
|
||||
sel[h*4 + 0] = {sel_h[h], 2'b00};
|
||||
sel[h*4 + 1] = {sel_h[h], 2'b01};
|
||||
sel[h*4 + 2] = {sel_h[h], 2'b10};
|
||||
sel[h*4 + 3] = {sel_h[h], 2'b11};
|
||||
vld[h*4 +: 4] = {4{vld_h[h]}};
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
// Tie valid to zero to disable the crossbar unless we need it.
// sel is still assigned in this branch; with vld all zero it is never used to
// pick an output nibble.
|
||||
sel = sel_n;
|
||||
vld = '0;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
// The actual nibble-based crossbar logic.
|
||||
// val_n:   operand_a_i viewed as 8 nibbles (the crossbar inputs).
// xperm_n: the 8 output nibbles of the crossbar.
logic [7:0][3:0] val_n;
|
||||
logic [7:0][3:0] xperm_n;
|
||||
assign val_n = operand_a_i;
|
||||
for (genvar i = 0; i < 8; i++) begin : gen_xperm_n
|
||||
// Output nibble i is the input nibble selected by sel[i], or zero if the
// selection is not valid.
assign xperm_n[i] = vld[i] ? val_n[sel[i]] : '0;
|
||||
end
|
||||
// Flatten the 8 output nibbles into the result word.
assign xperm_result = xperm_n;
|
||||
|
||||
///////////////
|
||||
// Butterfly //
|
||||
///////////////
|
||||
|
@ -1083,6 +1174,7 @@ module ibex_alu #(
|
|||
logic [31:0] unused_imd_val_q_1;
|
||||
assign unused_imd_val_q_1 = imd_val_q_i[1];
|
||||
assign shuffle_result = '0;
|
||||
assign xperm_result = '0;
|
||||
assign butterfly_result = '0;
|
||||
assign invbutterfly_result = '0;
|
||||
assign clmul_result = '0;
|
||||
|
@ -1201,6 +1293,7 @@ module ibex_alu #(
|
|||
assign singlebit_result = '0;
|
||||
assign rev_result = '0;
|
||||
assign shuffle_result = '0;
|
||||
assign xperm_result = '0;
|
||||
assign butterfly_result = '0;
|
||||
assign invbutterfly_result = '0;
|
||||
assign clmul_result = '0;
|
||||
|
@ -1238,6 +1331,9 @@ module ibex_alu #(
|
|||
// Shuffle Operations (RV32B)
|
||||
ALU_SHFL, ALU_UNSHFL: result_o = shuffle_result;
|
||||
|
||||
// Crossbar Permutation Operations (RV32B)
|
||||
ALU_XPERM_N, ALU_XPERM_B, ALU_XPERM_H: result_o = xperm_result;
|
||||
|
||||
// Comparison Operations
|
||||
ALU_EQ, ALU_NE,
|
||||
ALU_GE, ALU_GEU,
|
||||
|
|
|
@ -491,6 +491,9 @@ module ibex_decoder #(
|
|||
{7'b001_0100, 3'b101}, // gorc
|
||||
{7'b000_0100, 3'b001}, // shfl
|
||||
{7'b000_0100, 3'b101}, // unshfl
|
||||
{7'b001_0100, 3'b010}, // xperm.n
|
||||
{7'b001_0100, 3'b100}, // xperm.b
|
||||
{7'b001_0100, 3'b110}, // xperm.h
|
||||
{7'b001_0000, 3'b001}, // slo
|
||||
{7'b001_0000, 3'b101}, // sro
|
||||
// RV32B zbc
|
||||
|
@ -1029,12 +1032,15 @@ module ibex_decoder #(
|
|||
{7'b010_0100, 3'b111}: if (RV32B != RV32BNone) alu_operator_o = ALU_BFP; // bfp
|
||||
|
||||
// RV32B zbp
|
||||
{7'b011_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GREV; // grev
|
||||
{7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC; // gorc
|
||||
{7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL; // shfl
|
||||
{7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; // unshfl
|
||||
{7'b001_0000, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO; // slo
|
||||
{7'b001_0000, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO; // sro
|
||||
{7'b011_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GREV; // grev
|
||||
{7'b001_0100, 3'b101}: if (RV32B != RV32BNone) alu_operator_o = ALU_GORC; // gorc
|
||||
{7'b000_0100, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SHFL; // shfl
|
||||
{7'b000_0100, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_UNSHFL; // unshfl
|
||||
{7'b001_0100, 3'b010}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_N; // xperm.n
|
||||
{7'b001_0100, 3'b100}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_B; // xperm.b
|
||||
{7'b001_0100, 3'b110}: if (RV32B == RV32BFull) alu_operator_o = ALU_XPERM_H; // xperm.h
|
||||
{7'b001_0000, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_SLO; // slo
|
||||
{7'b001_0000, 3'b101}: if (RV32B == RV32BFull) alu_operator_o = ALU_SRO; // sro
|
||||
|
||||
// RV32B zbc
|
||||
{7'b000_0101, 3'b001}: if (RV32B == RV32BFull) alu_operator_o = ALU_CLMUL; // clmul
|
||||
|
|
|
@ -73,7 +73,7 @@ package ibex_pkg;
|
|||
// ALU operations //
|
||||
////////////////////
|
||||
|
||||
typedef enum logic [5:0] {
|
||||
typedef enum logic [6:0] {
|
||||
// Arithmetics
|
||||
ALU_ADD,
|
||||
ALU_SUB,
|
||||
|
@ -100,6 +100,9 @@ package ibex_pkg;
|
|||
ALU_GORC,
|
||||
ALU_SHFL,
|
||||
ALU_UNSHFL,
|
||||
ALU_XPERM_N,
|
||||
ALU_XPERM_B,
|
||||
ALU_XPERM_H,
|
||||
|
||||
// Address Calculations
|
||||
// RV32B
|
||||
|
|
|
@ -1031,6 +1031,9 @@ module ibex_tracer (
|
|||
default: decode_i_insn("unshfli");
|
||||
endcase
|
||||
end
|
||||
INSN_XPERM_N: decode_r_insn("xperm_n");
|
||||
INSN_XPERM_B: decode_r_insn("xperm_b");
|
||||
INSN_XPERM_H: decode_r_insn("xperm_h");
|
||||
INSN_SLO: decode_r_insn("slo");
|
||||
INSN_SRO: decode_r_insn("sro");
|
||||
INSN_SLOI: decode_i_shift_insn("sloi");
|
||||
|
|
|
@ -246,6 +246,10 @@ package ibex_tracer_pkg;
|
|||
parameter logic [31:0] INSN_SHFL = { 7'b0000100, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_UNSHFL = { 7'b0000100, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
|
||||
|
||||
parameter logic [31:0] INSN_XPERM_N = { 7'b0010100, 10'h?, 3'b010, 5'h?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_XPERM_B = { 7'b0010100, 10'h?, 3'b100, 5'h?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_XPERM_H = { 7'b0010100, 10'h?, 3'b110, 5'h?, {OPCODE_OP} };
|
||||
|
||||
parameter logic [31:0] INSN_SLO = { 7'b0010000, 10'h?, 3'b001, 5'h?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SRO = { 7'b0010000, 10'h?, 3'b101, 5'h?, {OPCODE_OP} };
|
||||
parameter logic [31:0] INSN_SLOI = { 5'b00100 , 12'h?, 3'b001, 5'h?, {OPCODE_OP_IMM} };
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue