diff --git a/BitManipZbe1cycle.scala b/BitManipZbe1cycle.scala
new file mode 100644
index 0000000..c3df08c
--- /dev/null
+++ b/BitManipZbe1cycle.scala
@@ -0,0 +1,414 @@
+// WARNING: this is auto-generated code!
+// See https://github.com/rdolbeau/VexRiscvBPluginGenerator/
+package vexriscv.plugin
+import spinal.core._
+import vexriscv.{Stageable, DecoderService, VexRiscv}
+object BitManipZbe1cyclePlugin {
+  object BitManipZbe1cycleCtrlpackEnum extends SpinalEnum(binarySequential) {
+    val CTRL_PACK, CTRL_PACKH = newElement()
+  }
+  object BitManipZbe1cycleCtrlEnum extends SpinalEnum(binarySequential) {
+    val CTRL_pack = newElement()
+  }
+  object BitManipZbe1cycleCtrlpack extends Stageable(BitManipZbe1cycleCtrlpackEnum())
+  object BitManipZbe1cycleCtrl extends Stageable(BitManipZbe1cycleCtrlEnum())
+// Prologue
+
+  // function implementing the semantic of 32-bits generalized reverse
+  def fun_grev( a:Bits, b:Bits ) : Bits = {
+    val x1 = ((b&B"32'x00000001")===B"32'x00000001") ? (((a & B"32'x55555555") |<< 1) | ((a & B"32'xAAAAAAAA") |>> 1)) | a
+    val x2 = ((b&B"32'x00000002")===B"32'x00000002") ? (((x1 & B"32'x33333333") |<< 2) | ((x1 & B"32'xCCCCCCCC") |>> 2)) | x1
+    val x4 = ((b&B"32'x00000004")===B"32'x00000004") ? (((x2 & B"32'x0F0F0F0F") |<< 4) | ((x2 & B"32'xF0F0F0F0") |>> 4)) | x2
+    val x8 = ((b&B"32'x00000008")===B"32'x00000008") ? (((x4 & B"32'x00FF00FF") |<< 8) | ((x4 & B"32'xFF00FF00") |>> 8)) | x4
+    val x16 = ((b&B"32'x00000010")===B"32'x00000010") ? (((x8 & B"32'x0000FFFF") |<<16) | ((x8 & B"32'xFFFF0000") |>>16)) | x8
+    x16 // return value
+  }
+  // function implementing the semantic of 32-bits generalized OR-combine
+  def fun_gorc( a:Bits, b:Bits ) : Bits = {
+    val x1 = ((b&B"32'x00000001")===B"32'x00000001") ? (a | ((a & B"32'x55555555") |<< 1) | ((a & B"32'xAAAAAAAA") |>> 1)) | a
+    val x2 = ((b&B"32'x00000002")===B"32'x00000002") ? (x1 | ((x1 & B"32'x33333333") |<< 2) | ((x1 & B"32'xCCCCCCCC") |>> 2)) | x1
+    val x4 = ((b&B"32'x00000004")===B"32'x00000004") ? (x2 | ((x2 & B"32'x0F0F0F0F") |<< 4) | ((x2 & B"32'xF0F0F0F0") |>> 4)) | x2
+    val x8 = ((b&B"32'x00000008")===B"32'x00000008") ? (x4 | ((x4 & B"32'x00FF00FF") |<< 8) | ((x4 & B"32'xFF00FF00") |>> 8)) | x4
+    val x16 = ((b&B"32'x00000010")===B"32'x00000010") ? (x8 | ((x8 & B"32'x0000FFFF") |<<16) | ((x8 & B"32'xFFFF0000") |>>16)) | x8
+    x16 // return value
+  }
+
+  // helper function for the implementation of the generalized shuffles
+  def fun_shuffle32_stage(src:Bits, maskL:Bits, maskR:Bits, N:Int) : Bits = {
+    val x = src & ~(maskL | maskR)
+    val x2 = x | ((src |<< N) & maskL) | ((src |>> N) & maskR);
+    x2 // return value
+  }
+  // function implementing the semantic of 32-bits generalized shuffle
+  def fun_shfl32(a:Bits, b:Bits) : Bits = {
+    val x = a;
+    val x1 = ((b&B"32'x00000008")===B"32'x00000008") ? fun_shuffle32_stage(x , B"32'x00FF0000", B"32'x0000FF00", 8) | x;
+    val x2 = ((b&B"32'x00000004")===B"32'x00000004") ? fun_shuffle32_stage(x1, B"32'x0F000F00", B"32'x00F000F0", 4) | x1;
+    val x3 = ((b&B"32'x00000002")===B"32'x00000002") ? fun_shuffle32_stage(x2, B"32'x30303030", B"32'x0C0C0C0C", 2) | x2;
+    val x4 = ((b&B"32'x00000001")===B"32'x00000001") ? fun_shuffle32_stage(x3, B"32'x44444444", B"32'x22222222", 1) | x3;
+    x4 // return value
+  }
+  // function implementing the semantic of 32-bits generalized unshuffle
+  def fun_unshfl32(a:Bits, b:Bits) : Bits = {
+    val x = a;
+    val x1 = ((b&B"32'x00000001")===B"32'x00000001") ? fun_shuffle32_stage(x , B"32'x44444444", B"32'x22222222", 1) | x;
+    val x2 = ((b&B"32'x00000002")===B"32'x00000002") ? fun_shuffle32_stage(x1, B"32'x30303030", B"32'x0C0C0C0C", 2) | x1;
+    val x3 = ((b&B"32'x00000004")===B"32'x00000004") ? fun_shuffle32_stage(x2, B"32'x0F000F00", B"32'x00F000F0", 4) | x2;
+    val x4 = ((b&B"32'x00000008")===B"32'x00000008") ? fun_shuffle32_stage(x3, B"32'x00FF0000", B"32'x0000FF00", 8) | x3;
+    x4 // return value
+  }
+
+
+  // this is trying to look like DOI 10.2478/jee-2015-0054
+  def fun_clz_NLCi(x:Bits): Bits = {
+    val r2 = (~(x(0) | x(1) | x(2) | x(3)))
+    val r1 = (~(x(2) | x(3)))
+    val r0 = (~(x(3) | (x(1) & ~x(2))))
+    val r = r2 ## r1 ## r0
+    r // return value
+  }
+  def fun_clz_BNE(a:Bits) : Bits = {
+    val a01 = ~(a(0) & a(1))
+    val a23 = ~(a(2) & a(3))
+
+    val a45 = ~(a(4) & a(5))
+    val a67 = ~(a(6) & a(7))
+
+    val a0123 = ~(a01 | a23) // also r(2)
+    val a4567 = ~(a45 | a67)
+
+    val a56 = ~(a(5) & ~a(6))
+    val a024 = (a(0) & a(2) & a(4)) // AND not NAND
+    val a13 = ~(a(1) & a(3))
+    val a12 = ~(a(1) & ~a(2))
+
+    val r3 = ((a0123 & a4567)) // AND not NAND
+    val r2 = (a0123)
+    val r1 = (~(a01 | (~a23 & a45)))
+    val r0 = (~((~((a56) & (a024))) & (~((a13) & (a12) & (a(0))))))
+
+    val r = r3 ## r2 ## r1 ##r0
+
+    r // return value
+  }
+  def fun_clz(in:Bits) : Bits = {
+    val nlc7 = fun_clz_NLCi(in(31 downto 28))
+    val nlc6 = fun_clz_NLCi(in(27 downto 24))
+    val nlc5 = fun_clz_NLCi(in(23 downto 20))
+    val nlc4 = fun_clz_NLCi(in(19 downto 16))
+    val nlc3 = fun_clz_NLCi(in(15 downto 12))
+    val nlc2 = fun_clz_NLCi(in(11 downto 8))
+    val nlc1 = fun_clz_NLCi(in( 7 downto 4))
+    val nlc0 = fun_clz_NLCi(in( 3 downto 0))
+    val a = nlc0(2) ## nlc1(2) ## nlc2(2) ## nlc3(2) ## nlc4(2) ## nlc5(2) ## nlc6(2) ## nlc7(2)
+    val bne = fun_clz_BNE(a)
+
+    val muxo = (bne(2 downto 0)).mux(
+      B"3'b000" -> nlc7(1 downto 0),
+      B"3'b001" -> nlc6(1 downto 0),
+      B"3'b010" -> nlc5(1 downto 0),
+      B"3'b011" -> nlc4(1 downto 0),
+      B"3'b100" -> nlc3(1 downto 0),
+      B"3'b101" -> nlc2(1 downto 0),
+      B"3'b110" -> nlc1(1 downto 0),
+      B"3'b111" -> nlc0(1 downto 0)
+    )
+    val r = (bne(3)) ? B"6'b100000" | (B"1'b0" ## bne(2 downto 0) ## muxo(1 downto 0)) // 6 bits
+
+    r.resize(32) // return value
+  }
+  // For trailing count, count using use leading count on bit-reversed value
+  def fun_ctz(in:Bits) : Bits = {
+    val inr = in(0) ## in(1) ## in(2) ## in(3) ## in(4) ## in(5) ## in(6) ## in(7) ## in(8) ## in(9) ## in(10) ## in(11) ## in(12) ## in(13) ## in(14) ## in(15) ## in(16) ## in(17) ## in(18) ## in(19) ## in(20) ## in(21) ## in(22) ## in(23) ## in(24) ## in(25) ## in(26) ## in(27) ## in(28) ## in(29) ## in(30) ## in(31)
+    fun_clz(inr) // return value
+  }
+
+  // naive popcnt
+  def fun_popcnt(in:Bits) : Bits = {
+    val r = in(0).asBits.resize(6).asUInt + in(1).asBits.resize(6).asUInt + in(2).asBits.resize(6).asUInt + in(3).asBits.resize(6).asUInt +
+            in(4).asBits.resize(6).asUInt + in(5).asBits.resize(6).asUInt + in(6).asBits.resize(6).asUInt + in(7).asBits.resize(6).asUInt +
+            in(8).asBits.resize(6).asUInt + in(9).asBits.resize(6).asUInt + in(10).asBits.resize(6).asUInt + in(11).asBits.resize(6).asUInt +
+            in(12).asBits.resize(6).asUInt + in(13).asBits.resize(6).asUInt + in(14).asBits.resize(6).asUInt + in(15).asBits.resize(6).asUInt +
+            in(16).asBits.resize(6).asUInt + in(17).asBits.resize(6).asUInt + in(18).asBits.resize(6).asUInt + in(19).asBits.resize(6).asUInt +
+            in(20).asBits.resize(6).asUInt + in(21).asBits.resize(6).asUInt + in(22).asBits.resize(6).asUInt + in(23).asBits.resize(6).asUInt +
+            in(24).asBits.resize(6).asUInt + in(25).asBits.resize(6).asUInt + in(26).asBits.resize(6).asUInt + in(27).asBits.resize(6).asUInt +
+            in(28).asBits.resize(6).asUInt + in(29).asBits.resize(6).asUInt + in(30).asBits.resize(6).asUInt + in(31).asBits.resize(6).asUInt
+
+    r.asBits.resize(32) // return value
+  }
+
+  //XPERMs
+  def fun_xperm_n(rs1:Bits, rs2:Bits) : Bits = {
+    val i0 = rs2(3 downto 0).asUInt
+    val i1 = rs2(7 downto 4).asUInt
+    val i2 = rs2(11 downto 8).asUInt
+    val i3 = rs2(15 downto 12).asUInt
+    val i4 = rs2(19 downto 16).asUInt
+    val i5 = rs2(23 downto 20).asUInt
+    val i6 = rs2(27 downto 24).asUInt
+    val i7 = rs2(31 downto 28).asUInt
+    val r0 = (i0).mux(
+      0 -> rs1(3 downto 0),
+      1 -> rs1(7 downto 4),
+      2 -> rs1(11 downto 8),
+      3 -> rs1(15 downto 12),
+      4 -> rs1(19 downto 16),
+      5 -> rs1(23 downto 20),
+      6 -> rs1(27 downto 24),
+      7 -> rs1(31 downto 28),
+      default -> B"4'b0000"
+    )
+    val r1 = (i1).mux(
+      0 -> rs1(3 downto 0),
+      1 -> rs1(7 downto 4),
+      2 -> rs1(11 downto 8),
+      3 -> rs1(15 downto 12),
+      4 -> rs1(19 downto 16),
+      5 -> rs1(23 downto 20),
+      6 -> rs1(27 downto 24),
+      7 -> rs1(31 downto 28),
+      default -> B"4'b0000"
+    )
+    val r2 = (i2).mux(
+      0 -> rs1(3 downto 0),
+      1 -> rs1(7 downto 4),
+      2 -> rs1(11 downto 8),
+      3 -> rs1(15 downto 12),
+      4 -> rs1(19 downto 16),
+      5 -> rs1(23 downto 20),
+      6 -> rs1(27 downto 24),
+      7 -> rs1(31 downto 28),
+      default -> B"4'b0000"
+    )
+    val r3 = (i3).mux(
+      0 -> rs1(3 downto 0),
+      1 -> rs1(7 downto 4),
+      2 -> rs1(11 downto 8),
+      3 -> rs1(15 downto 12),
+      4 -> rs1(19 downto 16),
+      5 -> rs1(23 downto 20),
+      6 -> rs1(27 downto 24),
+      7 -> rs1(31 downto 28),
+      default -> B"4'b0000"
+    )
+    val r4 = (i4).mux(
+      0 -> rs1(3 downto 0),
+      1 -> rs1(7 downto 4),
+      2 -> rs1(11 downto 8),
+      3 -> rs1(15 downto 12),
+      4 -> rs1(19 downto 16),
+      5 -> rs1(23 downto 20),
+      6 -> rs1(27 downto 24),
+      7 -> rs1(31 downto 28),
+      default -> B"4'b0000"
+    )
+    val r5 = (i5).mux(
+      0 -> rs1(3 downto 0),
+      1 -> rs1(7 downto 4),
+      2 -> rs1(11 downto 8),
+      3 -> rs1(15 downto 12),
+      4 -> rs1(19 downto 16),
+      5 -> rs1(23 downto 20),
+      6 -> rs1(27 downto 24),
+      7 -> rs1(31 downto 28),
+      default -> B"4'b0000"
+    )
+    val r6 = (i6).mux(
+      0 -> rs1(3 downto 0),
+      1 -> rs1(7 downto 4),
+      2 -> rs1(11 downto 8),
+      3 -> rs1(15 downto 12),
+      4 -> rs1(19 downto 16),
+      5 -> rs1(23 downto 20),
+      6 -> rs1(27 downto 24),
+      7 -> rs1(31 downto 28),
+      default -> B"4'b0000"
+    )
+    val r7 = (i7).mux(
+      0 -> rs1(3 downto 0),
+      1 -> rs1(7 downto 4),
+      2 -> rs1(11 downto 8),
+      3 -> rs1(15 downto 12),
+      4 -> rs1(19 downto 16),
+      5 -> rs1(23 downto 20),
+      6 -> rs1(27 downto 24),
+      7 -> rs1(31 downto 28),
+      default -> B"4'b0000"
+    )
+    r7 ## r6 ## r5 ## r4 ## r3 ## r2 ## r1 ## r0 // return value
+  }
+  def fun_xperm_b(rs1:Bits, rs2:Bits) : Bits = {
+    val i0 = rs2(7 downto 0).asUInt;
+    val i1 = rs2(15 downto 8).asUInt;
+    val i2 = rs2(23 downto 16).asUInt;
+    val i3 = rs2(31 downto 24).asUInt;
+    val r0 = (i0).mux(
+      0 -> rs1(7 downto 0),
+      1 -> rs1(15 downto 8),
+      2 -> rs1(23 downto 16),
+      3 -> rs1(31 downto 24),
+      default -> B"8'b00000000"
+    )
+    val r1 = (i1).mux(
+      0 -> rs1(7 downto 0),
+      1 -> rs1(15 downto 8),
+      2 -> rs1(23 downto 16),
+      3 -> rs1(31 downto 24),
+      default -> B"8'b00000000"
+    )
+    val r2 = (i2).mux(
+      0 -> rs1(7 downto 0),
+      1 -> rs1(15 downto 8),
+      2 -> rs1(23 downto 16),
+      3 -> rs1(31 downto 24),
+      default -> B"8'b00000000"
+    )
+    val r3 = (i3).mux(
+      0 -> rs1(7 downto 0),
+      1 -> rs1(15 downto 8),
+      2 -> rs1(23 downto 16),
+      3 -> rs1(31 downto 24),
+      default -> B"8'b00000000"
+    )
+    r3 ## r2 ## r1 ## r0 // return value
+  }
+  def fun_xperm_h(rs1:Bits, rs2:Bits) : Bits = {
+    val i0 = rs2(15 downto 0).asUInt;
+    val i1 = rs2(31 downto 16).asUInt;
+    val r0 = (i0).mux(
+      0 -> rs1(15 downto 0),
+      1 -> rs1(31 downto 16),
+      default -> B"16'x0000"
+    )
+    val r1 = (i1).mux(
+      0 -> rs1(15 downto 0),
+      1 -> rs1(31 downto 16),
+      default -> B"16'x0000"
+    )
+    r1 ## r0 // return value
+  }
+
+  def fun_fsl(rs1:Bits, rs3:Bits, rs2:Bits) : Bits = {
+    val rawshamt = (rs2 & B"32'x0000003F").asUInt
+    val shamt = (rawshamt >= 32) ? (rawshamt - 32) | (rawshamt)
+    val A = (shamt === rawshamt) ? (rs1) | (rs3)
+    val B = (shamt === rawshamt) ? (rs3) | (rs1)
+    val r = (shamt === 0) ? (A) | ((A |<< shamt) | (B |>> (32-shamt)))
+
+    r // return value
+  }
+
+  def fun_fsr(rs1:Bits, rs3:Bits, rs2:Bits) : Bits = {
+    val rawshamt = (rs2 & B"32'x0000003F").asUInt
+    val shamt = (rawshamt >= 32) ? (rawshamt - 32) | (rawshamt)
+    val A = (shamt === rawshamt) ? (rs1) | (rs3)
+    val B = (shamt === rawshamt) ? (rs3) | (rs1)
+    val r = (shamt === 0) ? (A) | ((A |>> shamt) | (B |<< (32-shamt)))
+
+    r // return value
+  }
+
+  def fun_bfp(rs1:Bits, rs2:Bits) : Bits = {
+    val off = rs2(20 downto 16).asUInt
+    val rawlen = rs2(27 downto 24).asUInt
+    val convlen = (rawlen === 0) ? (rawlen+16) | (rawlen)
+    val len = ((convlen + off) > 32) ? (32 - off) | (convlen)
+    val allones = B"16'xFFFF"
+    val lenones = (allones |>> (16-len))
+    //val one = B"17'x00001"
+    //val lenones = (((one |<< len).asUInt) - 1).asBits;
+    val mask = (lenones.resize(32) |<< off);
+    val data = (rs2 & lenones.resize(32)) |<< off;
+
+    val r = (rs1 & ~mask) | data
+
+    r // return value
+  }
+
+// End prologue
+} // object Plugin
+class BitManipZbe1cyclePlugin(earlyInjection : Boolean = true) extends Plugin[VexRiscv] {
+  import BitManipZbe1cyclePlugin._
+  object IS_BitManipZbe1cycle extends Stageable(Bool)
+  object BitManipZbe1cycle_FINAL_OUTPUT extends Stageable(Bits(32 bits))
+  override def setup(pipeline: VexRiscv): Unit = {
+    import pipeline.config._
+    val immediateActions = List[(Stageable[_ <: BaseType],Any)](
+      SRC1_CTRL -> Src1CtrlEnum.RS,
+      SRC2_CTRL -> Src2CtrlEnum.IMI,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection),
+      BYPASSABLE_MEMORY_STAGE -> True,
+      RS1_USE -> True,
+      IS_BitManipZbe1cycle -> True
+    )
+    val binaryActions = List[(Stageable[_ <: BaseType],Any)](
+      SRC1_CTRL -> Src1CtrlEnum.RS,
+      SRC2_CTRL -> Src2CtrlEnum.RS,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection),
+      BYPASSABLE_MEMORY_STAGE -> True,
+      RS1_USE -> True,
+      RS2_USE -> True,
+      IS_BitManipZbe1cycle -> True
+    )
+    val unaryActions = List[(Stageable[_ <: BaseType],Any)](
+      SRC1_CTRL -> Src1CtrlEnum.RS,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection),
+      BYPASSABLE_MEMORY_STAGE -> True,
+      RS1_USE -> True,
+      IS_BitManipZbe1cycle -> True
+    )
+    val ternaryActions = List[(Stageable[_ <: BaseType],Any)](
+      SRC1_CTRL -> Src1CtrlEnum.RS,
+      SRC2_CTRL -> Src2CtrlEnum.RS,
+      SRC3_CTRL -> Src3CtrlEnum.RS,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection),
+      BYPASSABLE_MEMORY_STAGE -> True,
+      RS1_USE -> True,
+      RS2_USE -> True,
+      RS3_USE -> True,
+      IS_BitManipZbe1cycle -> True
+    )
+    val immTernaryActions = List[(Stageable[_ <: BaseType],Any)](
+      SRC1_CTRL -> Src1CtrlEnum.RS,
+      SRC2_CTRL -> Src2CtrlEnum.IMI,
+      SRC3_CTRL -> Src3CtrlEnum.RS,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection),
+      BYPASSABLE_MEMORY_STAGE -> True,
+      RS1_USE -> True,
+      RS3_USE -> True,
+      IS_BitManipZbe1cycle -> True
+    )
+    def PACK_KEY = M"0000100----------100-----0110011"
+    def PACKH_KEY = M"0000100----------111-----0110011"
+    val decoderService = pipeline.service(classOf[DecoderService])
+    decoderService.addDefault(IS_BitManipZbe1cycle, False)
+    decoderService.add(List(
+      PACK_KEY -> (binaryActions ++ List(BitManipZbe1cycleCtrl -> BitManipZbe1cycleCtrlEnum.CTRL_pack, BitManipZbe1cycleCtrlpack -> BitManipZbe1cycleCtrlpackEnum.CTRL_PACK)),
+      PACKH_KEY -> (binaryActions ++ List(BitManipZbe1cycleCtrl -> BitManipZbe1cycleCtrlEnum.CTRL_pack, BitManipZbe1cycleCtrlpack -> BitManipZbe1cycleCtrlpackEnum.CTRL_PACKH))
+    ))
+  } // override def setup
+  override def build(pipeline: VexRiscv): Unit = {
+    import pipeline._
+    import pipeline.config._
+    execute plug new Area{
+      import execute._
+      val val_pack = input(BitManipZbe1cycleCtrlpack).mux(
+        BitManipZbe1cycleCtrlpackEnum.CTRL_PACK -> (input(SRC2)(15 downto 0) ## input(SRC1)(15 downto 0)).asBits,
+        BitManipZbe1cycleCtrlpackEnum.CTRL_PACKH -> B"16'x0000" ## (input(SRC2)(7 downto 0) ## input(SRC1)(7 downto 0)).asBits
+      ) // mux pack
+      insert(BitManipZbe1cycle_FINAL_OUTPUT) := val_pack.asBits
+    } // execute plug newArea
+    val injectionStage = if(earlyInjection) execute else memory
+    injectionStage plug new Area {
+      import injectionStage._
+      when (arbitration.isValid && input(IS_BitManipZbe1cycle)) {
+        output(REGFILE_WRITE_DATA) := input(BitManipZbe1cycle_FINAL_OUTPUT)
+      } // when input is
+    } // injectionStage plug newArea
+  } // override def build
+} // class Plugin
diff --git a/BitManipZbe2cycles.scala b/BitManipZbe2cycles.scala
new file mode 100644
index 0000000..2347ea7
--- /dev/null
+++ b/BitManipZbe2cycles.scala
@@ -0,0 +1,161 @@
+// WARNING: this is auto-generated code!
+// See https://github.com/rdolbeau/VexRiscvBPluginGenerator/
+package vexriscv.plugin
+import spinal.core._
+import vexriscv.{Stageable, DecoderService, VexRiscv}
+object BitManipZbe2cyclesPlugin {
+  object BitManipZbe2cyclesCtrlbcompressEnum extends SpinalEnum(binarySequential) {
+    val CTRL_BCOMPRESS, CTRL_BDECOMPRESS = newElement()
+  }
+  object BitManipZbe2cyclesCtrlEnum extends SpinalEnum(binarySequential) {
+    val CTRL_bcompress = newElement()
+  }
+  object BitManipZbe2cyclesCtrlbcompress extends Stageable(BitManipZbe2cyclesCtrlbcompressEnum())
+  object BitManipZbe2cyclesCtrl extends Stageable(BitManipZbe2cyclesCtrlEnum())
+// Prologue
+
+  def fun_compress1(rs1:Bits, rs2: Bits) : Bits = { // stage 1: mask bits 15..0; returns j(5) ## r(16) ## rs1(31..16) ## rs2(31..16) = 53 bits
+    var r = B"16'x0000"
+    var j = (B"5'b00000").asUInt
+    for (i <- 0 to 15) {
+      val rs2i = rs2(i).asUInt
+      val rs1i = rs2(i) & rs1(i)
+      r = r | (rs1i.asBits << j).resize(16)
+      j = j + rs2i
+    }
+    j.asBits.resize(5) ## r ## rs1(31 downto 16) ## rs2(31 downto 16)// return value
+  }
+  def fun_compress2(x:Bits) : Bits = { // stage 2: resumes from the 53-bit stage-1 word, scanning the carried rs2(31..16)
+    val rs2 = x(15 downto 0)
+    val rs1 = x(31 downto 16)
+    var r = B"16'x0000" ## x(47 downto 32)
+    var j = x(52 downto 48).asUInt
+
+    for (i <- 0 to 15) {
+      val rs2i = rs2(i).asUInt
+      val rs1i = rs2(i) & rs1(i)
+      r = r | (rs1i.asBits << j).resize(32)
+      j = j + rs2i
+    }
+    r(31 downto 0) // return value
+  }
+  def fun_decompress1(rs1:Bits, rs2: Bits) : Bits = { // stage 1: mask bits 15..0; returns j(5) ## r(16) ## rs1(31..0) ## rs2(31..16) = 69 bits
+    var r = B"16'x0000"
+    var j = (B"5'b00000").asUInt
+    for (i <- 0 to 15) {
+      val rs2i = rs2(i).asUInt
+      val rs1j = rs2(i) & (rs1 >> j)(0)
+      r = r | (rs1j.asBits << i).resize(16)
+      j = j + rs2i
+    }
+    j.asBits.resize(5) ## r ## rs1(31 downto 0) ## rs2(31 downto 16)// return value
+  }
+  def fun_decompress2(x:Bits) : Bits = { // stage 2: resumes from the 69-bit stage-1 word, scanning the carried rs2(31..16)
+    val rs2 = x(15 downto 0)
+    val rs1 = x(47 downto 16)
+    var r = B"16'x0000" ## x(63 downto 48)
+    var j = x(68 downto 64).asUInt
+
+    for (i <- 0 to 15) {
+      val rs2i = rs2(i).asUInt
+      val rs1j = rs2(i) & (rs1 >> j)(0)
+      r = r | (rs1j.asBits << (i+16)).asBits.resize(32)
+      j = j + rs2i
+    }
+    r(31 downto 0) // return value
+  }
+
+// End prologue
+} // object Plugin
+class BitManipZbe2cyclesPlugin(earlyInjection : Boolean = true) extends Plugin[VexRiscv] {
+  import BitManipZbe2cyclesPlugin._
+  object IS_BitManipZbe2cycles extends Stageable(Bool)
+  object BitManipZbe2cycles_FINAL_OUTPUT extends Stageable(Bits(32 bits))
+  object BitManipZbe2cycles_INTERMEDIATE_BCOMPRESS53 extends Stageable(Bits(53 bits))
+  object BitManipZbe2cycles_INTERMEDIATE_BDECOMPRESS69 extends Stageable(Bits(69 bits))
+  override def setup(pipeline: VexRiscv): Unit = {
+    import pipeline.config._
+    val immediateActions = List[(Stageable[_ <: BaseType],Any)](
+      SRC1_CTRL -> Src1CtrlEnum.RS,
+      SRC2_CTRL -> Src2CtrlEnum.IMI,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> False,
+      BYPASSABLE_MEMORY_STAGE -> Bool(earlyInjection),
+      RS1_USE -> True,
+      IS_BitManipZbe2cycles -> True
+    )
+    val binaryActions = List[(Stageable[_ <: BaseType],Any)](
+      SRC1_CTRL -> Src1CtrlEnum.RS,
+      SRC2_CTRL -> Src2CtrlEnum.RS,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> False,
+      BYPASSABLE_MEMORY_STAGE -> Bool(earlyInjection),
+      RS1_USE -> True,
+      RS2_USE -> True,
+      IS_BitManipZbe2cycles -> True
+    )
+    val unaryActions = List[(Stageable[_ <: BaseType],Any)](
+      SRC1_CTRL -> Src1CtrlEnum.RS,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> False,
+      BYPASSABLE_MEMORY_STAGE -> Bool(earlyInjection),
+      RS1_USE -> True,
+      IS_BitManipZbe2cycles -> True
+    )
+    val ternaryActions = List[(Stageable[_ <: BaseType],Any)](
+      SRC1_CTRL -> Src1CtrlEnum.RS,
+      SRC2_CTRL -> Src2CtrlEnum.RS,
+      SRC3_CTRL -> Src3CtrlEnum.RS,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> False,
+      BYPASSABLE_MEMORY_STAGE -> Bool(earlyInjection),
+      RS1_USE -> True,
+      RS2_USE -> True,
+      RS3_USE -> True,
+      IS_BitManipZbe2cycles -> True
+    )
+    val immTernaryActions = List[(Stageable[_ <: BaseType],Any)](
+      SRC1_CTRL -> Src1CtrlEnum.RS,
+      SRC2_CTRL -> Src2CtrlEnum.IMI,
+      SRC3_CTRL -> Src3CtrlEnum.RS,
+      REGFILE_WRITE_VALID -> True,
+      BYPASSABLE_EXECUTE_STAGE -> False,
+      BYPASSABLE_MEMORY_STAGE -> Bool(earlyInjection),
+      RS1_USE -> True,
+      RS3_USE -> True,
+      IS_BitManipZbe2cycles -> True
+    )
+    def BDECOMPRESS_KEY = M"0100100----------110-----0110011"
+    def BCOMPRESS_KEY = M"0000100----------110-----0110011"
+    val decoderService = pipeline.service(classOf[DecoderService])
+    decoderService.addDefault(IS_BitManipZbe2cycles, False)
+    decoderService.add(List(
+      BDECOMPRESS_KEY -> (binaryActions ++ List(BitManipZbe2cyclesCtrl -> BitManipZbe2cyclesCtrlEnum.CTRL_bcompress, BitManipZbe2cyclesCtrlbcompress -> BitManipZbe2cyclesCtrlbcompressEnum.CTRL_BDECOMPRESS)),
+      BCOMPRESS_KEY -> (binaryActions ++ List(BitManipZbe2cyclesCtrl -> BitManipZbe2cyclesCtrlEnum.CTRL_bcompress, BitManipZbe2cyclesCtrlbcompress -> BitManipZbe2cyclesCtrlbcompressEnum.CTRL_BCOMPRESS))
+    ))
+  } // override def setup
+  override def build(pipeline: VexRiscv): Unit = {
+    import pipeline._
+    import pipeline.config._
+    execute plug new Area{
+      import execute._
+      insert(BitManipZbe2cycles_INTERMEDIATE_BCOMPRESS53) := fun_compress1(input(SRC1), input(SRC2)).asBits
+      insert(BitManipZbe2cycles_INTERMEDIATE_BDECOMPRESS69) := fun_decompress1(input(SRC1), input(SRC2)).asBits
+    } // execute plug newArea
+    memory plug new Area{
+      import memory._
+      val val_bcompress = input(BitManipZbe2cyclesCtrlbcompress).mux(
+        BitManipZbe2cyclesCtrlbcompressEnum.CTRL_BCOMPRESS -> fun_compress2(input(BitManipZbe2cycles_INTERMEDIATE_BCOMPRESS53)).asBits.asBits,
+        BitManipZbe2cyclesCtrlbcompressEnum.CTRL_BDECOMPRESS -> fun_decompress2(input(BitManipZbe2cycles_INTERMEDIATE_BDECOMPRESS69)).asBits.asBits
+      ) // mux bcompress
+      insert(BitManipZbe2cycles_FINAL_OUTPUT) := val_bcompress.asBits
+    } // memory plug newArea
+    val injectionStage = if(earlyInjection) memory else writeBack
+    injectionStage plug new Area {
+      import injectionStage._
+      when (arbitration.isValid && input(IS_BitManipZbe2cycles)) {
+        output(REGFILE_WRITE_DATA) := input(BitManipZbe2cycles_FINAL_OUTPUT)
+      } // when input is
+    } // injectionStage plug newArea
+  } // override def build
+} // class Plugin
diff --git a/Makefile b/Makefile
index 2509c89..40997a1 100644
--- a/Makefile
+++ b/Makefile
@@ -52,7 +52,13 @@ BitManipZbb.scala: gen_plugin data_bitmanip.txt data_bitmanip_ZbbOnly.txt
 BitManipZbc.scala: gen_plugin data_clmul.txt
 	./gen_plugin -n BitManipZbc -i data_clmul.txt -I Zbc >| $@
 
-## Zbe unimplemented, b(de)compress are missing and pack[h] are in Zbf anyway
+# this is just pack[h], so don't use with Zbp or Zbf
+BitManipZbe1cycle.scala: gen_plugin data_bitmanip.txt
+	./gen_plugin -n BitManipZbe1cycle -i data_bitmanip.txt -I Zbe >| $@
+
+# b[de]compress, implemented over 2 cycles
+BitManipZbe2cycles.scala: gen_plugin data_bitmanip_compress.txt
+	./gen_plugin -n BitManipZbe2cycles -i data_bitmanip_compress.txt -I Zbe >| $@
 
 BitManipZbf.scala: gen_plugin data_bitmanip.txt
 	./gen_plugin -n BitManipZbf -i data_bitmanip.txt -I Zbf >| $@
@@ -122,7 +128,7 @@ PackedSIMDSlow.scala: gen_plugin data_Zpn_2cycles.txt
 PackedSIMDWide.scala: gen_plugin data_Zp64.txt
 	./gen_plugin -w -n PackedSIMDWide -i data_Zp64.txt -I '*' >| $@
 
-B: BitManipZba.scala BitManipZbb.scala BitManipZbbZbp.scala BitManipZbc.scala BitManipZbf.scala BitManipBFPonly.scala BitManipZbp.scala BitManipZbs.scala BitManipZbt.scala
+B: BitManipZba.scala BitManipZbb.scala BitManipZbbZbp.scala BitManipZbc.scala BitManipZbe1cycle.scala BitManipZbe2cycles.scala BitManipZbf.scala BitManipBFPonly.scala BitManipZbp.scala BitManipZbs.scala BitManipZbt.scala
 
 P: PackedSIMDBase.scala PackedSIMDSlow.scala PackedSIMDWide.scala
 
diff --git a/README.md b/README.md
index 275172e..97ddce4 100644
--- a/README.md
+++ b/README.md
@@ -10,12 +10,11 @@ The generated plugin is for RV32 only. It doesn't yet support all B instructions
 
 * all instructions ending in 'W', as they are RV64-only
 * BMAT*, as they are RV64-only
-* BDEP/BEXT
 * CRC32*
 * Three-operands instructions (CMIX, CMOV, FS[RL]*); they are available but need a VexRiscv patch to support the third input (all VexRiscv patch are available on https://github.com/rdolbeau/VexRiscv/tree/three_operands)
 
 There is support for partial instructions (rev8, zext.h, orc.b) so that the default plugins generated for Zba, Zbb and Zbc should be feature-complete. To get everything without conflicts, use:
-`new BitManipZbaPlugin, new BitManipZbbZbpPlugin, new BitManipZbcPlugin, new BitManipBFPOnlyPlugin, new BitManipZbsPlugin, new BitManipZbtPlugin,`
+`new BitManipZbaPlugin, new BitManipZbbZbpPlugin, new BitManipZbcPlugin, new BitManipZbe2cyclesPlugin, new BitManipBFPOnlyPlugin, new BitManipZbsPlugin, new BitManipZbtPlugin,`
 
 This has received limited testing in a [Linux-on-Litex-VexRiscv](https://github.com/litex-hub/linux-on-litex-vexriscv) SoC. YMMV. SMP mode was tested as well.
 
diff --git a/data_bitmanip_compress.txt b/data_bitmanip_compress.txt
new file mode 100644
index 0000000..e41b3da
--- /dev/null
+++ b/data_bitmanip_compress.txt
@@ -0,0 +1,60 @@
+I BDECOMPRESS BDECOMPRESS 0100100----------110-----0110011 bcompress Zbe
+I BCOMPRESS BCOMPRESS 0000100----------110-----0110011 bcompress Zbe
+
+S BCOMPRESS "fun_compress1(input(SRC1), input(SRC2))"
+T BCOMPRESS 53 "fun_compress2"
+S BDECOMPRESS "fun_decompress1(input(SRC1), input(SRC2))"
+T BDECOMPRESS 69 "fun_decompress2"
+
+P """
+  def fun_compress1(rs1:Bits, rs2: Bits) : Bits = { // stage 1: mask bits 15..0; returns j(5) ## r(16) ## rs1(31..16) ## rs2(31..16) = 53 bits
+    var r = B"16'x0000"
+    var j = (B"5'b00000").asUInt
+    for (i <- 0 to 15) {
+      val rs2i = rs2(i).asUInt
+      val rs1i = rs2(i) & rs1(i)
+      r = r | (rs1i.asBits << j).resize(16)
+      j = j + rs2i
+    }
+    j.asBits.resize(5) ## r ## rs1(31 downto 16) ## rs2(31 downto 16)// return value
+  }
+  def fun_compress2(x:Bits) : Bits = { // stage 2: resumes from the 53-bit stage-1 word, scanning the carried rs2(31..16)
+    val rs2 = x(15 downto 0)
+    val rs1 = x(31 downto 16)
+    var r = B"16'x0000" ## x(47 downto 32)
+    var j = x(52 downto 48).asUInt
+
+    for (i <- 0 to 15) {
+      val rs2i = rs2(i).asUInt
+      val rs1i = rs2(i) & rs1(i)
+      r = r | (rs1i.asBits << j).resize(32)
+      j = j + rs2i
+    }
+    r(31 downto 0) // return value
+  }
+  def fun_decompress1(rs1:Bits, rs2: Bits) : Bits = { // stage 1: mask bits 15..0; returns j(5) ## r(16) ## rs1(31..0) ## rs2(31..16) = 69 bits
+    var r = B"16'x0000"
+    var j = (B"5'b00000").asUInt
+    for (i <- 0 to 15) {
+      val rs2i = rs2(i).asUInt
+      val rs1j = rs2(i) & (rs1 >> j)(0)
+      r = r | (rs1j.asBits << i).resize(16)
+      j = j + rs2i
+    }
+    j.asBits.resize(5) ## r ## rs1(31 downto 0) ## rs2(31 downto 16)// return value
+  }
+  def fun_decompress2(x:Bits) : Bits = { // stage 2: resumes from the 69-bit stage-1 word, scanning the carried rs2(31..16)
+    val rs2 = x(15 downto 0)
+    val rs1 = x(47 downto 16)
+    var r = B"16'x0000" ## x(63 downto 48)
+    var j = x(68 downto 64).asUInt
+
+    for (i <- 0 to 15) {
+      val rs2i = rs2(i).asUInt
+      val rs1j = rs2(i) & (rs1 >> j)(0)
+      r = r | (rs1j.asBits << (i+16)).asBits.resize(32)
+      j = j + rs2i
+    }
+    r(31 downto 0) // return value
+  }
+"""
diff --git a/test_b.c b/test_b.c
index f999069..b0db434 100644
--- a/test_b.c
+++ b/test_b.c
@@ -109,13 +109,6 @@ int main(int argc, char **argv) {
   installillhandler();
 #endif // CHECK_SIGILL
 
-  if (argc > 1)
-    a = strtoul(argv[1], NULL, 16);
-  if (argc > 2)
-    b = strtoul(argv[2], NULL, 16);
-  if (argc > 3)
-    d = strtoul(argv[3], NULL, 16);
-
   for (index = 0 ; index < nonrandom_cnt[0] ; index++) {
     a = nonrandom_a[index];
 
@@ -193,6 +186,9 @@ int main(int argc, char **argv) {
 
     T2(_rv32_bfp);
 
+    T2(_rv32_bdep);
+    T2(_rv32_bext);
+
     for (index2 = 0 ; index2 < nonrandom_cnt[2] ; index2++) {
       d = nonrandom_d[index2];
       T3(_rv_cmix);
diff --git a/test_p.c b/test_p.c
index 6d152b6..e928333 100644
--- a/test_p.c
+++ b/test_p.c
@@ -794,6 +794,10 @@ int main(int argc, char **argv) {
   unsigned int index, index1, index2, index3;
   unsigned long long cq = 0;
 
+#if defined(CHECK_SIGILL)
+  installillhandler();
+#endif // CHECK_SIGILL
+
   for (index = 0 ; index < nonrandom_cnt[0] ; index++) {
     a = nonrandom_a[index];
 