From 27fe5264cc49f9d731eab2441d79396a51fee2f6 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Mon, 8 Mar 2021 11:52:14 -0500 Subject: [PATCH] Some cleanup related to toolchain, add Zbr --- BitManipZbr.scala | 134 +++++++++++++++++++++++++++++++++++ Makefile | 5 +- README.md | 1 - data_crc.txt | 40 +++++++++++ new_instructions_support_b.h | 15 ---- r5.mk | 2 +- test_b.c | 75 +++++++++++--------- 7 files changed, 218 insertions(+), 54 deletions(-) create mode 100644 BitManipZbr.scala create mode 100644 data_crc.txt diff --git a/BitManipZbr.scala b/BitManipZbr.scala new file mode 100644 index 0000000..5c5e8ae --- /dev/null +++ b/BitManipZbr.scala @@ -0,0 +1,134 @@ +// WARNING: this is auto-generated code! +// See https://github.com/rdolbeau/VexRiscvBPluginGenerator/ +package vexriscv.plugin +import spinal.core._ +import vexriscv.{Stageable, DecoderService, VexRiscv} +object BitManipZbrPlugin { + object BitManipZbrCtrlCRC32xEnum extends SpinalEnum(binarySequential) { + val CTRL_CRC32xdotB, CTRL_CRC32xdotH, CTRL_CRC32xdotW = newElement() + } + object BitManipZbrCtrlEnum extends SpinalEnum(binarySequential) { + val CTRL_CRC32x = newElement() + } + object BitManipZbrCtrlCRC32x extends Stageable(BitManipZbrCtrlCRC32xEnum()) + object BitManipZbrCtrl extends Stageable(BitManipZbrCtrlEnum()) +// Prologue + + def fun_crc32xdotb(rs1: Bits, isC : Bool) : Bits = { + val p = ((isC === True) ? (B"32'x82F63B78") | (B"32'xEDB88320")) + var x = rs1 + for (i <- 0 to 7) { + x = (x |>> 1) ^ ((x(0) === True) ? (p) | (B"32'x00000000")) + } + val r = x + r // return value + } + def fun_crc32xdoth(rs1: Bits, isC : Bool) : Bits = { + val p = ((isC === True) ? (B"32'x82F63B78") | (B"32'xEDB88320")) + var x = rs1 + for (i <- 0 to 15) { + x = (x |>> 1) ^ ((x(0) === True) ? (p) | (B"32'x00000000")) + } + val r = x + r // return value + } + def fun_crc32xdotw(rs1: Bits, isC : Bool) : Bits = { + val p = ((isC === True) ? (B"32'x82F63B78") | (B"32'xEDB88320")) + var x = rs1 + for (i <- 0 to 31) { + x = (x |>> 1) ^ ((x(0) === True) ? (p) | (B"32'x00000000")) + } + val r = x + r // return value + } + +// End prologue +} // object Plugin +class BitManipZbrPlugin(earlyInjection : Boolean = true) extends Plugin[VexRiscv] { + import BitManipZbrPlugin._ + object IS_BitManipZbr extends Stageable(Bool) + object BitManipZbr_FINAL_OUTPUT extends Stageable(Bits(32 bits)) + override def setup(pipeline: VexRiscv): Unit = { + import pipeline.config._ + val immediateActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.IMI, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection), + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True, + IS_BitManipZbr -> True + ) + val binaryActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection), + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True, + RS2_USE -> True, + IS_BitManipZbr -> True + ) + val unaryActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection), + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True, + IS_BitManipZbr -> True + ) + val ternaryActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.RS, + SRC3_CTRL -> Src3CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection), + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True, + RS2_USE -> True, + RS3_USE -> True, + IS_BitManipZbr -> True + ) + val immTernaryActions = List[(Stageable[_ <: BaseType],Any)]( + SRC1_CTRL -> Src1CtrlEnum.RS, + SRC2_CTRL -> Src2CtrlEnum.IMI, + SRC3_CTRL -> Src3CtrlEnum.RS, + REGFILE_WRITE_VALID -> True, + BYPASSABLE_EXECUTE_STAGE -> Bool(earlyInjection), + BYPASSABLE_MEMORY_STAGE -> True, + RS1_USE -> True, + RS3_USE -> True, + IS_BitManipZbr -> True + ) + def CRC32xdotB_KEY = M"01100001-000-----001-----0010011" + def CRC32xdotH_KEY = M"01100001-001-----001-----0010011" + def CRC32xdotW_KEY = M"01100001-010-----001-----0010011" + val decoderService = pipeline.service(classOf[DecoderService]) + decoderService.addDefault(IS_BitManipZbr, False) + decoderService.add(List( + CRC32xdotB_KEY -> (unaryActions ++ List(BitManipZbrCtrl -> BitManipZbrCtrlEnum.CTRL_CRC32x, BitManipZbrCtrlCRC32x -> BitManipZbrCtrlCRC32xEnum.CTRL_CRC32xdotB)), + CRC32xdotH_KEY -> (unaryActions ++ List(BitManipZbrCtrl -> BitManipZbrCtrlEnum.CTRL_CRC32x, BitManipZbrCtrlCRC32x -> BitManipZbrCtrlCRC32xEnum.CTRL_CRC32xdotH)), + CRC32xdotW_KEY -> (unaryActions ++ List(BitManipZbrCtrl -> BitManipZbrCtrlEnum.CTRL_CRC32x, BitManipZbrCtrlCRC32x -> BitManipZbrCtrlCRC32xEnum.CTRL_CRC32xdotW)) + )) + } // override def setup + override def build(pipeline: VexRiscv): Unit = { + import pipeline._ + import pipeline.config._ + execute plug new Area{ + import execute._ + val val_CRC32x = input(BitManipZbrCtrlCRC32x).mux( + BitManipZbrCtrlCRC32xEnum.CTRL_CRC32xdotB -> fun_crc32xdotb(input(SRC1), input(INSTRUCTION)(23)).asBits, + BitManipZbrCtrlCRC32xEnum.CTRL_CRC32xdotH -> fun_crc32xdoth(input(SRC1), input(INSTRUCTION)(23)).asBits, + BitManipZbrCtrlCRC32xEnum.CTRL_CRC32xdotW -> fun_crc32xdotw(input(SRC1), input(INSTRUCTION)(23)).asBits + ) // mux CRC32x + insert(BitManipZbr_FINAL_OUTPUT) := val_CRC32x.asBits + } // execute plug newArea + val injectionStage = if(earlyInjection) execute else memory + injectionStage plug new Area { + import injectionStage._ + when (arbitration.isValid && input(IS_BitManipZbr)) { + output(REGFILE_WRITE_DATA) := input(BitManipZbr_FINAL_OUTPUT) + } // when input is + } // injectionStage plug newArea + } // override def build +} // class Plugin diff --git a/Makefile b/Makefile index 03e1bdf..2451436 100644 --- a/Makefile +++ b/Makefile @@ -80,7 +80,8 @@ BitManipZbbZbp.scala: gen_plugin data_bitmanip.txt BitManipZbp.scala: gen_plugin data_bitmanip.txt ./gen_plugin -n BitManipZbp -i data_bitmanip.txt -I Zbp >| $@ -## Zbr unimplemented, crc32* are missing +BitManipZbr.scala: gen_plugin data_crc.txt + ./gen_plugin -n BitManipZbr -i data_crc.txt -I Zbr >| $@ BitManipZbs.scala: gen_plugin data_bitmanip.txt ./gen_plugin -n BitManipZbs -i data_bitmanip.txt -I Zbs >| $@ @@ -127,7 +128,7 @@ PackedSIMDSlow.scala: gen_plugin data_Zpn_2cycles.txt PackedSIMDWide.scala: gen_plugin data_Zp64.txt ./gen_plugin -w -n PackedSIMDWide -i data_Zp64.txt -I '*' >| $@ -B: BitManipZba.scala BitManipZbb.scala BitManipZbbZbp.scala BitManipZbc.scala BitManipZbe1cycle.scala BitManipZbe2cycles.scala BitManipZbf.scala BitManipBFPonly.scala BitManipZbp.scala BitManipZbs.scala BitManipZbt.scala +B: BitManipZba.scala BitManipZbb.scala BitManipZbbZbp.scala BitManipZbc.scala BitManipZbe1cycle.scala BitManipZbe2cycles.scala BitManipZbf.scala BitManipBFPonly.scala BitManipZbp.scala BitManipZbr.scala BitManipZbs.scala BitManipZbt.scala P: PackedSIMDBase.scala PackedSIMDSlow.scala PackedSIMDWide.scala diff --git a/README.md b/README.md index 3969b0c..3e32139 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,6 @@ The generated plugin is for RV32 only. It doesn't yet support all B instructions * all instructions ending in 'W', as they are RV64-only * BMAT*, as they are RV64-only -* CRC32* * Three-operands instructions (CMIX, CMOV, FS[RL]*); they are available but need a VexRiscv patch to support the third input (all VexRiscv patch are available on https://github.com/rdolbeau/VexRiscv/tree/three_operands) There is support for partial instructions (rev8, zext.h, orc.b) so that the default plugins generated for Zba, Zbb and Zbc should be feature-complete. To get everything without conflicts, use: diff --git a/data_crc.txt b/data_crc.txt new file mode 100644 index 0000000..01a691d --- /dev/null +++ b/data_crc.txt @@ -0,0 +1,40 @@ +// Zbr +I CRC32xdotB CRC32xdotB 01100001-000-----001-----0010011 CRC32x Zbr +I CRC32xdotH CRC32xdotH 01100001-001-----001-----0010011 CRC32x Zbr +I CRC32xdotW CRC32xdotW 01100001-010-----001-----0010011 CRC32x Zbr +//I CRC32xdotD CRC32xdotD 01100001-011-----001-----0010011 CRC32x Zbr + +S CRC32xdotB "fun_crc32xdotb(input(SRC1), input(INSTRUCTION)(23))" +S CRC32xdotH "fun_crc32xdoth(input(SRC1), input(INSTRUCTION)(23))" +S CRC32xdotW "fun_crc32xdotw(input(SRC1), input(INSTRUCTION)(23))" + +P """ + def fun_crc32xdotb(rs1: Bits, isC : Bool) : Bits = { + val p = ((isC === True) ? (B"32'x82F63B78") | (B"32'xEDB88320")) + var x = rs1 + for (i <- 0 to 7) { + x = (x |>> 1) ^ ((x(0) === True) ? (p) | (B"32'x00000000")) + } + val r = x + r // return value + } + def fun_crc32xdoth(rs1: Bits, isC : Bool) : Bits = { + val p = ((isC === True) ? (B"32'x82F63B78") | (B"32'xEDB88320")) + var x = rs1 + for (i <- 0 to 15) { + x = (x |>> 1) ^ ((x(0) === True) ? (p) | (B"32'x00000000")) + } + val r = x + r // return value + } + def fun_crc32xdotw(rs1: Bits, isC : Bool) : Bits = { + val p = ((isC === True) ? (B"32'x82F63B78") | (B"32'xEDB88320")) + var x = rs1 + for (i <- 0 to 31) { + x = (x |>> 1) ^ ((x(0) === True) ? (p) | (B"32'x00000000")) + } + val r = x + r // return value + } +""" + diff --git a/new_instructions_support_b.h b/new_instructions_support_b.h index 2e252db..3483700 100644 --- a/new_instructions_support_b.h +++ b/new_instructions_support_b.h @@ -5,21 +5,6 @@ #include "new_instructions_support.h" // for support macros -// for instructions currently missing in the toolchain -ASM2MACRO(XPERM_N,0x28002033) -ASM2MACRO(XPERM_B,0x28004033) -ASM2MACRO(XPERM_H,0x28006033) -ASM2MACRO(_SH1ADD,0x20002033) -ASM2MACRO(_SH2ADD,0x20004033) -ASM2MACRO(_SH3ADD,0x20006033) -FUN2(xperm_n,XPERM_N) -FUN2(xperm_b,XPERM_B) -FUN2(xperm_h,XPERM_H) -FUN2(_sh1add,_SH1ADD) -FUN2(_sh2add,_SH2ADD) -FUN2(_sh3add,_SH3ADD) - - #ifdef ENABLE_XAR ASM3RMACRO(XAR, 0xc8000077) FUN3R(__rv__xar, XAR) diff --git a/r5.mk b/r5.mk index e0dea5e..da2b024 100644 --- a/r5.mk +++ b/r5.mk @@ -1,5 +1,5 @@ ## B Toolchain so we get some automatic B generation -R5B_TOOLCHAIN=/opt/riscv64b/ +R5B_TOOLCHAIN=/opt/riscv64bk/ R5B_GCC=$(R5B_TOOLCHAIN)/bin/riscv64-unknown-elf-gcc R5B_OPT=-Os -march=rv32imab -mabi=ilp32 -I. diff --git a/test_b.c b/test_b.c index b0db434..8492572 100644 --- a/test_b.c +++ b/test_b.c @@ -36,38 +36,40 @@ #include "new_instructions_support_b.h" #define _rv64_clmul2(a,b) _rv64_clmul(a,b) + + + //not yet in rvintrin.h ? +static inline uint_xlen_t _rv32_sh1add(uint_xlen_t rs1, uint_xlen_t rs2) { + uint_xlen_t rd; + asm ("sh1add %0, %1, %2\n" : "=r" (rd) : "r" (rs1), "r" (rs2)); + return rd; +} +static inline uint_xlen_t _rv32_sh2add(uint_xlen_t rs1, uint_xlen_t rs2) { + uint_xlen_t rd; + asm ("sh2add %0, %1, %2\n" : "=r" (rd) : "r" (rs1), "r" (rs2)); + return rd; +} +static inline uint_xlen_t _rv32_sh3add(uint_xlen_t rs1, uint_xlen_t rs2) { + uint_xlen_t rd; + asm ("sh3add %0, %1, %2\n" : "=r" (rd) : "r" (rs1), "r" (rs2)); + return rd; +} #else #include // emulation typedef uint32_t uint_xlen_t; #define XLEN 32 -uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2) -{ - uint_xlen_t r = 0; - uint_xlen_t sz = 1LL << sz_log2; - uint_xlen_t mask = (1LL << sz) - 1; - for (int i = 0; i < XLEN; i += sz) { - uint_xlen_t pos = ((rs2 >> i) & mask) << sz_log2; - if (pos < XLEN) - r |= ((rs1 >> pos) & mask) << i; - } - return r; -} -uint_xlen_t xperm_n (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 2); } -uint_xlen_t xperm_b (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 3); } -uint_xlen_t xperm_h (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 4); } -uint_xlen_t xperm_w (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 5); } - -uint_xlen_t _sh1add(uint_xlen_t rs1, uint_xlen_t rs2) + //not yet in rvintrin.h ? +uint_xlen_t _rv32_sh1add(uint_xlen_t rs1, uint_xlen_t rs2) { return (rs1 << 1) + rs2; } -uint_xlen_t _sh2add(uint_xlen_t rs1, uint_xlen_t rs2) +uint_xlen_t _rv32_sh2add(uint_xlen_t rs1, uint_xlen_t rs2) { return (rs1 << 2) + rs2; } -uint_xlen_t _sh3add(uint_xlen_t rs1, uint_xlen_t rs2) +uint_xlen_t _rv32_sh3add(uint_xlen_t rs1, uint_xlen_t rs2) { return (rs1 << 3) + rs2; } @@ -118,7 +120,14 @@ int main(int argc, char **argv) { T1(_rv32_clz); T1(_rv32_ctz); T1(_rv32_pcnt); - + + T1(_rv_crc32_b); + T1(_rv_crc32_h); + T1(_rv_crc32_w); + T1(_rv_crc32c_b); + T1(_rv_crc32c_h); + T1(_rv_crc32c_w); + for (index1 = 0 ; index1 < nonrandom_cnt[1] ; index1++) { b = nonrandom_b[index1]; @@ -139,10 +148,6 @@ int main(int argc, char **argv) { T2(_rv_xnor); T2(_rv_orn); - //T2(_rv32_sh1add); - //T2(_rv32_sh2add); - //T2(_rv32_sh3add); - T2(_rv32_sbset); T2(_rv32_sbclr); T2(_rv32_sbinv); @@ -152,9 +157,10 @@ int main(int argc, char **argv) { T2(_rv32_minu); T2(_rv32_max); T2(_rv32_maxu); - - T2(_rv32_slo); - T2(_rv32_sro); + + // no longer in toolchain, dropped from extension + /* T2(_rv32_slo); */ + /* T2(_rv32_sro); */ //T2(_rv32_xperm_b); @@ -175,14 +181,13 @@ int main(int argc, char **argv) { printf("0x%016llx 0x%016llx (0x%016llx)\n", z, z2, z^z2); } - // extra stuff - T2(_sh1add); - T2(_sh2add); - T2(_sh3add); + T2(_rv32_sh1add); + T2(_rv32_sh2add); + T2(_rv32_sh3add); - T2(xperm_n); - T2(xperm_b); - T2(xperm_h); + T2(_rv_xperm_n); + T2(_rv_xperm_b); + T2(_rv_xperm_h); T2(_rv32_bfp);