From 98f51a963228981f78efead17f2f8ee295e9d5a4 Mon Sep 17 00:00:00 2001
From: Romain Dolbeau
Date: Sat, 20 Mar 2021 06:31:21 -0400
Subject: [PATCH] add and check some saturating instructions (w/o the CSR update)

---
Reviewer notes (commentary only; this text sits between the "---" marker and
the first "diff --git" line and is not part of the change itself):

 * Purpose: adds four saturating 8-bit-lane instructions -- KADD8, UKADD8,
   KSUB8, UKSUB8 -- together with C reference models and test calls. As the
   subject and the in-file comments state, the saturation CSR is NOT updated.

 * data_Zpn.txt
   - Four new "I" lines give the bit patterns; all four carry the same
     "pdpikadd8" field and the "Zpn" field.
   - Four new "S" lines bind each instruction to fun_kadd8 / fun_ukadd8 /
     fun_ksub8 / fun_uksub8 applied to input(SRC1) and input(SRC2).
   - fun_kadd8 / fun_ukadd8 / fun_ksub8 slice both operands into four 8-bit
     fields (7..0, 15..8, 23..16, 31..24), apply "+|" or "-|" on the
     SInt/UInt view of each field, and concatenate b3 ## b2 ## b1 ## b0.
   - fun_uksub8 does the same slicing but delegates each field to the new
     helper fun_satsub8u, because (per the author's comment) "-|" did not give
     the needed result for unsigned subtraction.
   - fun_satsub8u prepends a 0 bit to each operand, subtracts them as SInt
     with "-^" (the author's comment says the result keeps 10 bits), and
     returns 0x00 when bit 9 is set, 0xFF when bit 8 is set, otherwise bits
     7..0.
     NOTE(review): with both operands zero-extended from 8 bits the
     difference lies in [-255, 255], so bit 8 set with bit 9 clear looks
     unreachable and the 0xFF arm appears to be dead -- confirm.

 * new_instructions_support_p.h
   - Adds ASM2MACRO/FUN2 pairs for the four instructions. The constants
     0x18000077, 0x38000077, 0x1a000077 and 0x3a000077 equal the fixed bits of
     the corresponding patterns in data_Zpn.txt (top seven bits 0001100,
     0011100, 0001101, 0011101; low seven bits 1110111).

 * test_p.c
   - Adds SATs8 (clamp to -128..127) and SATu8 (clamp to 0..255) and four
     reference functions __rv__kadd8 / __rv__ukadd8 / __rv__ksub8 /
     __rv__uksub8, placed before the existing "#endif // __riscv". Each one
     memcpy's the two 32-bit operands into 4-element arrays, computes every
     element in int32_t, clamps it, and memcpy's the result back.
   - Adds T2() calls for the four functions in main().
   - Also fixes an unrelated indexing slip in main(): nonrandom_b[index]
     becomes nonrandom_b[index1], matching the loop variable.

 * NOTE(review): leading indentation and blank context lines shown below are
   laid out conventionally; verify them against the target tree before
   applying.

 data_Zpn.txt                 | 57 +++++++++++++++++++++++++++++++++++
 new_instructions_support_p.h | 11 +++++++
 test_p.c                     | 58 +++++++++++++++++++++++++++++++++++-
 3 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/data_Zpn.txt b/data_Zpn.txt
index 4dd301f..f936d62 100644
--- a/data_Zpn.txt
+++ b/data_Zpn.txt
@@ -101,6 +101,12 @@ I UMAQA UMAQA 1100110----------000----01110111 pdpismul8 Zpnslow
 // ternary + binary (bit 25)
 I PBSADx PBSADx 111111-----------000-----1110111 pdpipsad Zpn
 
+// Saturating, currently doesn't set the CSR
+I KADD8 KADD8 0001100----------000-----1110111 pdpikadd8 Zpn
+I UKADD8 UKADD8 0011100----------000-----1110111 pdpikadd8 Zpn
+I KSUB8 KSUB8 0001101----------000-----1110111 pdpikadd8 Zpn
+I UKSUB8 UKSUB8 0011101----------000-----1110111 pdpikadd8 Zpn
+
 // binary
 S ADD8 "fun_add8(input(SRC1), input(SRC2))"
 S ADD16 "fun_add16(input(SRC1), input(SRC2))"
@@ -159,6 +165,12 @@ S INSBI "fun_insb(input(SRC1), input(SRC2), input(SRC3))"
 S SMAQA "fun_smaqa(input(SRC1), input(SRC2), input(SRC3))"
 S UMAQA "fun_umaqa(input(SRC1), input(SRC2), input(SRC3))"
 
+// saturating, fixme for csr
+S KADD8 "fun_kadd8(input(SRC1), input(SRC2))"
+S UKADD8 "fun_ukadd8(input(SRC1), input(SRC2))"
+S KSUB8 "fun_ksub8(input(SRC1), input(SRC2))"
+S UKSUB8 "fun_uksub8(input(SRC1), input(SRC2))"
+
 P """
    def fun_add8(rs1: Bits, rs2: Bits) : Bits = {
       val b0 = (rs1( 7 downto 0).asUInt + rs2( 7 downto 0).asUInt).asBits.resize(8)
@@ -607,4 +619,49 @@ P """
       )
       r // return value
    }
+
+// saturating, csr is missing
+// it seems sat() (and it's shortcut +| and -|) in SpinalHDL don't do what I need
+// for unsigned substraction (no way to tell the difference between overflow
+// and underflow unless going signed, I think)
+   def fun_satsub8u(a: Bits, b: Bits) : Bits = {
+      val s = (B"1'b0" ## a).asSInt -^ (B"1'b0" ## b).asSInt // -^ will keep 10 bits
+      // if sign bit set -> underflow, else if bit eight set -> overflow
+      val r = ((s(9).asUInt === 1) ? (B"8'x00") | ((s(8).asUInt === 1) ? (B"8'xFF") | (s(7 downto 0).asBits)))
+
+      r // return value
+   }
+
+   def fun_kadd8(rs1: Bits, rs2: Bits) : Bits = {
+      val b0 = (rs1( 7 downto 0).asSInt +| rs2( 7 downto 0).asSInt).asBits.resize(8)
+      val b1 = (rs1(15 downto 8).asSInt +| rs2(15 downto 8).asSInt).asBits.resize(8)
+      val b2 = (rs1(23 downto 16).asSInt +| rs2(23 downto 16).asSInt).asBits.resize(8)
+      val b3 = (rs1(31 downto 24).asSInt +| rs2(31 downto 24).asSInt).asBits.resize(8)
+
+      b3 ## b2 ## b1 ## b0 // return value
+   }
+   def fun_ukadd8(rs1: Bits, rs2: Bits) : Bits = {
+      val b0 = (rs1( 7 downto 0).asUInt +| rs2( 7 downto 0).asUInt).asBits.resize(8)
+      val b1 = (rs1(15 downto 8).asUInt +| rs2(15 downto 8).asUInt).asBits.resize(8)
+      val b2 = (rs1(23 downto 16).asUInt +| rs2(23 downto 16).asUInt).asBits.resize(8)
+      val b3 = (rs1(31 downto 24).asUInt +| rs2(31 downto 24).asUInt).asBits.resize(8)
+
+      b3 ## b2 ## b1 ## b0 // return value
+   }
+   def fun_ksub8(rs1: Bits, rs2: Bits) : Bits = {
+      val b0 = (rs1( 7 downto 0).asSInt -| rs2( 7 downto 0).asSInt).asBits.resize(8)
+      val b1 = (rs1(15 downto 8).asSInt -| rs2(15 downto 8).asSInt).asBits.resize(8)
+      val b2 = (rs1(23 downto 16).asSInt -| rs2(23 downto 16).asSInt).asBits.resize(8)
+      val b3 = (rs1(31 downto 24).asSInt -| rs2(31 downto 24).asSInt).asBits.resize(8)
+
+      b3 ## b2 ## b1 ## b0 // return value
+   }
+   def fun_uksub8(rs1: Bits, rs2: Bits) : Bits = {
+      val b0 = fun_satsub8u(rs1( 7 downto 0), rs2( 7 downto 0)).asBits
+      val b1 = fun_satsub8u(rs1(15 downto 8), rs2(15 downto 8)).asBits
+      val b2 = fun_satsub8u(rs1(23 downto 16), rs2(23 downto 16)).asBits
+      val b3 = fun_satsub8u(rs1(31 downto 24), rs2(31 downto 24)).asBits
+
+      b3 ## b2 ## b1 ## b0 // return value
+   }
 """
diff --git a/new_instructions_support_p.h b/new_instructions_support_p.h
index 7abcdd3..5a78f5b 100644
--- a/new_instructions_support_p.h
+++ b/new_instructions_support_p.h
@@ -179,4 +179,15 @@ FUN1(__rv__sunpkd831, SUNPKD831)
 ASM1MACRO(SUNPKD832, 0xad300077)
 FUN1(__rv__sunpkd832, SUNPKD832)
 
+
+ASM2MACRO(KADD8,0x18000077)
+FUN2(__rv__kadd8,KADD8)
+ASM2MACRO(UKADD8,0x38000077)
+FUN2(__rv__ukadd8,UKADD8)
+ASM2MACRO(KSUB8,0x1a000077)
+FUN2(__rv__ksub8,KSUB8)
+ASM2MACRO(UKSUB8,0x3a000077)
+FUN2(__rv__uksub8,UKSUB8)
+
+
 #endif // __NEW_INSTRUCTION_SUPPORT_P_H__
diff --git a/test_p.c b/test_p.c
index e928333..63efc5d 100644
--- a/test_p.c
+++ b/test_p.c
@@ -783,6 +783,57 @@ GEN_ZUNPKD8(3,0)
 GEN_ZUNPKD8(3,1)
 GEN_ZUNPKD8(3,2)
 
+#define SATs8(x) ((x) > 127 ? 127 : (x) < -128 ? -128 : (x))
+#define SATu8(x) ((x) > 255 ? 255 : (x) < 0 ? 0 : (x))
+uint32_t __rv__kadd8(const uint32_t rs1, const uint32_t rs2) {
+  int4x8_t a, b, c;
+  int32_t r;
+  memcpy(a, &rs1, 4);
+  memcpy(b, &rs2, 4);
+  c[0] = SATs8((int32_t)a[0] + (int32_t)b[0]);
+  c[1] = SATs8((int32_t)a[1] + (int32_t)b[1]);
+  c[2] = SATs8((int32_t)a[2] + (int32_t)b[2]);
+  c[3] = SATs8((int32_t)a[3] + (int32_t)b[3]);
+  memcpy(&r, c, 4);
+  return r;
+}
+uint32_t __rv__ukadd8(const uint32_t rs1, const uint32_t rs2) {
+  uint4x8_t a, b, c;
+  uint32_t r;
+  memcpy(a, &rs1, 4);
+  memcpy(b, &rs2, 4);
+  c[0] = SATu8((int32_t)a[0] + (int32_t)b[0]);
+  c[1] = SATu8((int32_t)a[1] + (int32_t)b[1]);
+  c[2] = SATu8((int32_t)a[2] + (int32_t)b[2]);
+  c[3] = SATu8((int32_t)a[3] + (int32_t)b[3]);
+  memcpy(&r, c, 4);
+  return r;
+}
+uint32_t __rv__ksub8(const uint32_t rs1, const uint32_t rs2) {
+  int4x8_t a, b, c;
+  int32_t r;
+  memcpy(a, &rs1, 4);
+  memcpy(b, &rs2, 4);
+  c[0] = SATs8((int32_t)a[0] - (int32_t)b[0]);
+  c[1] = SATs8((int32_t)a[1] - (int32_t)b[1]);
+  c[2] = SATs8((int32_t)a[2] - (int32_t)b[2]);
+  c[3] = SATs8((int32_t)a[3] - (int32_t)b[3]);
+  memcpy(&r, c, 4);
+  return r;
+}
+uint32_t __rv__uksub8(const uint32_t rs1, const uint32_t rs2) {
+  uint4x8_t a, b, c;
+  uint32_t r;
+  memcpy(a, &rs1, 4);
+  memcpy(b, &rs2, 4);
+  c[0] = SATu8((int32_t)a[0] - (int32_t)b[0]);
+  c[1] = SATu8((int32_t)a[1] - (int32_t)b[1]);
+  c[2] = SATu8((int32_t)a[2] - (int32_t)b[2]);
+  c[3] = SATu8((int32_t)a[3] - (int32_t)b[3]);
+  memcpy(&r, c, 4);
+  return r;
+}
+
 #endif // __riscv
 
 unsigned int a = 0x01234567;
@@ -823,7 +874,7 @@ int main(int argc, char **argv) {
     T1(__rv__swap16);
 
     for (index1 = 0 ; index1 < nonrandom_cnt[1] ; index1++) {
-      b = nonrandom_b[index];
+      b = nonrandom_b[index1];
 #if 1
       T2(__rv__add8);
       T2(__rv__radd8);
@@ -891,6 +942,11 @@ int main(int argc, char **argv) {
       T2W(__rv__umulx8);
       T2W(__rv__smulx16);
       T2W(__rv__umulx16);
+
+      T2(__rv__kadd8);
+      T2(__rv__ukadd8);
+      T2(__rv__ksub8);
+      T2(__rv__uksub8);
 
       for (index2 = 0 ; index2 < nonrandom_cnt[2] ; index2++) {
         d = nonrandom_d[index2];