add and check some saturating instructions (w/o the CSR update)

This commit is contained in:
Romain Dolbeau 2021-03-20 06:31:21 -04:00
parent 9ab05505bd
commit 98f51a9632
3 changed files with 125 additions and 1 deletions

View file

@ -101,6 +101,12 @@ I UMAQA UMAQA 1100110----------000----01110111 pdpismul8 Zpnslow
// ternary + binary (bit 25)
I PBSADx PBSADx 111111-----------000-----1110111 pdpipsad Zpn
// Saturating 8-bit SIMD add/sub (K* = signed, UK* = unsigned).
// Currently these do not set the CSR when a lane saturates.
// All four share the pdpikadd8 group tag.
I KADD8 KADD8 0001100----------000-----1110111 pdpikadd8 Zpn
I UKADD8 UKADD8 0011100----------000-----1110111 pdpikadd8 Zpn
I KSUB8 KSUB8 0001101----------000-----1110111 pdpikadd8 Zpn
I UKSUB8 UKSUB8 0011101----------000-----1110111 pdpikadd8 Zpn
// binary
S ADD8 "fun_add8(input(SRC1), input(SRC2))"
S ADD16 "fun_add16(input(SRC1), input(SRC2))"
@ -159,6 +165,12 @@ S INSBI "fun_insb(input(SRC1), input(SRC2), input(SRC3))"
S SMAQA "fun_smaqa(input(SRC1), input(SRC2), input(SRC3))"
S UMAQA "fun_umaqa(input(SRC1), input(SRC2), input(SRC3))"
// Saturating 8-bit add/sub: each instruction maps to its fun_* helper on SRC1 and SRC2.
// FIXME: the CSR is not updated when a result saturates.
S KADD8 "fun_kadd8(input(SRC1), input(SRC2))"
S UKADD8 "fun_ukadd8(input(SRC1), input(SRC2))"
S KSUB8 "fun_ksub8(input(SRC1), input(SRC2))"
S UKSUB8 "fun_uksub8(input(SRC1), input(SRC2))"
P """
def fun_add8(rs1: Bits, rs2: Bits) : Bits = {
val b0 = (rs1( 7 downto 0).asUInt + rs2( 7 downto 0).asUInt).asBits.resize(8)
@ -607,4 +619,49 @@ P """
)
r // return value
}
// saturating; the CSR update is missing
// It seems sat() (and its shortcuts +| and -|) in SpinalHDL does not do what I need
// for unsigned subtraction (there is no way to tell overflow from underflow
// without going signed, I think).
// Unsigned saturating subtraction of one 8-bit lane: yields a - b, clamped to 0 on underflow.
// a, b: 8-bit operands interpreted as unsigned (the bit indices below assume that width).
def fun_satsub8u(a: Bits, b: Bits) : Bits = {
    // Zero-extend both operands to 9 bits so they are non-negative as SInt;
    // -^ widens the result by one bit, giving a 10-bit signed difference.
    val s = (B"1'b0" ## a).asSInt -^ (B"1'b0" ## b).asSInt
    // The difference of two values in [0, 255] lies in [-255, 255], so it can never
    // exceed 8 bits on the positive side: the only saturation case is a negative
    // result (sign bit set), which clamps to 0.
    val r = (s(9) ? (B"8'x00") | (s(7 downto 0).asBits))
    r // return value
}
// Signed saturating add on the four 8-bit lanes of rs1 and rs2 (CSR is not updated).
def fun_kadd8(rs1: Bits, rs2: Bits) : Bits = {
    // Saturating signed sum of the lane occupying bits hi..lo of both operands.
    def lane(hi: Int, lo: Int): Bits =
        (rs1(hi downto lo).asSInt +| rs2(hi downto lo).asSInt).asBits.resize(8)
    // Reassemble the word, most significant lane first.
    lane(31, 24) ## lane(23, 16) ## lane(15, 8) ## lane(7, 0)
}
// Unsigned saturating add on the four 8-bit lanes of rs1 and rs2 (CSR is not updated).
def fun_ukadd8(rs1: Bits, rs2: Bits) : Bits = {
    // Saturating unsigned sum of the lane occupying bits hi..lo of both operands.
    def lane(hi: Int, lo: Int): Bits =
        (rs1(hi downto lo).asUInt +| rs2(hi downto lo).asUInt).asBits.resize(8)
    // Reassemble the word, most significant lane first.
    lane(31, 24) ## lane(23, 16) ## lane(15, 8) ## lane(7, 0)
}
// Signed saturating subtract (rs1 - rs2) on the four 8-bit lanes (CSR is not updated).
def fun_ksub8(rs1: Bits, rs2: Bits) : Bits = {
    // Saturating signed difference of the lane occupying bits hi..lo of both operands.
    def lane(hi: Int, lo: Int): Bits =
        (rs1(hi downto lo).asSInt -| rs2(hi downto lo).asSInt).asBits.resize(8)
    // Reassemble the word, most significant lane first.
    lane(31, 24) ## lane(23, 16) ## lane(15, 8) ## lane(7, 0)
}
// Unsigned saturating subtract (rs1 - rs2) on the four 8-bit lanes (CSR is not updated).
def fun_uksub8(rs1: Bits, rs2: Bits) : Bits = {
    // Delegate each lane (bits hi..lo) to the dedicated unsigned saturating helper.
    def lane(hi: Int, lo: Int): Bits =
        fun_satsub8u(rs1(hi downto lo), rs2(hi downto lo)).asBits
    // Reassemble the word, most significant lane first.
    lane(31, 24) ## lane(23, 16) ## lane(15, 8) ## lane(7, 0)
}
"""

View file

@ -179,4 +179,15 @@ FUN1(__rv__sunpkd831, SUNPKD831)
ASM1MACRO(SUNPKD832, 0xad300077)
FUN1(__rv__sunpkd832, SUNPKD832)
// Saturating 8-bit add/sub wrappers (KADD8/KSUB8, UKADD8/UKSUB8).
// NOTE(review): the constants presumably are the instruction words with the
// register fields left at zero -- confirm against the ASM2MACRO definition.
ASM2MACRO(KADD8,0x18000077)
FUN2(__rv__kadd8,KADD8)
ASM2MACRO(UKADD8,0x38000077)
FUN2(__rv__ukadd8,UKADD8)
ASM2MACRO(KSUB8,0x1a000077)
FUN2(__rv__ksub8,KSUB8)
ASM2MACRO(UKSUB8,0x3a000077)
FUN2(__rv__uksub8,UKSUB8)
#endif // __NEW_INSTRUCTION_SUPPORT_P_H__

View file

@ -783,6 +783,57 @@ GEN_ZUNPKD8(3,0)
GEN_ZUNPKD8(3,1)
GEN_ZUNPKD8(3,2)
// Clamp x to the signed 8-bit range [-128, 127].
// NOTE: x is expanded up to three times, so pass only side-effect-free expressions.
#define SATs8(x) ((x) > 127 ? 127 : (x) < -128 ? -128 : (x))
// Clamp x to the unsigned 8-bit range [0, 255] (same multiple-expansion caveat).
#define SATu8(x) ((x) > 255 ? 255 : (x) < 0 ? 0 : (x))
/* Software reference for KADD8: adds the four 8-bit lanes of rs1 and rs2,
 * clamping each sum with SATs8, and returns the repacked 32-bit word. */
uint32_t __rv__kadd8(const uint32_t rs1, const uint32_t rs2) {
    int4x8_t lhs, rhs, sum;
    uint32_t packed;
    int lane;

    /* Split both operands into their four byte lanes. */
    memcpy(lhs, &rs1, 4);
    memcpy(rhs, &rs2, 4);
    for (lane = 0; lane < 4; lane++) {
        /* Widen before adding so the raw sum cannot wrap ahead of the clamp. */
        const int32_t wide = (int32_t)lhs[lane] + (int32_t)rhs[lane];
        sum[lane] = SATs8(wide);
    }
    memcpy(&packed, sum, 4);
    return packed;
}
/* Software reference for UKADD8: adds the four 8-bit lanes of rs1 and rs2,
 * clamping each sum with SATu8, and returns the repacked 32-bit word. */
uint32_t __rv__ukadd8(const uint32_t rs1, const uint32_t rs2) {
    uint4x8_t lhs, rhs, sum;
    uint32_t packed;
    int lane;

    /* Split both operands into their four byte lanes. */
    memcpy(lhs, &rs1, 4);
    memcpy(rhs, &rs2, 4);
    for (lane = 0; lane < 4; lane++) {
        /* Widen before adding so the raw sum cannot wrap ahead of the clamp. */
        const int32_t wide = (int32_t)lhs[lane] + (int32_t)rhs[lane];
        sum[lane] = SATu8(wide);
    }
    memcpy(&packed, sum, 4);
    return packed;
}
/* Software reference for KSUB8: subtracts the four 8-bit lanes of rs2 from
 * those of rs1, clamping each difference with SATs8, and returns the
 * repacked 32-bit word. */
uint32_t __rv__ksub8(const uint32_t rs1, const uint32_t rs2) {
    int4x8_t lhs, rhs, diff;
    uint32_t packed;
    int lane;

    /* Split both operands into their four byte lanes. */
    memcpy(lhs, &rs1, 4);
    memcpy(rhs, &rs2, 4);
    for (lane = 0; lane < 4; lane++) {
        /* Widen before subtracting so the raw difference cannot wrap ahead of the clamp. */
        const int32_t wide = (int32_t)lhs[lane] - (int32_t)rhs[lane];
        diff[lane] = SATs8(wide);
    }
    memcpy(&packed, diff, 4);
    return packed;
}
/* Software reference for UKSUB8: subtracts the four 8-bit lanes of rs2 from
 * those of rs1, clamping each difference with SATu8, and returns the
 * repacked 32-bit word. */
uint32_t __rv__uksub8(const uint32_t rs1, const uint32_t rs2) {
    uint4x8_t lhs, rhs, diff;
    uint32_t packed;
    int lane;

    /* Split both operands into their four byte lanes. */
    memcpy(lhs, &rs1, 4);
    memcpy(rhs, &rs2, 4);
    for (lane = 0; lane < 4; lane++) {
        /* Widen to a signed type so an underflow shows up as a negative value for the clamp. */
        const int32_t wide = (int32_t)lhs[lane] - (int32_t)rhs[lane];
        diff[lane] = SATu8(wide);
    }
    memcpy(&packed, diff, 4);
    return packed;
}
#endif // __riscv
unsigned int a = 0x01234567;
@ -823,7 +874,7 @@ int main(int argc, char **argv) {
T1(__rv__swap16);
for (index1 = 0 ; index1 < nonrandom_cnt[1] ; index1++) {
b = nonrandom_b[index];
b = nonrandom_b[index1];
#if 1
T2(__rv__add8);
T2(__rv__radd8);
@ -891,6 +942,11 @@ int main(int argc, char **argv) {
T2W(__rv__umulx8);
T2W(__rv__smulx16);
T2W(__rv__umulx16);
T2(__rv__kadd8);
T2(__rv__ukadd8);
T2(__rv__ksub8);
T2(__rv__uksub8);
for (index2 = 0 ; index2 < nonrandom_cnt[2] ; index2++) {
d = nonrandom_d[index2];