diff --git a/Makefile b/Makefile index 3aaa6c0..2509c89 100644 --- a/Makefile +++ b/Makefile @@ -136,7 +136,7 @@ include r5.mk NEWINST_H=new_instructions_support_b.h new_instructions_support.h new_instructions_support_k.h new_instructions_support_p.h -tests: test_b test_p +tests: test_b test_p test_b.txt test_p.txt signal.o: signal.c $(R5IMA_GCC) $(R5IMA_OPT) -c $< -o $@ @@ -159,6 +159,12 @@ test_b: test_b.o signal.o test_p: test_p.o signal.o $(R5IMA_GCC) $(R5IMA_OPT) $^ -o $@ +test_b.txt: test_b.c + gcc -I. -O2 $< -o /tmp/a.out && /tmp/a.out | tee $@ + +test_p.txt: test_p.c + gcc -I. -O2 $< -o /tmp/a.out && /tmp/a.out | tee $@ + ## avoid builtin rule for .o .SUFFIXES: SUFFIXES := diff --git a/test_b.c b/test_b.c index 7cdac33..f999069 100644 --- a/test_b.c +++ b/test_b.c @@ -29,6 +29,8 @@ #include #include +#include "test_common.h" + #ifdef __riscv #include "new_instructions_support_b.h" @@ -39,7 +41,7 @@ typedef uint32_t uint_xlen_t; #define XLEN 32 - uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2) +uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2) { uint_xlen_t r = 0; uint_xlen_t sz = 1LL << sz_log2; @@ -75,207 +77,162 @@ uint_xlen_t _sh3add(uint_xlen_t rs1, uint_xlen_t rs2) /* emulates 64 bits clmul with 32 bit clmul/clmulh */ static inline int64_t _rv64_clmul2(int64_t rs1, int64_t rs2) { - int64_t r = 0; - uint32_t rs1l = rs1 & 0xFFFFFFFF; - uint32_t rs1h = (rs1>>32) & 0xFFFFFFFF; - uint32_t rs2l = rs2 & 0xFFFFFFFF; - uint32_t rs2h = (rs2>>32) & 0xFFFFFFFF; - uint32_t lll = _rv32_clmul(rs1l, rs2l); - uint32_t llh = _rv32_clmulh(rs1l, rs2l); - //uint32_t hhl = _rv32_clmul(rs1h, rs2h); - // hhh - uint32_t lhl = _rv32_clmul(rs1l, rs2h); - /* uint32_t lhh = _rv32_clmulh(rs1l, rs2h); */ - uint32_t hll = _rv32_clmul(rs1h, rs2l); - /* uint32_t hlh = _rv32_clmulh(rs1h, rs2l); */ + int64_t r = 0; + uint32_t rs1l = rs1 & 0xFFFFFFFF; + uint32_t rs1h = (rs1>>32) & 0xFFFFFFFF; + uint32_t rs2l = rs2 & 0xFFFFFFFF; + uint32_t rs2h = (rs2>>32) & 
0xFFFFFFFF; + uint32_t lll = _rv32_clmul(rs1l, rs2l); + uint32_t llh = _rv32_clmulh(rs1l, rs2l); + //uint32_t hhl = _rv32_clmul(rs1h, rs2h); + // hhh + uint32_t lhl = _rv32_clmul(rs1l, rs2h); + /* uint32_t lhh = _rv32_clmulh(rs1l, rs2h); */ + uint32_t hll = _rv32_clmul(rs1h, rs2l); + /* uint32_t hlh = _rv32_clmulh(rs1h, rs2l); */ - uint32_t L = lll; - uint32_t H = llh ^ lhl ^ hll; - r = (int64_t)(((uint64_t)L)| ((uint64_t)H) << 32); - return r; + uint32_t L = lll; + uint32_t H = llh ^ lhl ^ hll; + r = (int64_t)(((uint64_t)L)| ((uint64_t)H) << 32); + return r; } - - unsigned int a = 0x01234567; - -//#define CHECK_SIGILL - -#if defined(CHECK_SIGILL) -#include -extern jmp_buf jb; -void installillhandler(void); -#endif // CHECK_SIGILL +unsigned int a = 0x01234567; int main(int argc, char **argv) { - unsigned int b = 0xdeadbeef; - unsigned int c; - unsigned int d = 0xC0FFEE00; - unsigned int index; - unsigned int index2; + unsigned int b = 0xdeadbeef; + unsigned int c; + unsigned int d = 0xC0FFEE00; + unsigned int index, index1, index2, index3; #if defined(CHECK_SIGILL) - installillhandler(); + installillhandler(); #endif // CHECK_SIGILL - if (argc > 1) - a = strtoul(argv[1], NULL, 16); - if (argc > 2) - b = strtoul(argv[2], NULL, 16); - if (argc > 3) - d = strtoul(argv[3], NULL, 16); + if (argc > 1) + a = strtoul(argv[1], NULL, 16); + if (argc > 2) + b = strtoul(argv[2], NULL, 16); + if (argc > 3) + d = strtoul(argv[3], NULL, 16); - #if !defined(CHECK_SIGILL) -#define T2(X) \ - c = X(a,b);printf(#X "(0x%08x, 0x%08x) -> 0x%08x\n", a, b, c) -#define T1(X) \ - c = X(a);printf(#X "(0x%08x) -> 0x%08x\n", a, c) -#define T3(X) \ - c = X(a,b,d);printf(#X "(0x%08x, 0x%08x, 0x%08x) -> 0x%08x\n", a, b, d, c) -#define T2W(X) \ - cq = X(a,b);printf(#X "(0x%08x, 0x%08x) -> 0x%016llx\n", a, b, cq) -#else -#define T2(X) do { \ - if (setjmp(jb)) { \ - printf(#X "(0x%08x, 0x%08x) -> *SIGILL*\n", a, b); \ - } else { \ - c = X(a,b); \ - printf(#X "(0x%08x, 0x%08x) -> 0x%08x\n", a, b, c); 
\ - } \ - } while (0) -#define T1(X) do { \ - if (setjmp(jb)) { \ - printf(#X "(0x%08x) -> *SIGILL*\n", a); \ - } else { \ - c = X(a); \ - printf(#X "(0x%08x) -> 0x%08x\n", a, c); \ - } \ - } while (0) -#define T3(X) do { \ - if (setjmp(jb)) { \ - printf(#X "(0x%08x, 0x%08x, 0x%08x) -> *SIGILL*\n", a, b, d); \ - } else { \ - c = X(a,b,d); \ - printf(#X "(0x%08x, 0x%08x, 0x%08x) -> 0x%08x\n", a, b, d, c); \ - } \ - } while (0) -#define T2W(X) do { \ - if (setjmp(jb)) { \ - printf(#X "(0x%08x, 0x%08x) -> *SIGILL*\n", a, b); \ - } else { \ - cq = X(a,b); \ - printf(#X "(0x%08x, 0x%08x) -> 0x%016llx\n", a, b, cq); \ - } \ - } while (0) -#endif // CHECK_SIGILL + for (index = 0 ; index < nonrandom_cnt[0] ; index++) { + a = nonrandom_a[index]; - for (index = 0 ; index < 32 ; index++) { + T1(_rv32_sext_b); + T1(_rv32_sext_h); + + T1(_rv32_clz); + T1(_rv32_ctz); + T1(_rv32_pcnt); + + for (index1 = 0 ; index1 < nonrandom_cnt[1] ; index1++) { + b = nonrandom_b[index1]; - T2(_rv32_ror); - T2(_rv32_rol); + T2(_rv32_ror); + T2(_rv32_rol); - T2(_rv32_grev); - T2(_rv32_gorc); + T2(_rv32_grev); + T2(_rv32_gorc); - T2(_rv32_pack); - T2(_rv32_packu); - T2(_rv32_packh); + T2(_rv32_pack); + T2(_rv32_packu); + T2(_rv32_packh); - T2(_rv32_shfl); - T2(_rv32_unshfl); + T2(_rv32_shfl); + T2(_rv32_unshfl); - T2(_rv_andn); - T2(_rv_xnor); - T2(_rv_orn); + T2(_rv_andn); + T2(_rv_xnor); + T2(_rv_orn); - //T2(_rv32_sh1add); - //T2(_rv32_sh2add); - //T2(_rv32_sh3add); + //T2(_rv32_sh1add); + //T2(_rv32_sh2add); + //T2(_rv32_sh3add); - T2(_rv32_sbset); - T2(_rv32_sbclr); - T2(_rv32_sbinv); - T2(_rv32_sbext); + T2(_rv32_sbset); + T2(_rv32_sbclr); + T2(_rv32_sbinv); + T2(_rv32_sbext); - T2(_rv32_min); - T2(_rv32_minu); - T2(_rv32_max); - T2(_rv32_maxu); + T2(_rv32_min); + T2(_rv32_minu); + T2(_rv32_max); + T2(_rv32_maxu); - T2(_rv32_slo); - T2(_rv32_sro); + T2(_rv32_slo); + T2(_rv32_sro); - //T2(_rv32_xperm_b); + //T2(_rv32_xperm_b); - T1(_rv32_sext_b); - T1(_rv32_sext_h); - - T1(_rv32_clz); - 
T1(_rv32_ctz); - T1(_rv32_pcnt); - - T2(_rv32_clmul); - T2(_rv32_clmulr); - T2(_rv32_clmulh); + T2(_rv32_clmul); + T2(_rv32_clmulr); + T2(_rv32_clmulh); #if defined(CHECK_SIGILL) - if (setjmp(jb)) { - printf("clmul[hr]: **SIGILL**\n"); - } else + if (setjmp(jb)) { + printf("clmul[hr]: **SIGILL**\n"); + } else #endif - { - int64_t x = 0xc4f5a63e4ac4567bULL ^ (uint64_t)a << 32 ^ (uint64_t)c << 17 ^ (uint64_t)b; - int64_t y = 0x9ff123456aabbcc9ULL ^ (uint64_t)c << 32 ^ (uint64_t)b << 23 ^ (uint64_t)a; - int64_t z = _rv64_clmul(x, y); - int64_t z2 = _rv64_clmul2(x, y); - printf("0x%016llx 0x%016llx (0x%016llx)\n", z, z2, z^z2); - } + { + int64_t x = 0xc4f5a63e4ac4567bULL ^ (uint64_t)a << 32 ^ (uint64_t)c << 17 ^ (uint64_t)b; + int64_t y = 0x9ff123456aabbcc9ULL ^ (uint64_t)c << 32 ^ (uint64_t)b << 23 ^ (uint64_t)a; + int64_t z = _rv64_clmul(x, y); + int64_t z2 = _rv64_clmul2(x, y); + printf("0x%016llx 0x%016llx (0x%016llx)\n", z, z2, z^z2); + } - // extra stuff - T2(_sh1add); - T2(_sh2add); - T2(_sh3add); + // extra stuff + T2(_sh1add); + T2(_sh2add); + T2(_sh3add); - T2(xperm_n); - T2(xperm_b); - T2(xperm_h); + T2(xperm_n); + T2(xperm_b); + T2(xperm_h); - T3(_rv_cmix); - T3(_rv_cmov); + T2(_rv32_bfp); - T3(_rv32_fsl); - T3(_rv32_fsr); + for (index2 = 0 ; index2 < nonrandom_cnt[2] ; index2++) { + d = nonrandom_d[index2]; + T3(_rv_cmix); + T3(_rv_cmov); - T2(_rv32_bfp); + T3(_rv32_fsl); + T3(_rv32_fsr); - b = index; - } + } + } + } +#if 0 #if defined(CHECK_SIGILL) - if (setjmp(jb)) { - printf("bfp: **SIGILL**\n"); - } else + if (setjmp(jb)) { + printf("bfp: **SIGILL**\n"); + } else +#endif + { + for (index2 = 0 ; index2 < 16 ; index2++) { + for (index = 0 ; index < 32 ; index++){ + { + unsigned int a2, b2, c2; + b2 = (index2<<24) | (index<<16) | 0; + a2 = 0xFFFFFFFF; + c2 = _rv32_bfp(a2,b2);printf("_rv32_bfp (0x%08x, 0x%08x) -> 0x%08x\n", a2, b2, c2); + } + } + } + for (index2 = 0 ; index2 < 16 ; index2++) { + for (index = 0 ; index < 32 ; index++){ + { + unsigned int 
a2, b2, c2; + b2 = (index2<<24) | (index<<16) | (a&0xFFFF); + a2 = 0; + c2 = _rv32_bfp(a2,b2);printf("_rv32_bfp (0x%08x, 0x%08x) -> 0x%08x\n", a2, b2, c2); + } + } + } + } #endif - { - for (index2 = 0 ; index2 < 16 ; index2++) { - for (index = 0 ; index < 32 ; index++){ - { - unsigned int a2, b2, c2; - b2 = (index2<<24) | (index<<16) | 0; - a2 = 0xFFFFFFFF; - c2 = _rv32_bfp(a2,b2);printf("_rv32_bfp (0x%08x, 0x%08x) -> 0x%08x\n", a2, b2, c2); - } - } - } - for (index2 = 0 ; index2 < 16 ; index2++) { - for (index = 0 ; index < 32 ; index++){ - { - unsigned int a2, b2, c2; - b2 = (index2<<24) | (index<<16) | (a&0xFFFF); - a2 = 0; - c2 = _rv32_bfp(a2,b2);printf("_rv32_bfp (0x%08x, 0x%08x) -> 0x%08x\n", a2, b2, c2); - } - } - } - } - return 0; + return 0; } diff --git a/test_common.h b/test_common.h new file mode 100644 index 0000000..a341929 --- /dev/null +++ b/test_common.h @@ -0,0 +1,65 @@ +#ifndef __TEST_COMMON_H__ +#define __TEST_COMMON_H__ + +#if !defined(CHECK_SIGILL) +#define T2(X) \ + c = X(a,b);printf(#X "(0x%08x, 0x%08x) -> 0x%08x\n", a, b, c) +#define T1(X) \ + c = X(a);printf(#X "(0x%08x) -> 0x%08x\n", a, c) +#define T3(X) \ + c = X(a,b,d);printf(#X "(0x%08x, 0x%08x, 0x%08x) -> 0x%08x\n", a, b, d, c) +#define T2W(X) \ + cq = X(a,b);printf(#X "(0x%08x, 0x%08x) -> 0x%016llx\n", a, b, cq) +#else +#define T2(X) do { \ + if (setjmp(jb)) { \ + printf(#X "(0x%08x, 0x%08x) -> *SIGILL*\n", a, b); \ + } else { \ + c = X(a,b); \ + printf(#X "(0x%08x, 0x%08x) -> 0x%08x\n", a, b, c); \ + } \ + } while (0) +#define T1(X) do { \ + if (setjmp(jb)) { \ + printf(#X "(0x%08x) -> *SIGILL*\n", a); \ + } else { \ + c = X(a); \ + printf(#X "(0x%08x) -> 0x%08x\n", a, c); \ + } \ + } while (0) +#define T3(X) do { \ + if (setjmp(jb)) { \ + printf(#X "(0x%08x, 0x%08x, 0x%08x) -> *SIGILL*\n", a, b, d); \ + } else { \ + c = X(a,b,d); \ + printf(#X "(0x%08x, 0x%08x, 0x%08x) -> 0x%08x\n", a, b, d, c); \ + } \ + } while (0) +#define T2W(X) do { \ + if (setjmp(jb)) { \ + printf(#X 
"(0x%08x, 0x%08x) -> *SIGILL*\n", a, b); \ + } else { \ + cq = X(a,b); \ + printf(#X "(0x%08x, 0x%08x) -> 0x%016llx\n", a, b, cq); \ + } \ + } while (0) + +#include +extern jmp_buf jb; +void installillhandler(void); + +#endif // CHECK_SIGILL + +const unsigned int nonrandom_a[15] = { 0x01234567, 0, 1, 2, 3, 4, 5, 6, 7, + 0x80000000, 0xFFFFFFFF, 0x7FFFFFFF, 0x7FFF7FFF, + 0x7F7F7F7F, 0x80808080 }; +const unsigned int nonrandom_b[15] = { 0xdeadbeef, 0, 1, 2, 3, 4, 5, 6, 7, + 0x80000000, 0xFFFFFFFF, 0x7FFFFFFF, 0x7FFF7FFF, + 0x7F7F7F7F, 0x80808080 }; +const unsigned int nonrandom_d[14] = { 0, 1, 2, 3, 4, 5, 6, 7, + 0x80000000, 0xFFFFFFFF, 0x7FFFFFFF, 0x7FFF7FFF, + 0x7F7F7F7F, 0x80808080 }; + +const int nonrandom_cnt[4] = { 15, 14, 14}; + +#endif // __TEST_COMMON_H__ diff --git a/test_p.c b/test_p.c index f6f7436..6d152b6 100644 --- a/test_p.c +++ b/test_p.c @@ -13,6 +13,8 @@ #include #include +#include "test_common.h" + #ifdef __riscv #include "new_instructions_support_p.h" @@ -30,714 +32,714 @@ typedef uint32_t uint2x32_t[2]; // for 2W typedef int32_t int2x32_t[2]; // for 2W uint32_t __rv__add8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] + b[0]; - c[1] = a[1] + b[1]; - c[2] = a[2] + b[2]; - c[3] = a[3] + b[3]; - memcpy(&r, c, 4); - return r; + uint4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] + b[0]; + c[1] = a[1] + b[1]; + c[2] = a[2] + b[2]; + c[3] = a[3] + b[3]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__radd8(const uint32_t rs1, const uint32_t rs2) { - int4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (((int32_t)a[0] + (int32_t)b[0]) & 0x000001FE)>>1; - c[1] = (((int32_t)a[1] + (int32_t)b[1]) & 0x000001FE)>>1; - c[2] = (((int32_t)a[2] + (int32_t)b[2]) & 0x000001FE)>>1; - c[3] = (((int32_t)a[3] + (int32_t)b[3]) & 0x000001FE)>>1; - memcpy(&r, c, 4); - return r; + int4x8_t a, b, c; + uint32_t r; + 
memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (((int32_t)a[0] + (int32_t)b[0]) & 0x000001FE)>>1; + c[1] = (((int32_t)a[1] + (int32_t)b[1]) & 0x000001FE)>>1; + c[2] = (((int32_t)a[2] + (int32_t)b[2]) & 0x000001FE)>>1; + c[3] = (((int32_t)a[3] + (int32_t)b[3]) & 0x000001FE)>>1; + memcpy(&r, c, 4); + return r; } uint32_t __rv__rsub8(const uint32_t rs1, const uint32_t rs2) { - int4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (((int32_t)a[0] - (int32_t)b[0]) & 0x000001FE)>>1; - c[1] = (((int32_t)a[1] - (int32_t)b[1]) & 0x000001FE)>>1; - c[2] = (((int32_t)a[2] - (int32_t)b[2]) & 0x000001FE)>>1; - c[3] = (((int32_t)a[3] - (int32_t)b[3]) & 0x000001FE)>>1; - memcpy(&r, c, 4); - return r; + int4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (((int32_t)a[0] - (int32_t)b[0]) & 0x000001FE)>>1; + c[1] = (((int32_t)a[1] - (int32_t)b[1]) & 0x000001FE)>>1; + c[2] = (((int32_t)a[2] - (int32_t)b[2]) & 0x000001FE)>>1; + c[3] = (((int32_t)a[3] - (int32_t)b[3]) & 0x000001FE)>>1; + memcpy(&r, c, 4); + return r; } uint32_t __rv__cmpeq8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] == b[0]) ? 0xFF : 0x00; - c[1] = (a[1] == b[1]) ? 0xFF : 0x00; - c[2] = (a[2] == b[2]) ? 0xFF : 0x00; - c[3] = (a[3] == b[3]) ? 0xFF : 0x00; - memcpy(&r, c, 4); - return r; + uint4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] == b[0]) ? 0xFF : 0x00; + c[1] = (a[1] == b[1]) ? 0xFF : 0x00; + c[2] = (a[2] == b[2]) ? 0xFF : 0x00; + c[3] = (a[3] == b[3]) ? 0xFF : 0x00; + memcpy(&r, c, 4); + return r; } uint32_t __rv__clz8(const uint32_t rs1) { - uint4x8_t a, c; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[0] == 0 ? 8 : __builtin_clz((uint32_t)a[0]) - 24; - c[1] = a[1] == 0 ? 8 : __builtin_clz((uint32_t)a[1]) - 24; - c[2] = a[2] == 0 ? 8 : __builtin_clz((uint32_t)a[2]) - 24; - c[3] = a[3] == 0 ? 
8 : __builtin_clz((uint32_t)a[3]) - 24; - memcpy(&r, c, 4); - return r; + uint4x8_t a, c; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[0] == 0 ? 8 : __builtin_clz((uint32_t)a[0]) - 24; + c[1] = a[1] == 0 ? 8 : __builtin_clz((uint32_t)a[1]) - 24; + c[2] = a[2] == 0 ? 8 : __builtin_clz((uint32_t)a[2]) - 24; + c[3] = a[3] == 0 ? 8 : __builtin_clz((uint32_t)a[3]) - 24; + memcpy(&r, c, 4); + return r; } uint32_t __rv__clo8(const uint32_t rs1) { - uint4x8_t a, c; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[0] == 0xff ? 8 : __builtin_clz((uint32_t)(uint8_t)(~a[0])) - 24; - c[1] = a[1] == 0xff ? 8 : __builtin_clz((uint32_t)(uint8_t)(~a[1])) - 24; - c[2] = a[2] == 0xff ? 8 : __builtin_clz((uint32_t)(uint8_t)(~a[2])) - 24; - c[3] = a[3] == 0xff ? 8 : __builtin_clz((uint32_t)(uint8_t)(~a[3])) - 24; - memcpy(&r, c, 4); - return r; + uint4x8_t a, c; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[0] == 0xff ? 8 : __builtin_clz((uint32_t)(uint8_t)(~a[0])) - 24; + c[1] = a[1] == 0xff ? 8 : __builtin_clz((uint32_t)(uint8_t)(~a[1])) - 24; + c[2] = a[2] == 0xff ? 8 : __builtin_clz((uint32_t)(uint8_t)(~a[2])) - 24; + c[3] = a[3] == 0xff ? 
8 : __builtin_clz((uint32_t)(uint8_t)(~a[3])) - 24; + memcpy(&r, c, 4); + return r; } uint32_t __rv__clrs8(const uint32_t rs1) { - uint4x8_t a, c; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = __builtin_clrsb((int32_t)a[0]) - 24; - c[1] = __builtin_clrsb((int32_t)a[1]) - 24; - c[2] = __builtin_clrsb((int32_t)a[2]) - 24; - c[3] = __builtin_clrsb((int32_t)a[3]) - 24; - memcpy(&r, c, 4); - return r; + int4x8_t a, c; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = __builtin_clrsb((int32_t)a[0]) - 24; + c[1] = __builtin_clrsb((int32_t)a[1]) - 24; + c[2] = __builtin_clrsb((int32_t)a[2]) - 24; + c[3] = __builtin_clrsb((int32_t)a[3]) - 24; + memcpy(&r, c, 4); + return r; } uint32_t __rv__swap8(const uint32_t rs1) { - uint4x8_t a, c; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[1]; - c[1] = a[0]; - c[2] = a[3]; - c[3] = a[2]; - memcpy(&r, c, 4); - return r; + uint4x8_t a, c; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[1]; + c[1] = a[0]; + c[2] = a[3]; + c[3] = a[2]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__scmple8(const uint32_t rs1, const uint32_t rs2) { - int4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] <= b[0]) ? 0xFF : 0x00; - c[1] = (a[1] <= b[1]) ? 0xFF : 0x00; - c[2] = (a[2] <= b[2]) ? 0xFF : 0x00; - c[3] = (a[3] <= b[3]) ? 0xFF : 0x00; - memcpy(&r, c, 4); - return r; + int4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] <= b[0]) ? 0xFF : 0x00; + c[1] = (a[1] <= b[1]) ? 0xFF : 0x00; + c[2] = (a[2] <= b[2]) ? 0xFF : 0x00; + c[3] = (a[3] <= b[3]) ? 0xFF : 0x00; + memcpy(&r, c, 4); + return r; } uint32_t __rv__scmplt8(const uint32_t rs1, const uint32_t rs2) { - int4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] < b[0]) ? 0xFF : 0x00; - c[1] = (a[1] < b[1]) ? 0xFF : 0x00; - c[2] = (a[2] < b[2]) ? 0xFF : 0x00; - c[3] = (a[3] < b[3]) ? 
0xFF : 0x00; - memcpy(&r, c, 4); - return r; + int4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] < b[0]) ? 0xFF : 0x00; + c[1] = (a[1] < b[1]) ? 0xFF : 0x00; + c[2] = (a[2] < b[2]) ? 0xFF : 0x00; + c[3] = (a[3] < b[3]) ? 0xFF : 0x00; + memcpy(&r, c, 4); + return r; } uint32_t __rv__sll8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, c; - uint32_t o = rs2 & 0x7; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[0] << o; - c[1] = a[1] << o; - c[2] = a[2] << o; - c[3] = a[3] << o; - memcpy(&r, c, 4); - return r; + uint4x8_t a, c; + uint32_t o = rs2 & 0x7; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[0] << o; + c[1] = a[1] << o; + c[2] = a[2] << o; + c[3] = a[3] << o; + memcpy(&r, c, 4); + return r; } uint32_t __rv__srl8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, c; - uint32_t o = rs2 & 0x7; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[0] >> o; - c[1] = a[1] >> o; - c[2] = a[2] >> o; - c[3] = a[3] >> o; - memcpy(&r, c, 4); - return r; + uint4x8_t a, c; + uint32_t o = rs2 & 0x7; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[0] >> o; + c[1] = a[1] >> o; + c[2] = a[2] >> o; + c[3] = a[3] >> o; + memcpy(&r, c, 4); + return r; } uint32_t __rv__sra8(const uint32_t rs1, const uint32_t rs2) { - int4x8_t a, c; - uint32_t o = rs2 & 0x7; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[0] >> o; - c[1] = a[1] >> o; - c[2] = a[2] >> o; - c[3] = a[3] >> o; - memcpy(&r, c, 4); - return r; + int4x8_t a, c; + uint32_t o = rs2 & 0x7; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[0] >> o; + c[1] = a[1] >> o; + c[2] = a[2] >> o; + c[3] = a[3] >> o; + memcpy(&r, c, 4); + return r; } uint32_t __rv__smax8(const uint32_t rs1, const uint32_t rs2) { - int4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] >= b[0]) ? a[0] : b[0]; - c[1] = (a[1] >= b[1]) ? a[1] : b[1]; - c[2] = (a[2] >= b[2]) ? a[2] : b[2]; - c[3] = (a[3] >= b[3]) ? 
a[3] : b[3]; - memcpy(&r, c, 4); - return r; + int4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] >= b[0]) ? a[0] : b[0]; + c[1] = (a[1] >= b[1]) ? a[1] : b[1]; + c[2] = (a[2] >= b[2]) ? a[2] : b[2]; + c[3] = (a[3] >= b[3]) ? a[3] : b[3]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__smin8(const uint32_t rs1, const uint32_t rs2) { - int4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] <= b[0]) ? a[0] : b[0]; - c[1] = (a[1] <= b[1]) ? a[1] : b[1]; - c[2] = (a[2] <= b[2]) ? a[2] : b[2]; - c[3] = (a[3] <= b[3]) ? a[3] : b[3]; - memcpy(&r, c, 4); - return r; + int4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] <= b[0]) ? a[0] : b[0]; + c[1] = (a[1] <= b[1]) ? a[1] : b[1]; + c[2] = (a[2] <= b[2]) ? a[2] : b[2]; + c[3] = (a[3] <= b[3]) ? a[3] : b[3]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__ucmple8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] <= b[0]) ? 0xFF : 0x00; - c[1] = (a[1] <= b[1]) ? 0xFF : 0x00; - c[2] = (a[2] <= b[2]) ? 0xFF : 0x00; - c[3] = (a[3] <= b[3]) ? 0xFF : 0x00; - memcpy(&r, c, 4); - return r; + uint4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] <= b[0]) ? 0xFF : 0x00; + c[1] = (a[1] <= b[1]) ? 0xFF : 0x00; + c[2] = (a[2] <= b[2]) ? 0xFF : 0x00; + c[3] = (a[3] <= b[3]) ? 0xFF : 0x00; + memcpy(&r, c, 4); + return r; } uint32_t __rv__ucmplt8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] < b[0]) ? 0xFF : 0x00; - c[1] = (a[1] < b[1]) ? 0xFF : 0x00; - c[2] = (a[2] < b[2]) ? 0xFF : 0x00; - c[3] = (a[3] < b[3]) ? 0xFF : 0x00; - memcpy(&r, c, 4); - return r; + uint4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] < b[0]) ? 0xFF : 0x00; + c[1] = (a[1] < b[1]) ? 
0xFF : 0x00; + c[2] = (a[2] < b[2]) ? 0xFF : 0x00; + c[3] = (a[3] < b[3]) ? 0xFF : 0x00; + memcpy(&r, c, 4); + return r; } uint32_t __rv__umax8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] >= b[0]) ? a[0] : b[0]; - c[1] = (a[1] >= b[1]) ? a[1] : b[1]; - c[2] = (a[2] >= b[2]) ? a[2] : b[2]; - c[3] = (a[3] >= b[3]) ? a[3] : b[3]; - memcpy(&r, c, 4); - return r; + uint4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] >= b[0]) ? a[0] : b[0]; + c[1] = (a[1] >= b[1]) ? a[1] : b[1]; + c[2] = (a[2] >= b[2]) ? a[2] : b[2]; + c[3] = (a[3] >= b[3]) ? a[3] : b[3]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__umin8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] <= b[0]) ? a[0] : b[0]; - c[1] = (a[1] <= b[1]) ? a[1] : b[1]; - c[2] = (a[2] <= b[2]) ? a[2] : b[2]; - c[3] = (a[3] <= b[3]) ? a[3] : b[3]; - memcpy(&r, c, 4); - return r; + uint4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] <= b[0]) ? a[0] : b[0]; + c[1] = (a[1] <= b[1]) ? a[1] : b[1]; + c[2] = (a[2] <= b[2]) ? a[2] : b[2]; + c[3] = (a[3] <= b[3]) ? 
a[3] : b[3]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__uradd8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = ((a[0] + b[0]) & 0x000001FE)>>1; - c[1] = ((a[1] + b[1]) & 0x000001FE)>>1; - c[2] = ((a[2] + b[2]) & 0x000001FE)>>1; - c[3] = ((a[3] + b[3]) & 0x000001FE)>>1; - memcpy(&r, c, 4); - return r; + uint4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = ((a[0] + b[0]) & 0x000001FE)>>1; + c[1] = ((a[1] + b[1]) & 0x000001FE)>>1; + c[2] = ((a[2] + b[2]) & 0x000001FE)>>1; + c[3] = ((a[3] + b[3]) & 0x000001FE)>>1; + memcpy(&r, c, 4); + return r; } uint32_t __rv__ursub8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = ((a[0] - b[0]) & 0x000001FE)>>1; - c[1] = ((a[1] - b[1]) & 0x000001FE)>>1; - c[2] = ((a[2] - b[2]) & 0x000001FE)>>1; - c[3] = ((a[3] - b[3]) & 0x000001FE)>>1; - memcpy(&r, c, 4); - return r; + uint4x8_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = ((a[0] - b[0]) & 0x000001FE)>>1; + c[1] = ((a[1] - b[1]) & 0x000001FE)>>1; + c[2] = ((a[2] - b[2]) & 0x000001FE)>>1; + c[3] = ((a[3] - b[3]) & 0x000001FE)>>1; + memcpy(&r, c, 4); + return r; } uint32_t __rv__add16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] + b[0]; - c[1] = a[1] + b[1]; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] + b[0]; + c[1] = a[1] + b[1]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__radd16(const uint32_t rs1, const uint32_t rs2) { - int2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (((int32_t)a[0] + (int32_t)b[0]) & 0x0001FFFE)>>1; - c[1] = (((int32_t)a[1] + (int32_t)b[1]) & 0x0001FFFE)>>1; - memcpy(&r, c, 4); - return r; + int2x16_t a, b, c; + uint32_t 
r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (((int32_t)a[0] + (int32_t)b[0]) & 0x0001FFFE)>>1; + c[1] = (((int32_t)a[1] + (int32_t)b[1]) & 0x0001FFFE)>>1; + memcpy(&r, c, 4); + return r; } uint32_t __rv__rsub16(const uint32_t rs1, const uint32_t rs2) { - int2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (((int32_t)a[0] - (int32_t)b[0]) & 0x0001FFFE)>>1; - c[1] = (((int32_t)a[1] - (int32_t)b[1]) & 0x0001FFFE)>>1; - memcpy(&r, c, 4); - return r; + int2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (((int32_t)a[0] - (int32_t)b[0]) & 0x0001FFFE)>>1; + c[1] = (((int32_t)a[1] - (int32_t)b[1]) & 0x0001FFFE)>>1; + memcpy(&r, c, 4); + return r; } uint32_t __rv__cmpeq16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] == b[0]) ? 0xFFFF : 0x0000; - c[1] = (a[1] == b[1]) ? 0xFFFF : 0x0000; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] == b[0]) ? 0xFFFF : 0x0000; + c[1] = (a[1] == b[1]) ? 0xFFFF : 0x0000; + memcpy(&r, c, 4); + return r; } uint32_t __rv__clz16(const uint32_t rs1) { - uint2x16_t a, c; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[0] == 0 ? 16 : __builtin_clz((uint32_t)a[0]) - 16; - c[1] = a[1] == 0 ? 16 : __builtin_clz((uint32_t)a[1]) - 16; - memcpy(&r, c, 4); - return r; + uint2x16_t a, c; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[0] == 0 ? 16 : __builtin_clz((uint32_t)a[0]) - 16; + c[1] = a[1] == 0 ? 16 : __builtin_clz((uint32_t)a[1]) - 16; + memcpy(&r, c, 4); + return r; } uint32_t __rv__clo16(const uint32_t rs1) { - uint2x16_t a, c; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[0] == 0xff ? 16 : __builtin_clz((uint32_t)(uint8_t)(~a[0])) - 16; - c[1] = a[1] == 0xff ? 
16 : __builtin_clz((uint32_t)(uint8_t)(~a[1])) - 16; - memcpy(&r, c, 4); - return r; + uint2x16_t a, c; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[0] == 0xff ? 16 : __builtin_clz((uint32_t)(uint8_t)(~a[0])) - 16; + c[1] = a[1] == 0xff ? 16 : __builtin_clz((uint32_t)(uint8_t)(~a[1])) - 16; + memcpy(&r, c, 4); + return r; } uint32_t __rv__clrs16(const uint32_t rs1) { - uint2x16_t a, c; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = __builtin_clrsb((int32_t)a[0]) - 16; - c[1] = __builtin_clrsb((int32_t)a[1]) - 16; - memcpy(&r, c, 4); - return r; + uint2x16_t a, c; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = __builtin_clrsb((int32_t)a[0]) - 16; + c[1] = __builtin_clrsb((int32_t)a[1]) - 16; + memcpy(&r, c, 4); + return r; } uint32_t __rv__swap16(const uint32_t rs1) { - uint2x16_t a, c; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[1]; - c[1] = a[0]; - memcpy(&r, c, 4); - return r; + uint2x16_t a, c; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[1]; + c[1] = a[0]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__scmple16(const uint32_t rs1, const uint32_t rs2) { - int2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] <= b[0]) ? 0xFFFF : 0x0000; - c[1] = (a[1] <= b[1]) ? 0xFFFF : 0x0000; - memcpy(&r, c, 4); - return r; + int2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] <= b[0]) ? 0xFFFF : 0x0000; + c[1] = (a[1] <= b[1]) ? 0xFFFF : 0x0000; + memcpy(&r, c, 4); + return r; } uint32_t __rv__scmplt16(const uint32_t rs1, const uint32_t rs2) { - int2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] < b[0]) ? 0xFFFF : 0x0000; - c[1] = (a[1] < b[1]) ? 0xFFFF : 0x0000; - memcpy(&r, c, 4); - return r; + int2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] < b[0]) ? 0xFFFF : 0x0000; + c[1] = (a[1] < b[1]) ? 
0xFFFF : 0x0000; + memcpy(&r, c, 4); + return r; } uint32_t __rv__sll16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, c; - uint32_t o = rs2 & 0xF; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[0] << o; - c[1] = a[1] << o; - memcpy(&r, c, 4); - return r; + uint2x16_t a, c; + uint32_t o = rs2 & 0xF; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[0] << o; + c[1] = a[1] << o; + memcpy(&r, c, 4); + return r; } uint32_t __rv__srl16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, c; - uint32_t o = rs2 & 0xF; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[0] >> o; - c[1] = a[1] >> o; - memcpy(&r, c, 4); - return r; + uint2x16_t a, c; + uint32_t o = rs2 & 0xF; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[0] >> o; + c[1] = a[1] >> o; + memcpy(&r, c, 4); + return r; } uint32_t __rv__sra16(const uint32_t rs1, const uint32_t rs2) { - int2x16_t a, c; - uint32_t o = rs2 & 0xF; - uint32_t r; - memcpy(a, &rs1, 4); - c[0] = a[0] >> o; - c[1] = a[1] >> o; - memcpy(&r, c, 4); - return r; + int2x16_t a, c; + uint32_t o = rs2 & 0xF; + uint32_t r; + memcpy(a, &rs1, 4); + c[0] = a[0] >> o; + c[1] = a[1] >> o; + memcpy(&r, c, 4); + return r; } uint32_t __rv__smax16(const uint32_t rs1, const uint32_t rs2) { - int2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] >= b[0]) ? a[0] : b[0]; - c[1] = (a[1] >= b[1]) ? a[1] : b[1]; - memcpy(&r, c, 4); - return r; + int2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] >= b[0]) ? a[0] : b[0]; + c[1] = (a[1] >= b[1]) ? a[1] : b[1]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__smin16(const uint32_t rs1, const uint32_t rs2) { - int2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] <= b[0]) ? a[0] : b[0]; - c[1] = (a[1] <= b[1]) ? a[1] : b[1]; - memcpy(&r, c, 4); - return r; + int2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] <= b[0]) ? 
a[0] : b[0]; + c[1] = (a[1] <= b[1]) ? a[1] : b[1]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__ucmple16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] <= b[0]) ? 0xFFFF : 0x0000; - c[1] = (a[1] <= b[1]) ? 0xFFFF : 0x0000; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] <= b[0]) ? 0xFFFF : 0x0000; + c[1] = (a[1] <= b[1]) ? 0xFFFF : 0x0000; + memcpy(&r, c, 4); + return r; } uint32_t __rv__ucmplt16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] < b[0]) ? 0xFFFF : 0x0000; - c[1] = (a[1] < b[1]) ? 0xFFFF : 0x0000; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] < b[0]) ? 0xFFFF : 0x0000; + c[1] = (a[1] < b[1]) ? 0xFFFF : 0x0000; + memcpy(&r, c, 4); + return r; } uint32_t __rv__umax16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] >= b[0]) ? a[0] : b[0]; - c[1] = (a[1] >= b[1]) ? a[1] : b[1]; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] >= b[0]) ? a[0] : b[0]; + c[1] = (a[1] >= b[1]) ? a[1] : b[1]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__umin16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = (a[0] <= b[0]) ? a[0] : b[0]; - c[1] = (a[1] <= b[1]) ? a[1] : b[1]; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = (a[0] <= b[0]) ? a[0] : b[0]; + c[1] = (a[1] <= b[1]) ? 
a[1] : b[1]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__uradd16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = ((a[0] + b[0]) & 0x0001FFFE)>>1; - c[1] = ((a[1] + b[1]) & 0x0001FFFE)>>1; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = ((a[0] + b[0]) & 0x0001FFFE)>>1; + c[1] = ((a[1] + b[1]) & 0x0001FFFE)>>1; + memcpy(&r, c, 4); + return r; } uint32_t __rv__ursub16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = ((a[0] - b[0]) & 0x0001FFFE)>>1; - c[1] = ((a[1] - b[1]) & 0x0001FFFE)>>1; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = ((a[0] - b[0]) & 0x0001FFFE)>>1; + c[1] = ((a[1] - b[1]) & 0x0001FFFE)>>1; + memcpy(&r, c, 4); + return r; } uint32_t __rv__pkbb16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[1] = a[0]; - c[0] = b[0]; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[1] = a[0]; + c[0] = b[0]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__pkbt16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[1] = a[0]; - c[0] = b[1]; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[1] = a[0]; + c[0] = b[1]; + memcpy(&r, c, 4); + return r; } uint32_t __rv__pktb16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[1] = a[1]; - c[0] = b[0]; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[1] = a[1]; + c[0] = b[0]; + memcpy(&r, c, 
4); + return r; } uint32_t __rv__pktt16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b, c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[1] = a[1]; - c[0] = b[1]; - memcpy(&r, c, 4); - return r; + uint2x16_t a, b, c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[1] = a[1]; + c[0] = b[1]; + memcpy(&r, c, 4); + return r; } int32_t __rv__raddw(const int32_t rs1, const int32_t rs2) { - int64_t s = (int64_t)rs1 + (int64_t)rs2; - return (int32_t)((s>>1)&0xFFFFFFFF); + int64_t s = (int64_t)rs1 + (int64_t)rs2; + return (int32_t)((s>>1)&0xFFFFFFFF); } int32_t __rv__rsubw(const int32_t rs1, const int32_t rs2) { - int64_t s = (int64_t)rs1 - (int64_t)rs2; - return (int32_t)((s>>1)&0xFFFFFFFF); + int64_t s = (int64_t)rs1 - (int64_t)rs2; + return (int32_t)((s>>1)&0xFFFFFFFF); } uint32_t __rv__uraddw(const uint32_t rs1, const uint32_t rs2) { - uint64_t s = (uint64_t)rs1 + (uint64_t)rs2; - return (int32_t)((s>>1)&0xFFFFFFFF); + uint64_t s = (uint64_t)rs1 + (uint64_t)rs2; + return (int32_t)((s>>1)&0xFFFFFFFF); } uint32_t __rv__ursubw(const uint32_t rs1, const uint32_t rs2) { - uint64_t s = (uint64_t)rs1 - (uint64_t)rs2; - return (uint32_t)((s>>1)&0xFFFFFFFF); + uint64_t s = (uint64_t)rs1 - (uint64_t)rs2; + return (uint32_t)((s>>1)&0xFFFFFFFF); } int32_t __rv__ave(const int32_t rs1, const int32_t rs2) { - int64_t s = 1 + ((int64_t)rs1<<1) + ((int64_t)rs2<<1); - return (int32_t)((s>>1)&0xFFFFFFFF); + int64_t s = 1 + ((int64_t)rs1<<1) + ((int64_t)rs2<<1); + return (int32_t)((s>>1)&0xFFFFFFFF); } uint32_t __rv__bitrev(const uint32_t rs1, const uint32_t rs2) { - const uint32_t n = rs2 & 0x1F; - uint32_t x = rs1; + const uint32_t n = rs2 & 0x1F; + uint32_t x = rs1; - x = (x & 0x55555555)<<1 | (x & 0xaaaaaaaa)>>1; - x = (x & 0x33333333)<<2 | (x & 0xcccccccc)>>2; - x = (x & 0x0F0F0F0F)<<4 | (x & 0xF0F0F0F0)>>4; - x = (x & 0x00FF00FF)<<8 | (x & 0xFF00FF00)>>8; - x = (x & 0x0000FFFF)<<16 | (x & 0xFFFF0000)>>16; - return x >> (31-n); + x = (x & 
0x55555555)<<1 | (x & 0xaaaaaaaa)>>1; + x = (x & 0x33333333)<<2 | (x & 0xcccccccc)>>2; + x = (x & 0x0F0F0F0F)<<4 | (x & 0xF0F0F0F0)>>4; + x = (x & 0x00FF00FF)<<8 | (x & 0xFF00FF00)>>8; + x = (x & 0x0000FFFF)<<16 | (x & 0xFFFF0000)>>16; + return x >> (31-n); } uint32_t __rv__pbsada(const uint32_t rs1, const uint32_t rs2, const uint32_t rs3) { - uint4x8_t a, b; - uint32_t r = rs3; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - r += abs((int32_t)a[0] - (int32_t)b[0]); - r += abs((int32_t)a[1] - (int32_t)b[1]); - r += abs((int32_t)a[2] - (int32_t)b[2]); - r += abs((int32_t)a[3] - (int32_t)b[3]); - return r; + uint4x8_t a, b; + uint32_t r = rs3; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + r += abs((int32_t)a[0] - (int32_t)b[0]); + r += abs((int32_t)a[1] - (int32_t)b[1]); + r += abs((int32_t)a[2] - (int32_t)b[2]); + r += abs((int32_t)a[3] - (int32_t)b[3]); + return r; } uint32_t __rv__pbsad(const uint32_t rs1, const uint32_t rs2) { - return __rv__pbsada(rs1, rs2, 0); + return __rv__pbsada(rs1, rs2, 0); } uint32_t __rv__insb0(const uint32_t rs1, const uint32_t rs2) { - uint32_t r; + uint32_t r; - r = rs2 & 0xFFFFFF00; - r |= ((rs1 & 0xFF) << 0); + r = rs2 & 0xFFFFFF00; + r |= ((rs1 & 0xFF) << 0); - return r; + return r; } uint32_t __rv__insb1(const uint32_t rs1, const uint32_t rs2) { - uint32_t r; + uint32_t r; - r = rs2 & 0xFFFF00FF; - r |= ((rs1 & 0xFF) << 8); + r = rs2 & 0xFFFF00FF; + r |= ((rs1 & 0xFF) << 8); - return r; + return r; } uint32_t __rv__insb2(const uint32_t rs1, const uint32_t rs2) { - uint32_t r; + uint32_t r; - r = rs2 & 0xFF00FFFF; - r |= ((rs1 & 0xFF) << 16); + r = rs2 & 0xFF00FFFF; + r |= ((rs1 & 0xFF) << 16); - return r; + return r; } uint32_t __rv__insb3(const uint32_t rs1, const uint32_t rs2) { - uint32_t r; + uint32_t r; - r = rs2 & 0x00FFFFFF; - r |= ((rs1 & 0xFF) << 24); + r = rs2 & 0x00FFFFFF; + r |= ((rs1 & 0xFF) << 24); - return r; + return r; } uint64_t __rv__smul8(const uint32_t rs1, const uint32_t rs2) { - int4x8_t a, b; - int4x16_t c; - 
uint64_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] * b[0]; - c[1] = a[1] * b[1]; - c[2] = a[2] * b[2]; - c[3] = a[3] * b[3]; - memcpy(&r, c, 8); - return r; + int4x8_t a, b; + int4x16_t c; + uint64_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] * b[0]; + c[1] = a[1] * b[1]; + c[2] = a[2] * b[2]; + c[3] = a[3] * b[3]; + memcpy(&r, c, 8); + return r; } uint64_t __rv__umul8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, b; - uint4x16_t c; - uint64_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] * b[0]; - c[1] = a[1] * b[1]; - c[2] = a[2] * b[2]; - c[3] = a[3] * b[3]; - memcpy(&r, c, 8); - return r; + uint4x8_t a, b; + uint4x16_t c; + uint64_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] * b[0]; + c[1] = a[1] * b[1]; + c[2] = a[2] * b[2]; + c[3] = a[3] * b[3]; + memcpy(&r, c, 8); + return r; } uint64_t __rv__smul16(const uint32_t rs1, const uint32_t rs2) { - int2x16_t a, b; - int2x32_t c; - uint64_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] * b[0]; - c[1] = a[1] * b[1]; - memcpy(&r, c, 8); - return r; + int2x16_t a, b; + int2x32_t c; + uint64_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] * b[0]; + c[1] = a[1] * b[1]; + memcpy(&r, c, 8); + return r; } uint64_t __rv__umul16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b; - uint2x32_t c; - uint64_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] * b[0]; - c[1] = a[1] * b[1]; - memcpy(&r, c, 8); - return r; + uint2x16_t a, b; + uint2x32_t c; + uint64_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] * b[0]; + c[1] = a[1] * b[1]; + memcpy(&r, c, 8); + return r; } uint64_t __rv__smulx8(const uint32_t rs1, const uint32_t rs2) { - int4x8_t a, b; - int4x16_t c; - uint64_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] * b[1]; - c[1] = a[1] * b[0]; - c[2] = a[2] * b[3]; - c[3] = a[3] * b[2]; - memcpy(&r, c, 8); - return r; + int4x8_t a, b; + int4x16_t c; + uint64_t 
r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] * b[1]; + c[1] = a[1] * b[0]; + c[2] = a[2] * b[3]; + c[3] = a[3] * b[2]; + memcpy(&r, c, 8); + return r; } uint64_t __rv__umulx8(const uint32_t rs1, const uint32_t rs2) { - uint4x8_t a, b; - uint4x16_t c; - uint64_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] * b[1]; - c[1] = a[1] * b[0]; - c[2] = a[2] * b[3]; - c[3] = a[3] * b[2]; - memcpy(&r, c, 8); - return r; + uint4x8_t a, b; + uint4x16_t c; + uint64_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] * b[1]; + c[1] = a[1] * b[0]; + c[2] = a[2] * b[3]; + c[3] = a[3] * b[2]; + memcpy(&r, c, 8); + return r; } uint64_t __rv__smulx16(const uint32_t rs1, const uint32_t rs2) { - int2x16_t a, b; - int2x32_t c; - uint64_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] * b[1]; - c[1] = a[1] * b[0]; - memcpy(&r, c, 8); - return r; + int2x16_t a, b; + int2x32_t c; + uint64_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] * b[1]; + c[1] = a[1] * b[0]; + memcpy(&r, c, 8); + return r; } uint64_t __rv__umulx16(const uint32_t rs1, const uint32_t rs2) { - uint2x16_t a, b; - uint2x32_t c; - uint64_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] * b[1]; - c[1] = a[1] * b[0]; - memcpy(&r, c, 8); - return r; + uint2x16_t a, b; + uint2x32_t c; + uint64_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] * b[1]; + c[1] = a[1] * b[0]; + memcpy(&r, c, 8); + return r; } uint64_t __rv__smaqa(const uint32_t rs1, const uint32_t rs2, const uint32_t rs3) { - int4x8_t a, b; - int4x16_t c; - int32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] * b[0]; - c[1] = a[1] * b[1]; - c[2] = a[2] * b[2]; - c[3] = a[3] * b[3]; - r = ((int32_t)rs3) + c[0] + c[1] + c[2] + c[3]; - return r; + int4x8_t a, b; + int4x16_t c; + int32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] * b[0]; + c[1] = a[1] * b[1]; + c[2] = a[2] * b[2]; + c[3] = a[3] * b[3]; + r = ((int32_t)rs3) + c[0] + c[1] 
+ c[2] + c[3]; + return r; } uint64_t __rv__umaqa(const uint32_t rs1, const uint32_t rs2, const uint32_t rs3) { - uint4x8_t a, b; - uint4x16_t c; - uint32_t r; - memcpy(a, &rs1, 4); - memcpy(b, &rs2, 4); - c[0] = a[0] * b[0]; - c[1] = a[1] * b[1]; - c[2] = a[2] * b[2]; - c[3] = a[3] * b[3]; - r = rs3 + c[0] + c[1] + c[2] + c[3]; - return r; + uint4x8_t a, b; + uint4x16_t c; + uint32_t r; + memcpy(a, &rs1, 4); + memcpy(b, &rs2, 4); + c[0] = a[0] * b[0]; + c[1] = a[1] * b[1]; + c[2] = a[2] * b[2]; + c[3] = a[3] * b[3]; + r = rs3 + c[0] + c[1] + c[2] + c[3]; + return r; } @@ -748,27 +750,27 @@ uint32_t __rv__msubr32(const uint32_t rs1, const uint32_t rs2, const uint32_t rs return rs3 - (rs1 * rs2); } -#define GEN_SUNPKD8(x,y) \ +#define GEN_SUNPKD8(x,y) \ static inline uint32_t __rv__sunpkd8##x##y(const uint32_t rs1) { \ - int4x8_t a; \ - int2x16_t c; \ - uint32_t r; \ - memcpy(a, &rs1, 4); \ - c[1] = a[x]; \ - c[0] = a[y]; \ - memcpy(&r, c, 4); \ - return r; \ + int4x8_t a; \ + int2x16_t c; \ + uint32_t r; \ + memcpy(a, &rs1, 4); \ + c[1] = a[x]; \ + c[0] = a[y]; \ + memcpy(&r, c, 4); \ + return r; \ } -#define GEN_ZUNPKD8(x,y) \ +#define GEN_ZUNPKD8(x,y) \ static inline uint32_t __rv__zunpkd8##x##y(const uint32_t rs1) { \ - uint4x8_t a; \ - uint2x16_t c; \ - uint32_t r; \ - memcpy(a, &rs1, 4); \ - c[1] = a[x]; \ - c[0] = a[y]; \ - memcpy(&r, c, 4); \ - return r; \ + uint4x8_t a; \ + uint2x16_t c; \ + uint32_t r; \ + memcpy(a, &rs1, 4); \ + c[1] = a[x]; \ + c[0] = a[y]; \ + memcpy(&r, c, 4); \ + return r; \ } GEN_SUNPKD8(1,0) GEN_SUNPKD8(2,0) @@ -783,173 +785,119 @@ GEN_ZUNPKD8(3,2) #endif // __riscv - unsigned int a = 0x01234567; - -//#define CHECK_SIGILL - -#if defined(CHECK_SIGILL) -#include -extern jmp_buf jb; -void installillhandler(void); -#endif // CHECK_SIGILL +unsigned int a = 0x01234567; int main(int argc, char **argv) { - unsigned int b = 0xdeadbeef; - unsigned int c = 0; - unsigned int d = 0xC0FFEE00; - unsigned int index; - unsigned int index2; - 
unsigned long long cq = 0; + unsigned int b = 0xdeadbeef; + unsigned int c = 0; + unsigned int d = 0xC0FFEE00; + unsigned int index, index1, index2, index3; + unsigned long long cq = 0; + + for (index = 0 ; index < nonrandom_cnt[0] ; index++) { + a = nonrandom_a[index]; + + T1(__rv__sunpkd810); + T1(__rv__sunpkd820); + T1(__rv__sunpkd830); + T1(__rv__sunpkd831); + T1(__rv__sunpkd832); + T1(__rv__zunpkd810); + T1(__rv__zunpkd820); + T1(__rv__zunpkd830); + T1(__rv__zunpkd831); + T1(__rv__zunpkd832); + + T1(__rv__clz8); + T1(__rv__clo8); + T1(__rv__clrs8); + T1(__rv__swap8); + + //T1(__rv__clz16); /* unimplemented */ + //T1(__rv__clo16); /* unimplemented */ + //T1(__rv__clrs16); /* unimplemented */ + T1(__rv__swap16); + + for (index1 = 0 ; index1 < nonrandom_cnt[1] ; index1++) { + b = nonrandom_b[index1]; /* inner-loop index: try every b for each a */ +#if 1 + T2(__rv__add8); + T2(__rv__radd8); + T2(__rv__rsub8); + T2(__rv__cmpeq8); + + T2(__rv__scmple8); + T2(__rv__scmplt8); + T2(__rv__sll8); + T2(__rv__srl8); + T2(__rv__sra8); + T2(__rv__smax8); + T2(__rv__smin8); + T2(__rv__ucmple8); + T2(__rv__ucmplt8); + T2(__rv__umax8); + T2(__rv__umin8); + T2(__rv__uradd8); + T2(__rv__ursub8); + + T2(__rv__add16); + T2(__rv__radd16); + T2(__rv__rsub16); + T2(__rv__cmpeq16); + + T2(__rv__scmple16); + T2(__rv__scmplt16); + T2(__rv__sll16); + T2(__rv__srl16); + T2(__rv__sra16); + T2(__rv__smax16); + T2(__rv__smin16); + T2(__rv__ucmple16); + T2(__rv__ucmplt16); + T2(__rv__umax16); + T2(__rv__umin16); + T2(__rv__uradd16); + T2(__rv__ursub16); -#if defined(CHECK_SIGILL) - installillhandler(); -#endif // CHECK_SIGILL - - if (argc > 1) - a = strtoul(argv[1], NULL, 16); - if (argc > 2) - b = strtoul(argv[2], NULL, 16); - if (argc > 3) - d = strtoul(argv[3], NULL, 16); - -#if !defined(CHECK_SIGILL) -#define T2(X) \ - c = X(a,b);printf(#X "(0x%08x, 0x%08x) -> 0x%08x\n", a, b, c) -#define T1(X) \ - c = X(a);printf(#X "(0x%08x) -> 0x%08x\n", a, c) -#define T3(X) \ - c = X(a,b,d);printf(#X "(0x%08x, 0x%08x, 0x%08x) -> 0x%08x\n", a, b, d, 
c) -#define T2W(X) \ - cq = X(a,b);printf(#X "(0x%08x, 0x%08x) -> 0x%016llx\n", a, b, cq) -#else -#define T2(X) do { \ - if (setjmp(jb)) { \ - printf(#X "(0x%08x, 0x%08x) -> *SIGILL*\n", a, b); \ - } else { \ - c = X(a,b); \ - printf(#X "(0x%08x, 0x%08x) -> 0x%08x\n", a, b, c); \ - } \ - } while (0) -#define T1(X) do { \ - if (setjmp(jb)) { \ - printf(#X "(0x%08x) -> *SIGILL*\n", a); \ - } else { \ - c = X(a); \ - printf(#X "(0x%08x) -> 0x%08x\n", a, c); \ - } \ - } while (0) -#define T3(X) do { \ - if (setjmp(jb)) { \ - printf(#X "(0x%08x, 0x%08x, 0x%08x) -> *SIGILL*\n", a, b, d); \ - } else { \ - c = X(a,b,d); \ - printf(#X "(0x%08x, 0x%08x, 0x%08x) -> 0x%08x\n", a, b, d, c); \ - } \ - } while (0) -#define T2W(X) do { \ - if (setjmp(jb)) { \ - printf(#X "(0x%08x, 0x%08x) -> *SIGILL*\n", a, b); \ - } else { \ - cq = X(a,b); \ - printf(#X "(0x%08x, 0x%08x) -> 0x%016llx\n", a, b, cq); \ - } \ - } while (0) -#endif // CHECK_SIGILL - - for (index = 0 ; index < 32 ; index++) { - #if 1 - T2(__rv__add8); - T2(__rv__radd8); - T2(__rv__rsub8); - T2(__rv__cmpeq8); - T1(__rv__clz8); - T1(__rv__clo8); - T1(__rv__clrs8); - T1(__rv__swap8); - T2(__rv__scmple8); - T2(__rv__scmplt8); - T2(__rv__sll8); - T2(__rv__srl8); - T2(__rv__sra8); - T2(__rv__smax8); - T2(__rv__smin8); - T2(__rv__ucmple8); - T2(__rv__ucmplt8); - T2(__rv__umax8); - T2(__rv__umin8); - T2(__rv__uradd8); - T2(__rv__ursub8); - - T2(__rv__add16); - T2(__rv__radd16); - T2(__rv__rsub16); - T2(__rv__cmpeq16); - //T1(__rv__clz16); /* unimplemented */ - //T1(__rv__clo16); /* unimplemented */ - //T1(__rv__clrs16); /* unimplemented */ - T1(__rv__swap16); - T2(__rv__scmple16); - T2(__rv__scmplt16); - T2(__rv__sll16); - T2(__rv__srl16); - T2(__rv__sra16); - T2(__rv__smax16); - T2(__rv__smin16); - T2(__rv__ucmple16); - T2(__rv__ucmplt16); - T2(__rv__umax16); - T2(__rv__umin16); - T2(__rv__uradd16); - T2(__rv__ursub16); + T2(__rv__pkbb16); + T2(__rv__pkbt16); + T2(__rv__pktb16); + T2(__rv__pktt16); - T2(__rv__pkbb16); - 
T2(__rv__pkbt16); - T2(__rv__pktb16); - T2(__rv__pktt16); + T2(__rv__raddw); + T2(__rv__rsubw); + T2(__rv__uraddw); + T2(__rv__ursubw); + T2(__rv__ave); + T2(__rv__bitrev); +#endif + T2(__rv__pbsad); + T3(__rv__pbsada); + + T2(__rv__insb0); + T2(__rv__insb1); + T2(__rv__insb2); + T2(__rv__insb3); + + T2W(__rv__smul8); + T2W(__rv__umul8); + T2W(__rv__smul16); + T2W(__rv__umul16); + T2W(__rv__smulx8); + T2W(__rv__umulx8); + T2W(__rv__smulx16); + T2W(__rv__umulx16); - T2(__rv__raddw); - T2(__rv__rsubw); - T2(__rv__uraddw); - T2(__rv__ursubw); - T2(__rv__ave); - T2(__rv__bitrev); - #endif - T2(__rv__pbsad); - T3(__rv__pbsada); - - T2(__rv__insb0); - T2(__rv__insb1); - T2(__rv__insb2); - T2(__rv__insb3); - - T2W(__rv__smul8); - T2W(__rv__umul8); - T2W(__rv__smul16); - T2W(__rv__umul16); - T2W(__rv__smulx8); - T2W(__rv__umulx8); - T2W(__rv__smulx16); - T2W(__rv__umulx16); - - T3(__rv__smaqa); - T3(__rv__umaqa); - - T3(__rv__maddr32); - T3(__rv__msubr32); - - T1(__rv__sunpkd810); - T1(__rv__sunpkd820); - T1(__rv__sunpkd830); - T1(__rv__sunpkd831); - T1(__rv__sunpkd832); - T1(__rv__zunpkd810); - T1(__rv__zunpkd820); - T1(__rv__zunpkd830); - T1(__rv__zunpkd831); - T1(__rv__zunpkd832); - - b = 0x0100F004 + index; - } - - return 0; + for (index2 = 0 ; index2 < nonrandom_cnt[2] ; index2++) { + d = nonrandom_d[index2]; + T3(__rv__smaqa); + T3(__rv__umaqa); + + T3(__rv__maddr32); + T3(__rv__msubr32); + } + } + } + + return 0; }