diff --git a/aes256ctrstandalone-rv32/Makefile b/aes256ctrstandalone-rv32/Makefile index d716d6a..de6c445 100644 --- a/aes256ctrstandalone-rv32/Makefile +++ b/aes256ctrstandalone-rv32/Makefile @@ -9,8 +9,8 @@ CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++ STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip -NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM -OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM +NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM +OPT=-march=rv32ima -mabi=ilp32 -I. -I.. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM #ALTCC=$(CC) #NEWOPT=$(OPT) diff --git a/aes256ctrstandalone-rv32/riscv32.c b/aes256ctrstandalone-rv32/riscv32.c index af4881b..f083328 100644 --- a/aes256ctrstandalone-rv32/riscv32.c +++ b/aes256ctrstandalone-rv32/riscv32.c @@ -16,89 +16,7 @@ #define _bswap64(a) __builtin_bswap64(a) #define _bswap(a) __builtin_bswap32(a) -#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \ - ".endm\n"); -#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \ - ".endm\n"); -#define ASM2FMACRO(N, O) asm(".macro "#N" rt, rs2\n" \ - ".word ("#O" | (\\rt << 15) | (\\rs2 << 20))\n" \ - ".endm\n"); -asm("#define reg_zero 0\n"); -asm("#define reg_ra 1\n"); -asm("#define reg_sp 2\n"); -asm("#define reg_gp 3\n"); -asm("#define reg_tp 4\n"); -asm("#define reg_t0 5\n"); -asm("#define reg_t1 6\n"); -asm("#define reg_t2 7\n"); -asm("#define reg_s0 8\n"); -asm("#define reg_s1 9\n"); -asm("#define reg_a0 10\n"); -asm("#define reg_a1 11\n"); -asm("#define reg_a2 12\n"); -asm("#define reg_a3 13\n"); -asm("#define reg_a4 14\n"); -asm("#define reg_a5 15\n"); -asm("#define reg_a6 16\n"); -asm("#define reg_a7 17\n"); -asm("#define reg_s2 18\n"); 
-asm("#define reg_s3 19\n"); -asm("#define reg_s4 20\n"); -asm("#define reg_s5 21\n"); -asm("#define reg_s6 22\n"); -asm("#define reg_s7 23\n"); -asm("#define reg_s8 24\n"); -asm("#define reg_s9 25\n"); -asm("#define reg_s10 26\n"); -asm("#define reg_s11 27\n"); -asm("#define reg_t3 28\n"); -asm("#define reg_t4 29\n"); -asm("#define reg_t5 30\n"); -asm("#define reg_t6 31\n"); - -#define FUN1(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1\n" \ - : "=r" (r) \ - : "r" (rs1)); \ - return r; \ - } -#define FUN2(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ - : "=r" (r) \ - : "r" (rs1), "r" (rs2)); \ - return r; \ - } -#define FUN2F(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \ - uint32_t r = rs1; \ - asm (#ASNAME " reg_%0, reg_%1\n" \ - : "+&r" (r) \ - : "r" (rs2)); \ - return r; \ - } - -ASM2FMACRO(AES32ESMI0,0x36000033) -ASM2FMACRO(AES32ESMI1,0x76000033) -ASM2FMACRO(AES32ESMI2,0xb6000033) -ASM2FMACRO(AES32ESMI3,0xf6000033) -ASM2FMACRO(AES32ESI0,0x32000033) -ASM2FMACRO(AES32ESI1,0x72000033) -ASM2FMACRO(AES32ESI2,0xb2000033) -ASM2FMACRO(AES32ESI3,0xf2000033) -FUN2F(aes32esmi0,AES32ESMI0) -FUN2F(aes32esmi1,AES32ESMI1) -FUN2F(aes32esmi2,AES32ESMI2) -FUN2F(aes32esmi3,AES32ESMI3) -FUN2F(aes32esi0,AES32ESI0) -FUN2F(aes32esi1,AES32ESI1) -FUN2F(aes32esi2,AES32ESI2) -FUN2F(aes32esi3,AES32ESI3) +#include "new_instructions_support_k.h" #define AES_ROUND1T(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ { \ diff --git a/aes256gcmv1standalone-rv32/Makefile b/aes256gcmv1standalone-rv32/Makefile index 28203c6..3556e19 100644 --- a/aes256gcmv1standalone-rv32/Makefile +++ b/aes256gcmv1standalone-rv32/Makefile @@ -9,8 +9,8 @@ CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++ 
STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip -NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM -OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM +NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM +OPT=-march=rv32ima -mabi=ilp32 -I. -I.. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM #ALTCC=$(CC) #NEWOPT=$(OPT) diff --git a/aes256gcmv1standalone-rv32/encrypt.c b/aes256gcmv1standalone-rv32/encrypt.c index 40f9167..5bf73ad 100644 --- a/aes256gcmv1standalone-rv32/encrypt.c +++ b/aes256gcmv1standalone-rv32/encrypt.c @@ -8,89 +8,7 @@ #define _bswap64(a) __builtin_bswap64(a) #define _bswap(a) __builtin_bswap32(a) -#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \ - ".endm\n"); -#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \ - ".endm\n"); -#define ASM2FMACRO(N, O) asm(".macro "#N" rt, rs2\n" \ - ".word ("#O" | (\\rt << 15) | (\\rs2 << 20))\n" \ - ".endm\n"); -asm("#define reg_zero 0\n"); -asm("#define reg_ra 1\n"); -asm("#define reg_sp 2\n"); -asm("#define reg_gp 3\n"); -asm("#define reg_tp 4\n"); -asm("#define reg_t0 5\n"); -asm("#define reg_t1 6\n"); -asm("#define reg_t2 7\n"); -asm("#define reg_s0 8\n"); -asm("#define reg_s1 9\n"); -asm("#define reg_a0 10\n"); -asm("#define reg_a1 11\n"); -asm("#define reg_a2 12\n"); -asm("#define reg_a3 13\n"); -asm("#define reg_a4 14\n"); -asm("#define reg_a5 15\n"); -asm("#define reg_a6 16\n"); -asm("#define reg_a7 17\n"); -asm("#define reg_s2 18\n"); -asm("#define reg_s3 19\n"); -asm("#define reg_s4 20\n"); -asm("#define reg_s5 21\n"); -asm("#define reg_s6 22\n"); -asm("#define reg_s7 23\n"); -asm("#define reg_s8 24\n"); -asm("#define reg_s9 25\n"); -asm("#define reg_s10 26\n"); -asm("#define reg_s11 27\n"); -asm("#define reg_t3 28\n"); -asm("#define reg_t4 29\n"); -asm("#define reg_t5 30\n"); 
-asm("#define reg_t6 31\n"); - -#define FUN1(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1\n" \ - : "=r" (r) \ - : "r" (rs1)); \ - return r; \ - } -#define FUN2(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ - : "=r" (r) \ - : "r" (rs1), "r" (rs2)); \ - return r; \ - } -#define FUN2F(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \ - uint32_t r = rs1; \ - asm (#ASNAME " reg_%0, reg_%1\n" \ - : "+&r" (r) \ - : "r" (rs2)); \ - return r; \ - } - -ASM2FMACRO(AES32ESMI0,0x36000033) -ASM2FMACRO(AES32ESMI1,0x76000033) -ASM2FMACRO(AES32ESMI2,0xb6000033) -ASM2FMACRO(AES32ESMI3,0xf6000033) -ASM2FMACRO(AES32ESI0,0x32000033) -ASM2FMACRO(AES32ESI1,0x72000033) -ASM2FMACRO(AES32ESI2,0xb2000033) -ASM2FMACRO(AES32ESI3,0xf2000033) -FUN2F(aes32esmi0,AES32ESMI0) -FUN2F(aes32esmi1,AES32ESMI1) -FUN2F(aes32esmi2,AES32ESMI2) -FUN2F(aes32esmi3,AES32ESMI3) -FUN2F(aes32esi0,AES32ESI0) -FUN2F(aes32esi1,AES32ESI1) -FUN2F(aes32esi2,AES32ESI2) -FUN2F(aes32esi3,AES32ESI3) +#include "new_instructions_support_k.h" #define AES_ROUND1T(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ { \ diff --git a/chacha20standalone-rv32/Makefile b/chacha20standalone-rv32/Makefile index 519cef3..f9f2ac9 100644 --- a/chacha20standalone-rv32/Makefile +++ b/chacha20standalone-rv32/Makefile @@ -9,8 +9,8 @@ CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++ STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip -NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM -OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM +NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM +OPT=-march=rv32ima -mabi=ilp32 -I. -I.. 
-O3 #-fno-vectorize #-DUSE_EPI_CUSTOM #ALTCC=$(CC) #NEWOPT=$(OPT) diff --git a/chacha20standalone-rv32/chacha.c b/chacha20standalone-rv32/chacha.c index c026a17..c56e997 100644 --- a/chacha20standalone-rv32/chacha.c +++ b/chacha20standalone-rv32/chacha.c @@ -13,12 +13,10 @@ Public domain. #include #define ROUNDS 20 -#if 0 -#define ROTATE(v,c) (ROTL32(v,c)) -#else -#include +#include "new_instructions_support_b.h" + #define ROTATE(v,c) _rv32_rol(v,c) -#endif + #define XOR(v,w) ((v) ^ (w)) #define PLUS(v,w) (U32V((v) + (w))) #define PLUSONE(v) (PLUS((v),1)) diff --git a/new_instructions_support.h b/new_instructions_support.h new file mode 100644 index 0000000..f81aed6 --- /dev/null +++ b/new_instructions_support.h @@ -0,0 +1,135 @@ +#ifndef __NEW_INSTRUCTION_SUPPORT_H__ +#define __NEW_INSTRUCTION_SUPPORT_H__ + +#include + +typedef uint32_t uint_xlen_t; +#define XLEN 32 + + //when missing in toolchain... + +// macro to build the function to access the assembly instructions +// unary +#define FUN1(NAME, ASNAME) \ + static inline uint32_t NAME(uint32_t rs1) { \ + uint32_t r; \ + asm (#ASNAME " reg_%0, reg_%1\n" \ + : "=r" (r) \ + : "r" (rs1)); \ + return r; \ + } +// binary +#define FUN2(NAME, ASNAME) \ + static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \ + uint32_t r; \ + asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ + : "=r" (r) \ + : "r" (rs1), "r" (rs2)); \ + return r; \ + } +// binary (destructive, e.g. aes32esmi from K) +#define FUN2F(NAME, ASNAME) \ + static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \ + uint32_t r = rs1; \ + asm (#ASNAME " reg_%0, reg_%1\n" \ + : "+&r" (r) \ + : "r" (rs2)); \ + return r; \ + } +// ternary (constructive, e.g. cmov from B) +#define FUN3(NAME, ASNAME) \ + static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \ + uint32_t r; \ + asm (#ASNAME " reg_%0, reg_%1, reg_%2, reg_%3\n" \ + : "=r" (r) \ + : "r" (rs1), "r" (rs2), "r" (rs3)); \ + return r; \ + } +// ternary (destructive, e.g. 
pbsad from P) +#define FUN3R(NAME, ASNAME) \ + static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \ + uint32_t r = rs3; \ + asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ + : "+&r" (r) \ + : "r" (rs1), "r" (rs2)); \ + return r; \ + } +// ternary (destructive using an immediate, e.g. insb from P) +#define FUN3RI(NAME, ASNAME, IMM) \ + static inline uint_xlen_t NAME##IMM(uint_xlen_t rs1, uint_xlen_t rs3) { \ + uint32_t r = rs3; \ + asm (#ASNAME " reg_%0, reg_%1, " #IMM "\n" \ + : "+&r" (r) \ + : "r" (rs1)); \ + return r; \ + } +// binary wide (64-bit output in R2n/R2n+1, e.g. smul8 from P) +#define FUN2W(NAME, ASNAME) \ + static inline uint64_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \ + uint32_t r0, r1; \ + asm (#ASNAME " reg_t5, reg_%2, reg_%3\n" \ + "mv %0, t5\n" \ + "mv %1, t6\n" \ + : "=r" (r0), "=r" (r1) \ + : "r" (rs1), "r" (rs2) \ + : "t5", "t6"); \ + return ((uint64_t)r0 | (((uint64_t)r1)<<32)); \ + } + +// macro to build assembly macros to generate the proper +// opcodes as .word macro +// the translation from name to number is done by the +// defines below, so this needs to go .c -> .S -> .o +// so that the preprocessor is applied to the +// intermediate .S file +#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ + ".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \ + ".endm\n"); +#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \ + ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \ + ".endm\n"); +#define ASM2FMACRO(N, O) asm(".macro "#N" rt, rs2\n" \ + ".word ("#O" | (\\rt << 15) | (\\rs2 << 20))\n" \ + ".endm\n"); +#define ASM3MACRO(N, O) asm(".macro "#N" rd, rs1, rs2, rs3\n" \ + ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20) | (\\rs3 << 27) )\n" \ + ".endm\n"); +#define ASM3RMACRO(N, O) ASM2MACRO(N, O) +#define ASM3RIMACRO(N, O) asm(".macro "#N" rd, rs1, imm\n" \ + ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\imm << 20))\n" \ + ".endm\n"); +// register name -> number mapping
+asm("#define reg_zero 0\n"); +asm("#define reg_ra 1\n"); +asm("#define reg_sp 2\n"); +asm("#define reg_gp 3\n"); +asm("#define reg_tp 4\n"); +asm("#define reg_t0 5\n"); +asm("#define reg_t1 6\n"); +asm("#define reg_t2 7\n"); +asm("#define reg_s0 8\n"); +asm("#define reg_s1 9\n"); +asm("#define reg_a0 10\n"); +asm("#define reg_a1 11\n"); +asm("#define reg_a2 12\n"); +asm("#define reg_a3 13\n"); +asm("#define reg_a4 14\n"); +asm("#define reg_a5 15\n"); +asm("#define reg_a6 16\n"); +asm("#define reg_a7 17\n"); +asm("#define reg_s2 18\n"); +asm("#define reg_s3 19\n"); +asm("#define reg_s4 20\n"); +asm("#define reg_s5 21\n"); +asm("#define reg_s6 22\n"); +asm("#define reg_s7 23\n"); +asm("#define reg_s8 24\n"); +asm("#define reg_s9 25\n"); +asm("#define reg_s10 26\n"); +asm("#define reg_s11 27\n"); +asm("#define reg_t3 28\n"); +asm("#define reg_t4 29\n"); +asm("#define reg_t5 30\n"); +asm("#define reg_t6 31\n"); + +#endif // __NEW_INSTRUCTION_SUPPORT_H__ diff --git a/new_instructions_support_b.h b/new_instructions_support_b.h new file mode 100644 index 0000000..9c46461 --- /dev/null +++ b/new_instructions_support_b.h @@ -0,0 +1,23 @@ +#ifndef __NEW_INSTRUCTION_SUPPORT_B_H__ +#define __NEW_INSTRUCTION_SUPPORT_B_H__ + +#include // assume we're using the B toolchain + +#include "new_instructions_support.h" // for support macros + +// for instructions currently missing in the toolchain +ASM2MACRO(XPERM_N,0x28002033) +ASM2MACRO(XPERM_B,0x28004033) +ASM2MACRO(XPERM_H,0x28006033) +ASM2MACRO(SH1ADD,0x20002033) +ASM2MACRO(SH2ADD,0x20004033) +ASM2MACRO(SH3ADD,0x20006033) +FUN2(xperm_n,XPERM_N) +FUN2(xperm_b,XPERM_B) +FUN2(xperm_h,XPERM_H) +FUN2(sh1add,SH1ADD) +FUN2(sh2add,SH2ADD) +FUN2(sh3add,SH3ADD) + + +#endif // __NEW_INSTRUCTION_SUPPORT_B_H__ diff --git a/new_instructions_support_k.h b/new_instructions_support_k.h new file mode 100644 index 0000000..d975b63 --- /dev/null +++ b/new_instructions_support_k.h @@ -0,0 +1,45 @@ +#ifndef __NEW_INSTRUCTION_SUPPORT_K_H__ +#define
__NEW_INSTRUCTION_SUPPORT_K_H__ + +#include "new_instructions_support.h" // for support macros + +ASM2FMACRO(AES32ESMI0,0x36000033) +ASM2FMACRO(AES32ESMI1,0x76000033) +ASM2FMACRO(AES32ESMI2,0xb6000033) +ASM2FMACRO(AES32ESMI3,0xf6000033) +ASM2FMACRO(AES32ESI0,0x32000033) +ASM2FMACRO(AES32ESI1,0x72000033) +ASM2FMACRO(AES32ESI2,0xb2000033) +ASM2FMACRO(AES32ESI3,0xf2000033) +FUN2F(aes32esmi0,AES32ESMI0) +FUN2F(aes32esmi1,AES32ESMI1) +FUN2F(aes32esmi2,AES32ESMI2) +FUN2F(aes32esmi3,AES32ESMI3) +FUN2F(aes32esi0,AES32ESI0) +FUN2F(aes32esi1,AES32ESI1) +FUN2F(aes32esi2,AES32ESI2) +FUN2F(aes32esi3,AES32ESI3) + +ASM1MACRO(SHA256SIG0,0x10201013) +ASM1MACRO(SHA256SIG1,0x10301013) +ASM1MACRO(SHA256SUM0,0x10001013) +ASM1MACRO(SHA256SUM1,0x10101013) +FUN1(sha256sig0,SHA256SIG0) +FUN1(sha256sig1,SHA256SIG1) +FUN1(sha256sum0,SHA256SUM0) +FUN1(sha256sum1,SHA256SUM1) + +ASM2MACRO(SHA512SIG0L,0x54000033) +ASM2MACRO(SHA512SIG0H,0x5c000033) +ASM2MACRO(SHA512SIG1L,0x56000033) +ASM2MACRO(SHA512SIG1H,0x5e000033) +ASM2MACRO(SHA512SUM0R,0x50000033) +ASM2MACRO(SHA512SUM1R,0x52000033) +FUN2(sha512sig0l, SHA512SIG0L) +FUN2(sha512sig0h, SHA512SIG0H) +FUN2(sha512sig1l, SHA512SIG1L) +FUN2(sha512sig1h, SHA512SIG1H) +FUN2(sha512sum0r, SHA512SUM0R) +FUN2(sha512sum1r, SHA512SUM1R) + +#endif // __NEW_INSTRUCTION_SUPPORT_K_H__ diff --git a/new_instructions_support_p.h b/new_instructions_support_p.h new file mode 100644 index 0000000..60ddbbc --- /dev/null +++ b/new_instructions_support_p.h @@ -0,0 +1,149 @@ +#ifndef __NEW_INSTRUCTION_SUPPORT_P_H__ +#define __NEW_INSTRUCTION_SUPPORT_P_H__ + +#include "new_instructions_support.h" // for support macros + +ASM2MACRO(ADD8,0x48000077) +FUN2(__rv__add8,ADD8) +ASM2MACRO(RADD8,0x08000077) +FUN2(__rv__radd8,RADD8) +ASM2MACRO(RSUB8,0x0a000077) +FUN2(__rv__rsub8,RSUB8) +ASM2MACRO(CMPEQ8,0x4e000077) +FUN2(__rv__cmpeq8,CMPEQ8) +ASM1MACRO(CLZ8,0xae100077) +FUN1(__rv__clz8,CLZ8) +ASM1MACRO(CLO8,0xae300077) +FUN1(__rv__clo8,CLO8) +ASM1MACRO(CLRS8,0xae000077)
+FUN1(__rv__clrs8,CLRS8) +ASM1MACRO(SWAP8,0xad800077) +FUN1(__rv__swap8,SWAP8) +ASM2MACRO(SCMPLE8,0x1e000077) +FUN2(__rv__scmple8,SCMPLE8) +ASM2MACRO(SCMPLT8,0x0e000077) +FUN2(__rv__scmplt8,SCMPLT8) +ASM2MACRO(SLL8,0x5c000077) +FUN2(__rv__sll8,SLL8) +ASM2MACRO(SRL8,0x5a000077) +FUN2(__rv__srl8,SRL8) +ASM2MACRO(SRA8,0x58000077) +FUN2(__rv__sra8,SRA8) +ASM2MACRO(SMAX8,0x8a000077) +FUN2(__rv__smax8,SMAX8) +ASM2MACRO(SMIN8,0x88000077) +FUN2(__rv__smin8,SMIN8) +ASM2MACRO(SUB8,0x4a000077) +FUN2(__rv__sub8,SUB8) +ASM2MACRO(UCMPLE8,0x3e000077) +FUN2(__rv__ucmple8,UCMPLE8) +ASM2MACRO(UCMPLT8,0x2e000077) +FUN2(__rv__ucmplt8,UCMPLT8) +ASM2MACRO(UMAX8,0x9a000077) +FUN2(__rv__umax8,UMAX8) +ASM2MACRO(UMIN8,0x98000077) +FUN2(__rv__umin8,UMIN8) +ASM2MACRO(URADD8,0x28000077) +FUN2(__rv__uradd8,URADD8) +ASM2MACRO(URSUB8,0x2a000077) +FUN2(__rv__ursub8,URSUB8) + +ASM2MACRO(ADD16,0x40000077) +FUN2(__rv__add16,ADD16) +ASM2MACRO(RADD16,0x00000077) +FUN2(__rv__radd16,RADD16) +ASM2MACRO(RSUB16,0x02000077) +FUN2(__rv__rsub16,RSUB16) +ASM2MACRO(CMPEQ16,0x4c000077) +FUN2(__rv__cmpeq16,CMPEQ16) +ASM1MACRO(CLZ16,0xae900077) +FUN1(__rv__clz16,CLZ16) +ASM1MACRO(CLO16,0xaeb00077) +FUN1(__rv__clo16,CLO16) +ASM1MACRO(CLRS16,0xae800077) +FUN1(__rv__clrs16,CLRS16) +ASM1MACRO(SWAP16,0xad900077) +FUN1(__rv__swap16,SWAP16) +ASM2MACRO(SCMPLE16,0x1c000077) +FUN2(__rv__scmple16,SCMPLE16) +ASM2MACRO(SCMPLT16,0x0c000077) +FUN2(__rv__scmplt16,SCMPLT16) +ASM2MACRO(SLL16,0x54000077) +FUN2(__rv__sll16,SLL16) +ASM2MACRO(SRL16,0x52000077) +FUN2(__rv__srl16,SRL16) +ASM2MACRO(SRA16,0x50000077) +FUN2(__rv__sra16,SRA16) +ASM2MACRO(SMAX16,0x82000077) +FUN2(__rv__smax16,SMAX16) +ASM2MACRO(SMIN16,0x80000077) +FUN2(__rv__smin16,SMIN16) +ASM2MACRO(SUB16,0x42000077) +FUN2(__rv__sub16,SUB16) +ASM2MACRO(UCMPLE16,0x3c000077) +FUN2(__rv__ucmple16,UCMPLE16) +ASM2MACRO(UCMPLT16,0x2c000077) +FUN2(__rv__ucmplt16,UCMPLT16) +ASM2MACRO(UMAX16,0x92000077) +FUN2(__rv__umax16,UMAX16) +ASM2MACRO(UMIN16,0x90000077)
+FUN2(__rv__umin16,UMIN16) + +ASM2MACRO(PKBB16,0x0e001077) +FUN2(__rv__pkbb16,PKBB16) +ASM2MACRO(PKBT16,0x1e001077) +FUN2(__rv__pkbt16,PKBT16) +ASM2MACRO(PKTB16,0x2e001077) +FUN2(__rv__pktb16,PKTB16) +ASM2MACRO(PKTT16,0x3e001077) +FUN2(__rv__pktt16,PKTT16) +ASM2MACRO(URADD16,0x20000077) +FUN2(__rv__uradd16,URADD16) +ASM2MACRO(URSUB16,0x22000077) +FUN2(__rv__ursub16,URSUB16) + + +ASM2MACRO(RADDW,0x20001077) +FUN2(__rv__raddw,RADDW) +ASM2MACRO(RSUBW,0x22001077) +FUN2(__rv__rsubw,RSUBW) +ASM2MACRO(URADDW,0x30001077) +FUN2(__rv__uraddw,URADDW) +ASM2MACRO(URSUBW,0x32001077) +FUN2(__rv__ursubw,URSUBW) +ASM2MACRO(AVE,0xe0000077) +FUN2(__rv__ave,AVE) + +ASM2MACRO(PBSAD, 0xfc000077) +FUN2(__rv__pbsad, PBSAD) +ASM3RMACRO(PBSADA, 0xfe000077) +FUN3R(__rv__pbsada, PBSADA) + +ASM2MACRO(BITREV,0xe6000077) +FUN2(__rv__bitrev,BITREV) + +ASM3RIMACRO(INSB, 0xac000077) +FUN3RI(__rv__insb, INSB, 0) +FUN3RI(__rv__insb, INSB, 1) +FUN3RI(__rv__insb, INSB, 2) +FUN3RI(__rv__insb, INSB, 3) + +ASM2MACRO(SMUL8,0xa8000077) +FUN2W(__rv__smul8,SMUL8) +ASM2MACRO(UMUL8,0xb8000077) +FUN2W(__rv__umul8,UMUL8) +ASM2MACRO(SMUL16,0xa0000077) +FUN2W(__rv__smul16,SMUL16) +ASM2MACRO(UMUL16,0xb0000077) +FUN2W(__rv__umul16,UMUL16) + +ASM2MACRO(SMULx8,0xaa000077) +FUN2W(__rv__smulx8,SMULx8) +ASM2MACRO(UMULx8,0xba000077) +FUN2W(__rv__umulx8,UMULx8) +ASM2MACRO(SMULx16,0xa2000077) +FUN2W(__rv__smulx16,SMULx16) +ASM2MACRO(UMULx16,0xb2000077) +FUN2W(__rv__umulx16,UMULx16) + +#endif // __NEW_INSTRUCTION_SUPPORT_P_H__ diff --git a/pbsad.c b/pbsad.c index 55f648f..5a1a0e0 100644 --- a/pbsad.c +++ b/pbsad.c @@ -8,66 +8,7 @@ typedef ssize_t ptrdiff_t; #ifdef __riscv -typedef uint32_t uint_xlen_t; -#define XLEN 32 -#define FUN2(NAME, ASNAME) \ - static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ - : "=r" (r) \ - : "r" (rs1), "r" (rs2)); \ - return r; \ - } -#define FUN3R(NAME, ASNAME) \ - static inline uint_xlen_t NAME(uint_xlen_t rs1, 
uint_xlen_t rs2, uint_xlen_t rs3) { \ - uint32_t r = rs3; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ - : "+&r" (r) \ - : "r" (rs1), "r" (rs2)); \ - return r; \ - } -#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \ - ".endm\n"); -#define ASM3RMACRO(N, O) ASM2MACRO(N, O) - -asm("#define reg_zero 0\n"); -asm("#define reg_ra 1\n"); -asm("#define reg_sp 2\n"); -asm("#define reg_gp 3\n"); -asm("#define reg_tp 4\n"); -asm("#define reg_t0 5\n"); -asm("#define reg_t1 6\n"); -asm("#define reg_t2 7\n"); -asm("#define reg_s0 8\n"); -asm("#define reg_s1 9\n"); -asm("#define reg_a0 10\n"); -asm("#define reg_a1 11\n"); -asm("#define reg_a2 12\n"); -asm("#define reg_a3 13\n"); -asm("#define reg_a4 14\n"); -asm("#define reg_a5 15\n"); -asm("#define reg_a6 16\n"); -asm("#define reg_a7 17\n"); -asm("#define reg_s2 18\n"); -asm("#define reg_s3 19\n"); -asm("#define reg_s4 20\n"); -asm("#define reg_s5 21\n"); -asm("#define reg_s6 22\n"); -asm("#define reg_s7 23\n"); -asm("#define reg_s8 24\n"); -asm("#define reg_s9 25\n"); -asm("#define reg_s10 26\n"); -asm("#define reg_s11 27\n"); -asm("#define reg_t3 28\n"); -asm("#define reg_t4 29\n"); -asm("#define reg_t5 30\n"); -asm("#define reg_t6 31\n"); - -ASM2MACRO(PBSAD, 0xfc000077) -FUN2(__rv__pbsad, PBSAD) -ASM3RMACRO(PBSADA, 0xfe000077) -FUN3R(__rv__pbsada, PBSADA) +#include "new_instructions_support_p.h" static inline long long cpucycles_riscv(void) { long long result; diff --git a/sha256standalone-rv32/Makefile b/sha256standalone-rv32/Makefile index 0473547..416712f 100644 --- a/sha256standalone-rv32/Makefile +++ b/sha256standalone-rv32/Makefile @@ -9,8 +9,8 @@ CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++ STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip -NEWOPT=-march=rv32imab -mabi=ilp32 -I. 
-O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM -OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM +NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32ZKNH #-fno-vectorize #-DUSE_EPI_CUSTOM +OPT=-march=rv32ima -mabi=ilp32 -I. -I.. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM #ALTCC=$(CC) #NEWOPT=$(OPT) diff --git a/sha256standalone-rv32/blocks.c b/sha256standalone-rv32/blocks.c index ac15c8f..6b10f51 100644 --- a/sha256standalone-rv32/blocks.c +++ b/sha256standalone-rv32/blocks.c @@ -1,87 +1,22 @@ #include "crypto_hashblocks.h" -typedef unsigned int uint32; +#include #ifdef RV32ZKNH -typedef unsigned int uint32_t; -#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \ - ".endm\n"); -#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \ - ".endm\n"); -asm("#define reg_zero 0\n"); -asm("#define reg_ra 1\n"); -asm("#define reg_sp 2\n"); -asm("#define reg_gp 3\n"); -asm("#define reg_tp 4\n"); -asm("#define reg_t0 5\n"); -asm("#define reg_t1 6\n"); -asm("#define reg_t2 7\n"); -asm("#define reg_s0 8\n"); -asm("#define reg_s1 9\n"); -asm("#define reg_a0 10\n"); -asm("#define reg_a1 11\n"); -asm("#define reg_a2 12\n"); -asm("#define reg_a3 13\n"); -asm("#define reg_a4 14\n"); -asm("#define reg_a5 15\n"); -asm("#define reg_a6 16\n"); -asm("#define reg_a7 17\n"); -asm("#define reg_s2 18\n"); -asm("#define reg_s3 19\n"); -asm("#define reg_s4 20\n"); -asm("#define reg_s5 21\n"); -asm("#define reg_s6 22\n"); -asm("#define reg_s7 23\n"); -asm("#define reg_s8 24\n"); -asm("#define reg_s9 25\n"); -asm("#define reg_s10 26\n"); -asm("#define reg_s11 27\n"); -asm("#define reg_t3 28\n"); -asm("#define reg_t4 29\n"); -asm("#define reg_t5 30\n"); -asm("#define reg_t6 31\n"); - -#define FUN1(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1\n" \ - : "=r" (r) \ - : "r" (rs1)); \ - return r; 
\ - } -#define FUN2(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ - : "=r" (r) \ - : "r" (rs1), "r" (rs2)); \ - return r; \ - } - -ASM1MACRO(SHA256SIG0,0x10201013) -ASM1MACRO(SHA256SIG1,0x10301013) -ASM1MACRO(SHA256SUM0,0x10001013) -ASM1MACRO(SHA256SUM1,0x10101013) -FUN1(sha256sig0,SHA256SIG0) -FUN1(sha256sig1,SHA256SIG1) -FUN1(sha256sum0,SHA256SUM0) -FUN1(sha256sum1,SHA256SUM1) - +#include "new_instructions_support_k.h" #endif -static uint32 load_bigendian(const unsigned char *x) +static uint32_t load_bigendian(const unsigned char *x) { return - (uint32) (x[3]) \ - | (((uint32) (x[2])) << 8) \ - | (((uint32) (x[1])) << 16) \ - | (((uint32) (x[0])) << 24) + (uint32_t) (x[3]) \ + | (((uint32_t) (x[2])) << 8) \ + | (((uint32_t) (x[1])) << 16) \ + | (((uint32_t) (x[0])) << 24) ; } -static void store_bigendian(unsigned char *x,uint32 u) +static void store_bigendian(unsigned char *x,uint32_t u) { x[3] = u; u >>= 8; x[2] = u; u >>= 8; @@ -140,17 +75,17 @@ static void store_bigendian(unsigned char *x,uint32 u) int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen) { - uint32 state[8]; - uint32 a; - uint32 b; - uint32 c; - uint32 d; - uint32 e; - uint32 f; - uint32 g; - uint32 h; - uint32 T1; - uint32 T2; + uint32_t state[8]; + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + uint32_t e; + uint32_t f; + uint32_t g; + uint32_t h; + uint32_t T1; + uint32_t T2; a = load_bigendian(statebytes + 0); state[0] = a; b = load_bigendian(statebytes + 4); state[1] = b; @@ -162,22 +97,22 @@ int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned h = load_bigendian(statebytes + 28); state[7] = h; while (inlen >= 64) { - uint32 w0 = load_bigendian(in + 0); - uint32 w1 = load_bigendian(in + 4); - uint32 w2 = load_bigendian(in + 8); - uint32 w3 = load_bigendian(in + 12); - uint32 w4 = load_bigendian(in + 16); - uint32 w5 = 
load_bigendian(in + 20); - uint32 w6 = load_bigendian(in + 24); - uint32 w7 = load_bigendian(in + 28); - uint32 w8 = load_bigendian(in + 32); - uint32 w9 = load_bigendian(in + 36); - uint32 w10 = load_bigendian(in + 40); - uint32 w11 = load_bigendian(in + 44); - uint32 w12 = load_bigendian(in + 48); - uint32 w13 = load_bigendian(in + 52); - uint32 w14 = load_bigendian(in + 56); - uint32 w15 = load_bigendian(in + 60); + uint32_t w0 = load_bigendian(in + 0); + uint32_t w1 = load_bigendian(in + 4); + uint32_t w2 = load_bigendian(in + 8); + uint32_t w3 = load_bigendian(in + 12); + uint32_t w4 = load_bigendian(in + 16); + uint32_t w5 = load_bigendian(in + 20); + uint32_t w6 = load_bigendian(in + 24); + uint32_t w7 = load_bigendian(in + 28); + uint32_t w8 = load_bigendian(in + 32); + uint32_t w9 = load_bigendian(in + 36); + uint32_t w10 = load_bigendian(in + 40); + uint32_t w11 = load_bigendian(in + 44); + uint32_t w12 = load_bigendian(in + 48); + uint32_t w13 = load_bigendian(in + 52); + uint32_t w14 = load_bigendian(in + 56); + uint32_t w15 = load_bigendian(in + 60); F(w0 ,0x428a2f98) F(w1 ,0x71374491) diff --git a/sha512standalone-rv32/Makefile b/sha512standalone-rv32/Makefile index 54880de..cc52cbc 100644 --- a/sha512standalone-rv32/Makefile +++ b/sha512standalone-rv32/Makefile @@ -9,8 +9,8 @@ CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++ STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip -NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM -OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM +NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32ZKNH #-fno-vectorize #-DUSE_EPI_CUSTOM +OPT=-march=rv32ima -mabi=ilp32 -I. -I.. 
-O3 #-fno-vectorize #-DUSE_EPI_CUSTOM #ALTCC=$(CC) #NEWOPT=$(OPT) diff --git a/sha512standalone-rv32/blocks.c b/sha512standalone-rv32/blocks.c index 38b5215..05c86eb 100644 --- a/sha512standalone-rv32/blocks.c +++ b/sha512standalone-rv32/blocks.c @@ -1,97 +1,27 @@ #include "crypto_hashblocks.h" -typedef unsigned long long uint64; +#include #ifdef RV32ZKNH -typedef unsigned int uint32_t; -typedef unsigned long long uint64_t; -#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \ - ".endm\n"); -#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \ - ".endm\n"); -asm("#define reg_zero 0\n"); -asm("#define reg_ra 1\n"); -asm("#define reg_sp 2\n"); -asm("#define reg_gp 3\n"); -asm("#define reg_tp 4\n"); -asm("#define reg_t0 5\n"); -asm("#define reg_t1 6\n"); -asm("#define reg_t2 7\n"); -asm("#define reg_s0 8\n"); -asm("#define reg_s1 9\n"); -asm("#define reg_a0 10\n"); -asm("#define reg_a1 11\n"); -asm("#define reg_a2 12\n"); -asm("#define reg_a3 13\n"); -asm("#define reg_a4 14\n"); -asm("#define reg_a5 15\n"); -asm("#define reg_a6 16\n"); -asm("#define reg_a7 17\n"); -asm("#define reg_s2 18\n"); -asm("#define reg_s3 19\n"); -asm("#define reg_s4 20\n"); -asm("#define reg_s5 21\n"); -asm("#define reg_s6 22\n"); -asm("#define reg_s7 23\n"); -asm("#define reg_s8 24\n"); -asm("#define reg_s9 25\n"); -asm("#define reg_s10 26\n"); -asm("#define reg_s11 27\n"); -asm("#define reg_t3 28\n"); -asm("#define reg_t4 29\n"); -asm("#define reg_t5 30\n"); -asm("#define reg_t6 31\n"); - -#define FUN1(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1\n" \ - : "=r" (r) \ - : "r" (rs1)); \ - return r; \ - } -#define FUN2(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ - : "=r" (r) \ - : "r" (rs1), "r" (rs2)); \ - 
return r; \ - } - -ASM2MACRO(SHA512SIG0L,0x54000033) -ASM2MACRO(SHA512SIG0H,0x5c000033) -ASM2MACRO(SHA512SIG1L,0x56000033) -ASM2MACRO(SHA512SIG1H,0x5e000033) -ASM2MACRO(SHA512SUM0R,0x50000033) -ASM2MACRO(SHA512SUM1R,0x52000033) -FUN2(sha512sig0l, SHA512SIG0L) -FUN2(sha512sig0h, SHA512SIG0H) -FUN2(sha512sig1l, SHA512SIG1L) -FUN2(sha512sig1h, SHA512SIG1H) -FUN2(sha512sum0r, SHA512SUM0R) -FUN2(sha512sum1r, SHA512SUM1R) - +#include "new_instructions_support_k.h" #endif -static uint64 load_bigendian(const unsigned char *x) +static uint64_t load_bigendian(const unsigned char *x) { return - (uint64) (x[7]) \ - | (((uint64) (x[6])) << 8) \ - | (((uint64) (x[5])) << 16) \ - | (((uint64) (x[4])) << 24) \ - | (((uint64) (x[3])) << 32) \ - | (((uint64) (x[2])) << 40) \ - | (((uint64) (x[1])) << 48) \ - | (((uint64) (x[0])) << 56) + (uint64_t) (x[7]) \ + | (((uint64_t) (x[6])) << 8) \ + | (((uint64_t) (x[5])) << 16) \ + | (((uint64_t) (x[4])) << 24) \ + | (((uint64_t) (x[3])) << 32) \ + | (((uint64_t) (x[2])) << 40) \ + | (((uint64_t) (x[1])) << 48) \ + | (((uint64_t) (x[0])) << 56) ; } -static void store_bigendian(unsigned char *x,uint64 u) +static void store_bigendian(unsigned char *x,uint64_t u) { x[7] = u; u >>= 8; x[6] = u; u >>= 8; @@ -155,17 +85,17 @@ static void store_bigendian(unsigned char *x,uint64 u) int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen) { - uint64 state[8]; - uint64 a; - uint64 b; - uint64 c; - uint64 d; - uint64 e; - uint64 f; - uint64 g; - uint64 h; - uint64 T1; - uint64 T2; + uint64_t state[8]; + uint64_t a; + uint64_t b; + uint64_t c; + uint64_t d; + uint64_t e; + uint64_t f; + uint64_t g; + uint64_t h; + uint64_t T1; + uint64_t T2; a = load_bigendian(statebytes + 0); state[0] = a; b = load_bigendian(statebytes + 8); state[1] = b; @@ -177,22 +107,22 @@ int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned h = load_bigendian(statebytes + 56); state[7] = h; while (inlen >= 128) 
{ - uint64 w0 = load_bigendian(in + 0); - uint64 w1 = load_bigendian(in + 8); - uint64 w2 = load_bigendian(in + 16); - uint64 w3 = load_bigendian(in + 24); - uint64 w4 = load_bigendian(in + 32); - uint64 w5 = load_bigendian(in + 40); - uint64 w6 = load_bigendian(in + 48); - uint64 w7 = load_bigendian(in + 56); - uint64 w8 = load_bigendian(in + 64); - uint64 w9 = load_bigendian(in + 72); - uint64 w10 = load_bigendian(in + 80); - uint64 w11 = load_bigendian(in + 88); - uint64 w12 = load_bigendian(in + 96); - uint64 w13 = load_bigendian(in + 104); - uint64 w14 = load_bigendian(in + 112); - uint64 w15 = load_bigendian(in + 120); + uint64_t w0 = load_bigendian(in + 0); + uint64_t w1 = load_bigendian(in + 8); + uint64_t w2 = load_bigendian(in + 16); + uint64_t w3 = load_bigendian(in + 24); + uint64_t w4 = load_bigendian(in + 32); + uint64_t w5 = load_bigendian(in + 40); + uint64_t w6 = load_bigendian(in + 48); + uint64_t w7 = load_bigendian(in + 56); + uint64_t w8 = load_bigendian(in + 64); + uint64_t w9 = load_bigendian(in + 72); + uint64_t w10 = load_bigendian(in + 80); + uint64_t w11 = load_bigendian(in + 88); + uint64_t w12 = load_bigendian(in + 96); + uint64_t w13 = load_bigendian(in + 104); + uint64_t w14 = load_bigendian(in + 112); + uint64_t w15 = load_bigendian(in + 120); F(w0 ,0x428a2f98d728ae22ULL) F(w1 ,0x7137449123ef65cdULL) diff --git a/test_b.c b/test_b.c index 71a1cc7..9d0ddda 100644 --- a/test_b.c +++ b/test_b.c @@ -29,103 +29,16 @@ #include #include -/* typedef uint32_t uint_xlen_t; */ -/* #define XLEN 32 */ -#include - - -typedef uint32_t uint_xlen_t; -#define XLEN 32 #ifdef __riscv - //when missing in toolchain -#define FUN1(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1\n" \ - : "=r" (r) \ - : "r" (rs1)); \ - return r; \ - } -#define FUN2(NAME, ASNAME) \ - static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ - 
: "=r" (r) \ - : "r" (rs1), "r" (rs2)); \ - return r; \ - } -#define FUN3(NAME, ASNAME) \ - static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2, reg_%3\n" \ - : "=r" (r) \ - : "r" (rs1), "r" (rs2), "r" (rs3)); \ - return r; \ - } -#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \ - ".endm\n"); -#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \ - ".endm\n"); -#define ASM3MACRO(N, O) asm(".macro "#N" rd, rs1, rs2, rs3\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20) | (\\rs3 << 27) )\n" \ - ".endm\n"); -asm("#define reg_zero 0\n"); -asm("#define reg_ra 1\n"); -asm("#define reg_sp 2\n"); -asm("#define reg_gp 3\n"); -asm("#define reg_tp 4\n"); -asm("#define reg_t0 5\n"); -asm("#define reg_t1 6\n"); -asm("#define reg_t2 7\n"); -asm("#define reg_s0 8\n"); -asm("#define reg_s1 9\n"); -asm("#define reg_a0 10\n"); -asm("#define reg_a1 11\n"); -asm("#define reg_a2 12\n"); -asm("#define reg_a3 13\n"); -asm("#define reg_a4 14\n"); -asm("#define reg_a5 15\n"); -asm("#define reg_a6 16\n"); -asm("#define reg_a7 17\n"); -asm("#define reg_s2 18\n"); -asm("#define reg_s3 19\n"); -asm("#define reg_s4 20\n"); -asm("#define reg_s5 21\n"); -asm("#define reg_s6 22\n"); -asm("#define reg_s7 23\n"); -asm("#define reg_s8 24\n"); -asm("#define reg_s9 25\n"); -asm("#define reg_s10 26\n"); -asm("#define reg_s11 27\n"); -asm("#define reg_t3 28\n"); -asm("#define reg_t4 29\n"); -asm("#define reg_t5 30\n"); -asm("#define reg_t6 31\n"); -ASM2MACRO(XPERM_N,0x28002033) -ASM2MACRO(XPERM_B,0x28004033) -ASM2MACRO(XPERM_H,0x28006033) -ASM2MACRO(SH1ADD,0x20002033) -ASM2MACRO(SH2ADD,0x20004033) -ASM2MACRO(SH3ADD,0x20006033) - -/* FUN(xperm_n,0x2928a9b3) */ -/* FUN(xperm_b,0x2928c9b3) */ -/* FUN(xperm_h,0x2928e9b3) */ -/* FUN(sh1add,0x2128a9b3) */ -/* 
FUN(sh2add,0x2128c9b3) */ -/* FUN(sh3add,0x2128e9b3) */ -FUN2(xperm_n,XPERM_N) -FUN2(xperm_b,XPERM_B) -FUN2(xperm_h,XPERM_H) -FUN2(sh1add,SH1ADD) -FUN2(sh2add,SH2ADD) -FUN2(sh3add,SH3ADD) +#include "new_instructions_support_b.h" #define _rv64_clmul2(a,b) _rv64_clmul(a,b) #else +#include // emulation + +typedef uint32_t uint_xlen_t; +#define XLEN 32 uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2) { uint_xlen_t r = 0; diff --git a/test_p.c b/test_p.c index 4726c5f..2f7895b 100644 --- a/test_p.c +++ b/test_p.c @@ -13,252 +13,9 @@ #include #include -//#include - -typedef uint32_t uint_xlen_t; -#define XLEN 32 #ifdef __riscv - //when missing in toolchain -#define FUN1(NAME, ASNAME) \ - static inline uint32_t NAME(uint32_t rs1) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1\n" \ - : "=r" (r) \ - : "r" (rs1)); \ - return r; \ - } -#define FUN2(NAME, ASNAME) \ - static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ - : "=r" (r) \ - : "r" (rs1), "r" (rs2)); \ - return r; \ - } -#define FUN3(NAME, ASNAME) \ - static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \ - uint32_t r; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2, reg_%3\n" \ - : "=r" (r) \ - : "r" (rs1), "r" (rs2), "r" (rs3)); \ - return r; \ - } -#define FUN3R(NAME, ASNAME) \ - static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \ - uint32_t r = rs3; \ - asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \ - : "+&r" (r) \ - : "r" (rs1), "r" (rs2)); \ - return r; \ - } -#define FUN3RI(NAME, ASNAME, IMM) \ - static inline uint_xlen_t NAME##IMM(uint_xlen_t rs1, uint_xlen_t rs3) { \ - uint32_t r = rs3; \ - asm (#ASNAME " reg_%0, reg_%1, " #IMM "\n" \ - : "+&r" (r) \ - : "r" (rs1)); \ - return r; \ - } -#define FUN2W(NAME, ASNAME) \ - static inline uint64_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \ - uint32_t r0, r1; \ - asm (#ASNAME " reg_t5, reg_%2, reg_%3\n" \ - "mv %0, 
t5\n" \ - "mv %1, t6\n" \ - : "=r" (r0), "=r" (r1) \ - : "r" (rs1), "r" (rs2) \ - : "t5", "t6"); \ - return ((uint64_t)r0 | (((uint64_t)r1)<<32)); \ - } -#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \ - ".endm\n"); -#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \ - ".endm\n"); -#define ASM3MACRO(N, O) asm(".macro "#N" rd, rs1, rs2, rs3\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20) | (\\rs3 << 27) )\n" \ - ".endm\n"); -#define ASM3RMACRO(N, O) ASM2MACRO(N, O) -#define ASM3RIMACRO(N, O) asm(".macro "#N" rd, rs1, imm\n" \ - ".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\imm << 20))\n" \ - ".endm\n"); - -asm("#define reg_zero 0\n"); -asm("#define reg_ra 1\n"); -asm("#define reg_sp 2\n"); -asm("#define reg_gp 3\n"); -asm("#define reg_tp 4\n"); -asm("#define reg_t0 5\n"); -asm("#define reg_t1 6\n"); -asm("#define reg_t2 7\n"); -asm("#define reg_s0 8\n"); -asm("#define reg_s1 9\n"); -asm("#define reg_a0 10\n"); -asm("#define reg_a1 11\n"); -asm("#define reg_a2 12\n"); -asm("#define reg_a3 13\n"); -asm("#define reg_a4 14\n"); -asm("#define reg_a5 15\n"); -asm("#define reg_a6 16\n"); -asm("#define reg_a7 17\n"); -asm("#define reg_s2 18\n"); -asm("#define reg_s3 19\n"); -asm("#define reg_s4 20\n"); -asm("#define reg_s5 21\n"); -asm("#define reg_s6 22\n"); -asm("#define reg_s7 23\n"); -asm("#define reg_s8 24\n"); -asm("#define reg_s9 25\n"); -asm("#define reg_s10 26\n"); -asm("#define reg_s11 27\n"); -asm("#define reg_t3 28\n"); -asm("#define reg_t4 29\n"); -asm("#define reg_t5 30\n"); -asm("#define reg_t6 31\n"); -ASM2MACRO(ADD8,0x48000077) -FUN2(__rv__add8,ADD8) -ASM2MACRO(RADD8,0x08000077) -FUN2(__rv__radd8,RADD8) -ASM2MACRO(RSUB8,0x0a000077) -FUN2(__rv__rsub8,RSUB8) -ASM2MACRO(CMPEQ8,0x4e000077) -FUN2(__rv__cmpeq8,CMPEQ8) -ASM1MACRO(CLZ8,0xae100077) -FUN1(__rv__clz8,CLZ8) -ASM1MACRO(CLO8,0xae300077) 
-FUN1(__rv__clo8,CLO8) -ASM1MACRO(CLRS8,0xae000077) -FUN1(__rv__clrs8,CLRS8) -ASM1MACRO(SWAP8,0xad800077) -FUN1(__rv__swap8,swap8) -ASM2MACRO(SCMPLE8,0x1e000077) -FUN2(__rv__scmple8,SCMPLE8) -ASM2MACRO(SCMPLT8,0x0e000077) -FUN2(__rv__scmplt8,SCMPLt8) -ASM2MACRO(SLL8,0x5c000077) -FUN2(__rv__sll8,SLL8) -ASM2MACRO(SRL8,0x5a000077) -FUN2(__rv__srl8,SRL8) -ASM2MACRO(SRA8,0x58000077) -FUN2(__rv__sra8,SRA8) -ASM2MACRO(SMAX8,0x8a000077) -FUN2(__rv__smax8,SMAX8) -ASM2MACRO(SMIN8,0x88000077) -FUN2(__rv__smin8,SMIN8) -ASM2MACRO(SUB8,0x4a000077) -FUN2(__rv__sub8,SUB8) -ASM2MACRO(UCMPLE8,0x3e000077) -FUN2(__rv__ucmple8,UCMPLE8) -ASM2MACRO(UCMPLT8,0x2e000077) -FUN2(__rv__ucmplt8,UCMPLt8) -ASM2MACRO(UMAX8,0x9a000077) -FUN2(__rv__umax8,UMAX8) -ASM2MACRO(UMIN8,0x98000077) -FUN2(__rv__umin8,UMIN8) -ASM2MACRO(URADD8,0x28000077) -FUN2(__rv__uradd8,URADD8) -ASM2MACRO(URSUB8,0x2a000077) -FUN2(__rv__ursub8,URSUB8) - -ASM2MACRO(ADD16,0x40000077) -FUN2(__rv__add16,ADD16) -ASM2MACRO(RADD16,0x00000077) -FUN2(__rv__radd16,RADD16) -ASM2MACRO(RSUB16,0x02000077) -FUN2(__rv__rsub16,RSUB16) -ASM2MACRO(CMPEQ16,0x4c000077) -FUN2(__rv__cmpeq16,CMPEQ16) -ASM1MACRO(CLZ16,0xae900077) -FUN1(__rv__clz16,CLZ16) -ASM1MACRO(CLO16,0xaeb00077) -FUN1(__rv__clo16,CLO16) -ASM1MACRO(CLRS16,0xae800077) -FUN1(__rv__clrs16,CLRS16) -ASM1MACRO(SWAP16,0xad900077) -FUN1(__rv__swap16,swap16) -ASM2MACRO(SCMPLE16,0x1c000077) -FUN2(__rv__scmple16,SCMPLE16) -ASM2MACRO(SCMPLT16,0x0c000077) -FUN2(__rv__scmplt16,SCMPLT16) -ASM2MACRO(SLL16,0x54000077) -FUN2(__rv__sll16,SLL16) -ASM2MACRO(SRL16,0x52000077) -FUN2(__rv__srl16,SRL16) -ASM2MACRO(SRA16,0x50000077) -FUN2(__rv__sra16,SRA16) -ASM2MACRO(SMAX16,0x82000077) -FUN2(__rv__smax16,SMAX16) -ASM2MACRO(SMIN16,0x80000077) -FUN2(__rv__smin16,SMIN16) -ASM2MACRO(SUB16,0x42000077) -FUN2(__rv__sub16,SUB16) -ASM2MACRO(UCMPLE16,0x3c000077) -FUN2(__rv__ucmple16,UCMPLE16) -ASM2MACRO(UCMPLT16,0x2c000077) -FUN2(__rv__ucmplt16,UCMPLT16) -ASM2MACRO(UMAX16,0x92000077) -FUN2(__rv__umax16,UMAX16) 
-ASM2MACRO(UMIN16,0x90000077) -FUN2(__rv__umin16,UMIN16) - -ASM2MACRO(PKBB16,0x0e001077) -FUN2(__rv__pkbb16,PKBB16) -ASM2MACRO(PKBT16,0x1e001077) -FUN2(__rv__pkbt16,PKBT16) -ASM2MACRO(PKTB16,0x2e001077) -FUN2(__rv__pktb16,PKTB16) -ASM2MACRO(PKTT16,0x3e001077) -FUN2(__rv__pktt16,PKTT16) -ASM2MACRO(URADD16,0x20000077) -FUN2(__rv__uradd16,URADD16) -ASM2MACRO(URSUB16,0x22000077) -FUN2(__rv__ursub16,URSUB16) - - -ASM2MACRO(RADDW,0x20001077) -FUN2(__rv__raddw,RADDW) -ASM2MACRO(RSUBW,0x22001077) -FUN2(__rv__rsubw,RSUBW) -ASM2MACRO(URADDW,0x30001077) -FUN2(__rv__uraddw,URADDW) -ASM2MACRO(URSUBW,0x32001077) -FUN2(__rv__ursubw,URSUBW) -ASM2MACRO(AVE,0xe0000077) -FUN2(__rv__ave,AVE) - -ASM2MACRO(PBSAD, 0xfc000077) -FUN2(__rv__pbsad, PBSAD) -ASM3RMACRO(PBSADA, 0xfe000077) -FUN3R(__rv__pbsada, PBSADA) - -ASM2MACRO(BITREV,0xe6000077) -FUN2(__rv__bitrev,BITREV) - -ASM3RIMACRO(INSB, 0xac000077) -FUN3RI(__rv__insb, INSB, 0) -FUN3RI(__rv__insb, INSB, 1) -FUN3RI(__rv__insb, INSB, 2) -FUN3RI(__rv__insb, INSB, 3) - -ASM2MACRO(SMUL8,0xa8000077) -FUN2W(__rv__smul8,SMUL8) -ASM2MACRO(UMUL8,0xb8000077) -FUN2W(__rv__umul8,UMUL8) -ASM2MACRO(SMUL16,0xa0000077) -FUN2W(__rv__smul16,SMUL16) -ASM2MACRO(UMUL16,0xb0000077) -FUN2W(__rv__umul16,UMUL16) - -ASM2MACRO(SMULx8,0xaa000077) -FUN2W(__rv__smulx8,SMULx8) -ASM2MACRO(UMULx8,0xba000077) -FUN2W(__rv__umulx8,UMULx8) -ASM2MACRO(SMULx16,0xa2000077) -FUN2W(__rv__smulx16,SMULx16) -ASM2MACRO(UMULx16,0xb2000077) -FUN2W(__rv__umulx16,UMULx16) +#include "new_instructions_support_p.h" #else // !__riscv typedef uint8_t uint4x8_t[4];