This commit is contained in:
Romain Dolbeau 2021-02-13 04:55:07 -05:00
parent 54c93b289f
commit f09395cca8
17 changed files with 448 additions and 786 deletions

View file

@ -9,8 +9,8 @@ CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc
ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc
CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++ CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++
STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip
NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM
OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM OPT=-march=rv32ima -mabi=ilp32 -I. -I.. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM
#ALTCC=$(CC) #ALTCC=$(CC)
#NEWOPT=$(OPT) #NEWOPT=$(OPT)

View file

@ -16,89 +16,7 @@
#define _bswap64(a) __builtin_bswap64(a) #define _bswap64(a) __builtin_bswap64(a)
#define _bswap(a) __builtin_bswap32(a) #define _bswap(a) __builtin_bswap32(a)
#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ #include "new_instructions_support_k.h"
".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \
".endm\n");
#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \
".endm\n");
#define ASM2FMACRO(N, O) asm(".macro "#N" rt, rs2\n" \
".word ("#O" | (\\rt << 15) | (\\rs2 << 20))\n" \
".endm\n");
asm("#define reg_zero 0\n");
asm("#define reg_ra 1\n");
asm("#define reg_sp 2\n");
asm("#define reg_gp 3\n");
asm("#define reg_tp 4\n");
asm("#define reg_t0 5\n");
asm("#define reg_t1 6\n");
asm("#define reg_t2 7\n");
asm("#define reg_s0 8\n");
asm("#define reg_s1 9\n");
asm("#define reg_a0 10\n");
asm("#define reg_a1 11\n");
asm("#define reg_a2 12\n");
asm("#define reg_a3 13\n");
asm("#define reg_a4 14\n");
asm("#define reg_a5 15\n");
asm("#define reg_a6 16\n");
asm("#define reg_a7 17\n");
asm("#define reg_s2 18\n");
asm("#define reg_s3 19\n");
asm("#define reg_s4 20\n");
asm("#define reg_s5 21\n");
asm("#define reg_s6 22\n");
asm("#define reg_s7 23\n");
asm("#define reg_s8 24\n");
asm("#define reg_s9 25\n");
asm("#define reg_s10 26\n");
asm("#define reg_s11 27\n");
asm("#define reg_t3 28\n");
asm("#define reg_t4 29\n");
asm("#define reg_t5 30\n");
asm("#define reg_t6 31\n");
#define FUN1(NAME, ASNAME) \
static inline uint32_t NAME(uint32_t rs1) { \
uint32_t r; \
asm (#ASNAME " reg_%0, reg_%1\n" \
: "=r" (r) \
: "r" (rs1)); \
return r; \
}
#define FUN2(NAME, ASNAME) \
static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \
uint32_t r; \
asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
: "=r" (r) \
: "r" (rs1), "r" (rs2)); \
return r; \
}
#define FUN2F(NAME, ASNAME) \
static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \
uint32_t r = rs1; \
asm (#ASNAME " reg_%0, reg_%1\n" \
: "+&r" (r) \
: "r" (rs2)); \
return r; \
}
ASM2FMACRO(AES32ESMI0,0x36000033)
ASM2FMACRO(AES32ESMI1,0x76000033)
ASM2FMACRO(AES32ESMI2,0xb6000033)
ASM2FMACRO(AES32ESMI3,0xf6000033)
ASM2FMACRO(AES32ESI0,0x32000033)
ASM2FMACRO(AES32ESI1,0x72000033)
ASM2FMACRO(AES32ESI2,0xb2000033)
ASM2FMACRO(AES32ESI3,0xf2000033)
FUN2F(aes32esmi0,AES32ESMI0)
FUN2F(aes32esmi1,AES32ESMI1)
FUN2F(aes32esmi2,AES32ESMI2)
FUN2F(aes32esmi3,AES32ESMI3)
FUN2F(aes32esi0,AES32ESI0)
FUN2F(aes32esi1,AES32ESI1)
FUN2F(aes32esi2,AES32ESI2)
FUN2F(aes32esi3,AES32ESI3)
#define AES_ROUND1T(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ #define AES_ROUND1T(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \ { \

View file

@ -9,8 +9,8 @@ CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc
ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc
CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++ CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++
STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip
NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM
OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM OPT=-march=rv32ima -mabi=ilp32 -I. -I.. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM
#ALTCC=$(CC) #ALTCC=$(CC)
#NEWOPT=$(OPT) #NEWOPT=$(OPT)

View file

@ -8,89 +8,7 @@
#define _bswap64(a) __builtin_bswap64(a) #define _bswap64(a) __builtin_bswap64(a)
#define _bswap(a) __builtin_bswap32(a) #define _bswap(a) __builtin_bswap32(a)
#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ #include "new_instructions_support_k.h"
".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \
".endm\n");
#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \
".endm\n");
#define ASM2FMACRO(N, O) asm(".macro "#N" rt, rs2\n" \
".word ("#O" | (\\rt << 15) | (\\rs2 << 20))\n" \
".endm\n");
asm("#define reg_zero 0\n");
asm("#define reg_ra 1\n");
asm("#define reg_sp 2\n");
asm("#define reg_gp 3\n");
asm("#define reg_tp 4\n");
asm("#define reg_t0 5\n");
asm("#define reg_t1 6\n");
asm("#define reg_t2 7\n");
asm("#define reg_s0 8\n");
asm("#define reg_s1 9\n");
asm("#define reg_a0 10\n");
asm("#define reg_a1 11\n");
asm("#define reg_a2 12\n");
asm("#define reg_a3 13\n");
asm("#define reg_a4 14\n");
asm("#define reg_a5 15\n");
asm("#define reg_a6 16\n");
asm("#define reg_a7 17\n");
asm("#define reg_s2 18\n");
asm("#define reg_s3 19\n");
asm("#define reg_s4 20\n");
asm("#define reg_s5 21\n");
asm("#define reg_s6 22\n");
asm("#define reg_s7 23\n");
asm("#define reg_s8 24\n");
asm("#define reg_s9 25\n");
asm("#define reg_s10 26\n");
asm("#define reg_s11 27\n");
asm("#define reg_t3 28\n");
asm("#define reg_t4 29\n");
asm("#define reg_t5 30\n");
asm("#define reg_t6 31\n");
#define FUN1(NAME, ASNAME) \
static inline uint32_t NAME(uint32_t rs1) { \
uint32_t r; \
asm (#ASNAME " reg_%0, reg_%1\n" \
: "=r" (r) \
: "r" (rs1)); \
return r; \
}
#define FUN2(NAME, ASNAME) \
static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \
uint32_t r; \
asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
: "=r" (r) \
: "r" (rs1), "r" (rs2)); \
return r; \
}
#define FUN2F(NAME, ASNAME) \
static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \
uint32_t r = rs1; \
asm (#ASNAME " reg_%0, reg_%1\n" \
: "+&r" (r) \
: "r" (rs2)); \
return r; \
}
ASM2FMACRO(AES32ESMI0,0x36000033)
ASM2FMACRO(AES32ESMI1,0x76000033)
ASM2FMACRO(AES32ESMI2,0xb6000033)
ASM2FMACRO(AES32ESMI3,0xf6000033)
ASM2FMACRO(AES32ESI0,0x32000033)
ASM2FMACRO(AES32ESI1,0x72000033)
ASM2FMACRO(AES32ESI2,0xb2000033)
ASM2FMACRO(AES32ESI3,0xf2000033)
FUN2F(aes32esmi0,AES32ESMI0)
FUN2F(aes32esmi1,AES32ESMI1)
FUN2F(aes32esmi2,AES32ESMI2)
FUN2F(aes32esmi3,AES32ESMI3)
FUN2F(aes32esi0,AES32ESI0)
FUN2F(aes32esi1,AES32ESI1)
FUN2F(aes32esi2,AES32ESI2)
FUN2F(aes32esi3,AES32ESI3)
#define AES_ROUND1T(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ #define AES_ROUND1T(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \ { \

View file

@ -9,8 +9,8 @@ CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc
ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc
CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++ CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++
STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip
NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM
OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM OPT=-march=rv32ima -mabi=ilp32 -I. -I.. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM
#ALTCC=$(CC) #ALTCC=$(CC)
#NEWOPT=$(OPT) #NEWOPT=$(OPT)

View file

@ -13,12 +13,10 @@ Public domain.
#include <stdio.h> #include <stdio.h>
#define ROUNDS 20 #define ROUNDS 20
#if 0 #include "new_instructions_support_b.h"
#define ROTATE(v,c) (ROTL32(v,c))
#else
#include <rvintrin.h>
#define ROTATE(v,c) _rv32_rol(v,c) #define ROTATE(v,c) _rv32_rol(v,c)
#endif
#define XOR(v,w) ((v) ^ (w)) #define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w))) #define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1)) #define PLUSONE(v) (PLUS((v),1))

135
new_instructions_support.h Normal file
View file

@ -0,0 +1,135 @@
#ifndef __NEW_INSTRUCTION_SUPPORT_H__
#define __NEW_INSTRUCTION_SUPPORT_H__
#include <stdint.h>
typedef uint32_t uint_xlen_t;
#define XLEN 32
//when missing in toolchain...
// Generators for the C wrappers that give access to the new assembly
// instructions.
// FUN1 -- unary: defines NAME(rs1), which issues the assembler macro ASNAME
// with one output register and one input register. Each operand is spelled
// reg_<register name> in the emitted assembly text.
#define FUN1(NAME, ASNAME)                          \
  static inline uint32_t NAME(uint32_t rs1)         \
  {                                                 \
    uint32_t result;                                \
    asm (#ASNAME " reg_%0, reg_%1\n"                \
         : "=r" (result)                            \
         : "r" (rs1));                              \
    return result;                                  \
  }
// FUN2 -- binary: defines NAME(rs1, rs2), which issues the assembler macro
// ASNAME with one output register and two input registers.
// The result temporary uses the same uint_xlen_t type as the operands and
// the return value, so nothing is narrowed if uint_xlen_t is ever widened.
#define FUN2(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \
  uint_xlen_t r; \
  asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
       : "=r" (r) \
       : "r" (rs1), "r" (rs2)); \
  return r; \
}
// FUN2F -- binary, destructive (e.g. aes32esmi from K): defines
// NAME(rs1, rs2). The accumulator starts as a copy of rs1 and is passed to
// ASNAME as a read-write operand ("+"); the early-clobber modifier ("&")
// keeps it out of the register holding rs2. rs2 is the only pure input.
#define FUN2F(NAME, ASNAME)                                 \
  static inline uint32_t NAME(uint32_t rs1, uint32_t rs2)   \
  {                                                         \
    uint32_t acc = rs1;                                     \
    asm (#ASNAME " reg_%0, reg_%1\n"                        \
         : "+&r" (acc)                                      \
         : "r" (rs2));                                      \
    return acc;                                             \
  }
// FUN3 -- ternary, constructive (e.g. cmov from B): defines
// NAME(rs1, rs2, rs3), which issues the assembler macro ASNAME with one
// output register and three input registers.
// The result temporary uses the same uint_xlen_t type as the operands and
// the return value, so nothing is narrowed if uint_xlen_t is ever widened.
#define FUN3(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \
  uint_xlen_t r; \
  asm (#ASNAME " reg_%0, reg_%1, reg_%2, reg_%3\n" \
       : "=r" (r) \
       : "r" (rs1), "r" (rs2), "r" (rs3)); \
  return r; \
}
// FUN3R -- ternary, destructive (e.g. pbsad from P): defines
// NAME(rs1, rs2, rs3). The accumulator starts as a copy of rs3 and is passed
// to ASNAME as a read-write, early-clobber operand ("+&r"); rs1 and rs2 are
// the pure inputs. ASNAME therefore sees three register operands.
// The accumulator uses the same uint_xlen_t type as the operands and the
// return value, so nothing is narrowed if uint_xlen_t is ever widened.
#define FUN3R(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \
  uint_xlen_t r = rs3; \
  asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
       : "+&r" (r) \
       : "r" (rs1), "r" (rs2)); \
  return r; \
}
// FUN3RI -- ternary, destructive, with an immediate (e.g. insb from P).
// The immediate IMM is fixed at macro-expansion time: it is pasted onto the
// function name (NAME##IMM) and written literally into the assembly text, so
// one wrapper is generated per immediate value.
// The generated function takes (rs1, rs3): the accumulator starts as a copy
// of rs3 and is passed as a read-write, early-clobber operand ("+&r").
// The accumulator uses the same uint_xlen_t type as the operands and the
// return value, so nothing is narrowed if uint_xlen_t is ever widened.
#define FUN3RI(NAME, ASNAME, IMM) \
static inline uint_xlen_t NAME##IMM(uint_xlen_t rs1, uint_xlen_t rs3) { \
  uint_xlen_t r = rs3; \
  asm (#ASNAME " reg_%0, reg_%1, " #IMM "\n" \
       : "+&r" (r) \
       : "r" (rs1)); \
  return r; \
}
// FUN2W -- binary, wide result (64-bit output in R2n/R2n+1, e.g. smul8 from
// P): defines NAME(rs1, rs2) returning uint64_t.
// The destination is hard-wired to t5 in the assembly text; the two halves
// are then copied out of t5 (low word) and t6 (high word) into
// compiler-chosen registers. Both t5 and t6 are declared as clobbered.
#define FUN2W(NAME, ASNAME)                                         \
  static inline uint64_t NAME(uint_xlen_t rs1, uint_xlen_t rs2)     \
  {                                                                 \
    uint32_t lo, hi;                                                \
    uint64_t wide;                                                  \
    asm (#ASNAME " reg_t5, reg_%2, reg_%3\n"                        \
         "mv %0, t5\n"                                              \
         "mv %1, t6\n"                                              \
         : "=r" (lo), "=r" (hi)                                     \
         : "r" (rs1), "r" (rs2)                                     \
         : "t5", "t6");                                             \
    wide = hi;                                                      \
    wide <<= 32;                                                    \
    wide |= lo;                                                     \
    return wide;                                                    \
  }
// Macros that build assembler macros emitting each new instruction as a raw
// .word: the base opcode O OR-ed with the operand numbers shifted into
// their bit positions.
// The translation from register name to number is done by the reg_* defines
// below, so the build needs to go .c -> .S -> .o so that the preprocessor
// is applied to the intermediate .S file.
// N is the assembler macro name, O the base opcode value.

// one destination (bit 7 up), one source (bit 15 up)
#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \
".endm\n");
// one destination (bit 7 up), two sources (bit 15 up, bit 20 up)
#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \
".endm\n");
// destructive form: rt is placed at bit 15 up and rs2 at bit 20 up;
// nothing is placed at bit 7
#define ASM2FMACRO(N, O) asm(".macro "#N" rt, rs2\n" \
".word ("#O" | (\\rt << 15) | (\\rs2 << 20))\n" \
".endm\n");
// one destination and three sources; the third source goes at bit 27 up
#define ASM3MACRO(N, O) asm(".macro "#N" rd, rs1, rs2, rs3\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20) | (\\rs3 << 27) )\n" \
".endm\n");
// destructive ternary: same layout as ASM2MACRO (rd doubles as a source)
#define ASM3RMACRO(N, O) ASM2MACRO(N, O)
// destructive with immediate: imm takes the place of rs2 (bit 20 up)
#define ASM3RIMACRO(N, O) asm(".macro "#N" rd, rs1, imm\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\imm << 20))\n" \
".endm\n");
// register name -> number mapping
// Each statement writes one "#define reg_<name> <number>" line into the
// generated assembly. The FUN* wrappers spell their operands as
// reg_<register name>, so preprocessing that assembly replaces each operand
// with the plain number the .word expressions of the ASM*MACRO macros need.
asm("#define reg_zero 0\n");
asm("#define reg_ra 1\n");
asm("#define reg_sp 2\n");
asm("#define reg_gp 3\n");
asm("#define reg_tp 4\n");
asm("#define reg_t0 5\n");
asm("#define reg_t1 6\n");
asm("#define reg_t2 7\n");
asm("#define reg_s0 8\n");
asm("#define reg_s1 9\n");
asm("#define reg_a0 10\n");
asm("#define reg_a1 11\n");
asm("#define reg_a2 12\n");
asm("#define reg_a3 13\n");
asm("#define reg_a4 14\n");
asm("#define reg_a5 15\n");
asm("#define reg_a6 16\n");
asm("#define reg_a7 17\n");
asm("#define reg_s2 18\n");
asm("#define reg_s3 19\n");
asm("#define reg_s4 20\n");
asm("#define reg_s5 21\n");
asm("#define reg_s6 22\n");
asm("#define reg_s7 23\n");
asm("#define reg_s8 24\n");
asm("#define reg_s9 25\n");
asm("#define reg_s10 26\n");
asm("#define reg_s11 27\n");
asm("#define reg_t3 28\n");
asm("#define reg_t4 29\n");
asm("#define reg_t5 30\n");
asm("#define reg_t6 31\n");
#endif // __NEW_INSTRUCTION_SUPPORT_H__

View file

@ -0,0 +1,23 @@
// Wrappers for B instructions: everything the toolchain already provides
// comes from <rvintrin.h>; the macros below add the ones it does not.
#ifndef __NEW_INSTRUCTION_SUPPORT_B_H__
#define __NEW_INSTRUCTION_SUPPORT_B_H__
#include <rvintrin.h> // assume we're using the B toolchain
#include "new_instructions_support.h" // for support macros
// for instructions currently missing in the toolchain
// assembler macros: name and base opcode, two-source / one-destination layout
ASM2MACRO(XPERM_N,0x28002033)
ASM2MACRO(XPERM_B,0x28004033)
ASM2MACRO(XPERM_H,0x28006033)
ASM2MACRO(SH1ADD,0x20002033)
ASM2MACRO(SH2ADD,0x20004033)
ASM2MACRO(SH3ADD,0x20006033)
// C wrappers: name(rs1, rs2) issuing the assembler macro of the same name
FUN2(xperm_n,XPERM_N)
FUN2(xperm_b,XPERM_B)
FUN2(xperm_h,XPERM_H)
FUN2(sh1add,SH1ADD)
FUN2(sh2add,SH2ADD)
FUN2(sh3add,SH3ADD)
#endif // __NEW_INSTRUCTION_SUPPORT_B_H__

View file

@ -0,0 +1,45 @@
// Wrappers for the AES32 / SHA256 / SHA512 instructions.
// The include guard is specific to this header: it must not be shared with
// new_instructions_support_p.h, otherwise whichever of the two headers is
// included second in a translation unit would be skipped entirely.
#ifndef __NEW_INSTRUCTION_SUPPORT_K_H__
#define __NEW_INSTRUCTION_SUPPORT_K_H__
#include "new_instructions_support.h" // for support macros
// AES32: destructive two-operand form (ASM2FMACRO / FUN2F); the first
// argument of each wrapper is both a source and the result.
ASM2FMACRO(AES32ESMI0,0x36000033)
ASM2FMACRO(AES32ESMI1,0x76000033)
ASM2FMACRO(AES32ESMI2,0xb6000033)
ASM2FMACRO(AES32ESMI3,0xf6000033)
ASM2FMACRO(AES32ESI0,0x32000033)
ASM2FMACRO(AES32ESI1,0x72000033)
ASM2FMACRO(AES32ESI2,0xb2000033)
ASM2FMACRO(AES32ESI3,0xf2000033)
FUN2F(aes32esmi0,AES32ESMI0)
FUN2F(aes32esmi1,AES32ESMI1)
FUN2F(aes32esmi2,AES32ESMI2)
FUN2F(aes32esmi3,AES32ESMI3)
FUN2F(aes32esi0,AES32ESI0)
FUN2F(aes32esi1,AES32ESI1)
FUN2F(aes32esi2,AES32ESI2)
FUN2F(aes32esi3,AES32ESI3)
// SHA256: unary form (ASM1MACRO / FUN1)
ASM1MACRO(SHA256SIG0,0x10201013)
ASM1MACRO(SHA256SIG1,0x10301013)
ASM1MACRO(SHA256SUM0,0x10001013)
ASM1MACRO(SHA256SUM1,0x10101013)
FUN1(sha256sig0,SHA256SIG0)
FUN1(sha256sig1,SHA256SIG1)
FUN1(sha256sum0,SHA256SUM0)
FUN1(sha256sum1,SHA256SUM1)
// SHA512: binary form (ASM2MACRO / FUN2)
ASM2MACRO(SHA512SIG0L,0x54000033)
ASM2MACRO(SHA512SIG0H,0x5c000033)
ASM2MACRO(SHA512SIG1L,0x56000033)
ASM2MACRO(SHA512SIG1H,0x5e000033)
ASM2MACRO(SHA512SUM0R,0x50000033)
ASM2MACRO(SHA512SUM1R,0x52000033)
FUN2(sha512sig0l, SHA512SIG0L)
FUN2(sha512sig0h, SHA512SIG0H)
FUN2(sha512sig1l, SHA512SIG1L)
FUN2(sha512sig1h, SHA512SIG1H)
FUN2(sha512sum0r, SHA512SUM0R)
FUN2(sha512sum1r, SHA512SUM1R)
#endif // __NEW_INSTRUCTION_SUPPORT_K_H__

View file

@ -0,0 +1,149 @@
// Wrappers for P instructions. Each entry is a pair: an ASM*MACRO line that
// defines the assembler macro (name, base opcode) and a FUN* line that
// defines the C wrapper __rv__<name> issuing it.
// Every FUN* line names the assembler macro with exactly the spelling used
// in its ASM*MACRO definition, so the pairing does not depend on the
// assembler matching macro names case-insensitively.
#ifndef __NEW_INSTRUCTION_SUPPORT_P_H__
#define __NEW_INSTRUCTION_SUPPORT_P_H__
#include "new_instructions_support.h" // for support macros
// ---- *8 instructions ----
ASM2MACRO(ADD8,0x48000077)
FUN2(__rv__add8,ADD8)
ASM2MACRO(RADD8,0x08000077)
FUN2(__rv__radd8,RADD8)
ASM2MACRO(RSUB8,0x0a000077)
FUN2(__rv__rsub8,RSUB8)
ASM2MACRO(CMPEQ8,0x4e000077)
FUN2(__rv__cmpeq8,CMPEQ8)
ASM1MACRO(CLZ8,0xae100077)
FUN1(__rv__clz8,CLZ8)
ASM1MACRO(CLO8,0xae300077)
FUN1(__rv__clo8,CLO8)
ASM1MACRO(CLRS8,0xae000077)
FUN1(__rv__clrs8,CLRS8)
ASM1MACRO(SWAP8,0xad800077)
FUN1(__rv__swap8,SWAP8)
ASM2MACRO(SCMPLE8,0x1e000077)
FUN2(__rv__scmple8,SCMPLE8)
ASM2MACRO(SCMPLT8,0x0e000077)
FUN2(__rv__scmplt8,SCMPLT8)
ASM2MACRO(SLL8,0x5c000077)
FUN2(__rv__sll8,SLL8)
ASM2MACRO(SRL8,0x5a000077)
FUN2(__rv__srl8,SRL8)
ASM2MACRO(SRA8,0x58000077)
FUN2(__rv__sra8,SRA8)
ASM2MACRO(SMAX8,0x8a000077)
FUN2(__rv__smax8,SMAX8)
ASM2MACRO(SMIN8,0x88000077)
FUN2(__rv__smin8,SMIN8)
ASM2MACRO(SUB8,0x4a000077)
FUN2(__rv__sub8,SUB8)
ASM2MACRO(UCMPLE8,0x3e000077)
FUN2(__rv__ucmple8,UCMPLE8)
ASM2MACRO(UCMPLT8,0x2e000077)
FUN2(__rv__ucmplt8,UCMPLT8)
ASM2MACRO(UMAX8,0x9a000077)
FUN2(__rv__umax8,UMAX8)
ASM2MACRO(UMIN8,0x98000077)
FUN2(__rv__umin8,UMIN8)
ASM2MACRO(URADD8,0x28000077)
FUN2(__rv__uradd8,URADD8)
ASM2MACRO(URSUB8,0x2a000077)
FUN2(__rv__ursub8,URSUB8)
// ---- *16 instructions ----
ASM2MACRO(ADD16,0x40000077)
FUN2(__rv__add16,ADD16)
ASM2MACRO(RADD16,0x00000077)
FUN2(__rv__radd16,RADD16)
ASM2MACRO(RSUB16,0x02000077)
FUN2(__rv__rsub16,RSUB16)
ASM2MACRO(CMPEQ16,0x4c000077)
FUN2(__rv__cmpeq16,CMPEQ16)
ASM1MACRO(CLZ16,0xae900077)
FUN1(__rv__clz16,CLZ16)
ASM1MACRO(CLO16,0xaeb00077)
FUN1(__rv__clo16,CLO16)
ASM1MACRO(CLRS16,0xae800077)
FUN1(__rv__clrs16,CLRS16)
ASM1MACRO(SWAP16,0xad900077)
FUN1(__rv__swap16,SWAP16)
ASM2MACRO(SCMPLE16,0x1c000077)
FUN2(__rv__scmple16,SCMPLE16)
ASM2MACRO(SCMPLT16,0x0c000077)
FUN2(__rv__scmplt16,SCMPLT16)
ASM2MACRO(SLL16,0x54000077)
FUN2(__rv__sll16,SLL16)
ASM2MACRO(SRL16,0x52000077)
FUN2(__rv__srl16,SRL16)
ASM2MACRO(SRA16,0x50000077)
FUN2(__rv__sra16,SRA16)
ASM2MACRO(SMAX16,0x82000077)
FUN2(__rv__smax16,SMAX16)
ASM2MACRO(SMIN16,0x80000077)
FUN2(__rv__smin16,SMIN16)
ASM2MACRO(SUB16,0x42000077)
FUN2(__rv__sub16,SUB16)
ASM2MACRO(UCMPLE16,0x3c000077)
FUN2(__rv__ucmple16,UCMPLE16)
ASM2MACRO(UCMPLT16,0x2c000077)
FUN2(__rv__ucmplt16,UCMPLT16)
ASM2MACRO(UMAX16,0x92000077)
FUN2(__rv__umax16,UMAX16)
ASM2MACRO(UMIN16,0x90000077)
FUN2(__rv__umin16,UMIN16)
ASM2MACRO(PKBB16,0x0e001077)
FUN2(__rv__pkbb16,PKBB16)
ASM2MACRO(PKBT16,0x1e001077)
FUN2(__rv__pkbt16,PKBT16)
ASM2MACRO(PKTB16,0x2e001077)
FUN2(__rv__pktb16,PKTB16)
ASM2MACRO(PKTT16,0x3e001077)
FUN2(__rv__pktt16,PKTT16)
ASM2MACRO(URADD16,0x20000077)
FUN2(__rv__uradd16,URADD16)
ASM2MACRO(URSUB16,0x22000077)
FUN2(__rv__ursub16,URSUB16)
// ---- *W instructions, AVE, PBSAD/PBSADA, BITREV ----
ASM2MACRO(RADDW,0x20001077)
FUN2(__rv__raddw,RADDW)
ASM2MACRO(RSUBW,0x22001077)
FUN2(__rv__rsubw,RSUBW)
ASM2MACRO(URADDW,0x30001077)
FUN2(__rv__uraddw,URADDW)
ASM2MACRO(URSUBW,0x32001077)
FUN2(__rv__ursubw,URSUBW)
ASM2MACRO(AVE,0xe0000077)
FUN2(__rv__ave,AVE)
ASM2MACRO(PBSAD, 0xfc000077)
FUN2(__rv__pbsad, PBSAD)
// destructive ternary: the third wrapper argument seeds the result register
ASM3RMACRO(PBSADA, 0xfe000077)
FUN3R(__rv__pbsada, PBSADA)
ASM2MACRO(BITREV,0xe6000077)
FUN2(__rv__bitrev,BITREV)
// INSB takes an immediate: one wrapper per value, named __rv__insb0..3
ASM3RIMACRO(INSB, 0xac000077)
FUN3RI(__rv__insb, INSB, 0)
FUN3RI(__rv__insb, INSB, 1)
FUN3RI(__rv__insb, INSB, 2)
FUN3RI(__rv__insb, INSB, 3)
// ---- wide multiplies: wrappers return uint64_t (FUN2W) ----
ASM2MACRO(SMUL8,0xa8000077)
FUN2W(__rv__smul8,SMUL8)
ASM2MACRO(UMUL8,0xb8000077)
FUN2W(__rv__umul8,UMUL8)
ASM2MACRO(SMUL16,0xa0000077)
FUN2W(__rv__smul16,SMUL16)
ASM2MACRO(UMUL16,0xb0000077)
FUN2W(__rv__umul16,UMUL16)
ASM2MACRO(SMULx8,0xaa000077)
FUN2W(__rv__smulx8,SMULx8)
ASM2MACRO(UMULx8,0xba000077)
FUN2W(__rv__umulx8,UMULx8)
ASM2MACRO(SMULx16,0xa2000077)
FUN2W(__rv__smulx16,SMULx16)
ASM2MACRO(UMULx16,0xb2000077)
FUN2W(__rv__umulx16,UMULx16)
#endif // __NEW_INSTRUCTION_SUPPORT_P_H__

61
pbsad.c
View file

@ -8,66 +8,7 @@ typedef ssize_t ptrdiff_t;
#ifdef __riscv #ifdef __riscv
typedef uint32_t uint_xlen_t; #include "new_instructions_support_p.h"
#define XLEN 32
#define FUN2(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \
uint32_t r; \
asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
: "=r" (r) \
: "r" (rs1), "r" (rs2)); \
return r; \
}
#define FUN3R(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \
uint32_t r = rs3; \
asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
: "+&r" (r) \
: "r" (rs1), "r" (rs2)); \
return r; \
}
#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \
".endm\n");
#define ASM3RMACRO(N, O) ASM2MACRO(N, O)
asm("#define reg_zero 0\n");
asm("#define reg_ra 1\n");
asm("#define reg_sp 2\n");
asm("#define reg_gp 3\n");
asm("#define reg_tp 4\n");
asm("#define reg_t0 5\n");
asm("#define reg_t1 6\n");
asm("#define reg_t2 7\n");
asm("#define reg_s0 8\n");
asm("#define reg_s1 9\n");
asm("#define reg_a0 10\n");
asm("#define reg_a1 11\n");
asm("#define reg_a2 12\n");
asm("#define reg_a3 13\n");
asm("#define reg_a4 14\n");
asm("#define reg_a5 15\n");
asm("#define reg_a6 16\n");
asm("#define reg_a7 17\n");
asm("#define reg_s2 18\n");
asm("#define reg_s3 19\n");
asm("#define reg_s4 20\n");
asm("#define reg_s5 21\n");
asm("#define reg_s6 22\n");
asm("#define reg_s7 23\n");
asm("#define reg_s8 24\n");
asm("#define reg_s9 25\n");
asm("#define reg_s10 26\n");
asm("#define reg_s11 27\n");
asm("#define reg_t3 28\n");
asm("#define reg_t4 29\n");
asm("#define reg_t5 30\n");
asm("#define reg_t6 31\n");
ASM2MACRO(PBSAD, 0xfc000077)
FUN2(__rv__pbsad, PBSAD)
ASM3RMACRO(PBSADA, 0xfe000077)
FUN3R(__rv__pbsada, PBSADA)
static inline long long cpucycles_riscv(void) { static inline long long cpucycles_riscv(void) {
long long result; long long result;

View file

@ -9,8 +9,8 @@ CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc
ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc
CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++ CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++
STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip
NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32ZKNH #-fno-vectorize #-DUSE_EPI_CUSTOM
OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM OPT=-march=rv32ima -mabi=ilp32 -I. -I.. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM
#ALTCC=$(CC) #ALTCC=$(CC)
#NEWOPT=$(OPT) #NEWOPT=$(OPT)

View file

@ -1,87 +1,22 @@
#include "crypto_hashblocks.h" #include "crypto_hashblocks.h"
typedef unsigned int uint32; #include <stdint.h>
#ifdef RV32ZKNH #ifdef RV32ZKNH
typedef unsigned int uint32_t; #include "new_instructions_support_k.h"
#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \
".endm\n");
#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \
".endm\n");
asm("#define reg_zero 0\n");
asm("#define reg_ra 1\n");
asm("#define reg_sp 2\n");
asm("#define reg_gp 3\n");
asm("#define reg_tp 4\n");
asm("#define reg_t0 5\n");
asm("#define reg_t1 6\n");
asm("#define reg_t2 7\n");
asm("#define reg_s0 8\n");
asm("#define reg_s1 9\n");
asm("#define reg_a0 10\n");
asm("#define reg_a1 11\n");
asm("#define reg_a2 12\n");
asm("#define reg_a3 13\n");
asm("#define reg_a4 14\n");
asm("#define reg_a5 15\n");
asm("#define reg_a6 16\n");
asm("#define reg_a7 17\n");
asm("#define reg_s2 18\n");
asm("#define reg_s3 19\n");
asm("#define reg_s4 20\n");
asm("#define reg_s5 21\n");
asm("#define reg_s6 22\n");
asm("#define reg_s7 23\n");
asm("#define reg_s8 24\n");
asm("#define reg_s9 25\n");
asm("#define reg_s10 26\n");
asm("#define reg_s11 27\n");
asm("#define reg_t3 28\n");
asm("#define reg_t4 29\n");
asm("#define reg_t5 30\n");
asm("#define reg_t6 31\n");
#define FUN1(NAME, ASNAME) \
static inline uint32_t NAME(uint32_t rs1) { \
uint32_t r; \
asm (#ASNAME " reg_%0, reg_%1\n" \
: "=r" (r) \
: "r" (rs1)); \
return r; \
}
#define FUN2(NAME, ASNAME) \
static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \
uint32_t r; \
asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
: "=r" (r) \
: "r" (rs1), "r" (rs2)); \
return r; \
}
ASM1MACRO(SHA256SIG0,0x10201013)
ASM1MACRO(SHA256SIG1,0x10301013)
ASM1MACRO(SHA256SUM0,0x10001013)
ASM1MACRO(SHA256SUM1,0x10101013)
FUN1(sha256sig0,SHA256SIG0)
FUN1(sha256sig1,SHA256SIG1)
FUN1(sha256sum0,SHA256SUM0)
FUN1(sha256sum1,SHA256SUM1)
#endif #endif
static uint32 load_bigendian(const unsigned char *x) static uint32_t load_bigendian(const unsigned char *x)
{ {
return return
(uint32) (x[3]) \ (uint32_t) (x[3]) \
| (((uint32) (x[2])) << 8) \ | (((uint32_t) (x[2])) << 8) \
| (((uint32) (x[1])) << 16) \ | (((uint32_t) (x[1])) << 16) \
| (((uint32) (x[0])) << 24) | (((uint32_t) (x[0])) << 24)
; ;
} }
static void store_bigendian(unsigned char *x,uint32 u) static void store_bigendian(unsigned char *x,uint32_t u)
{ {
x[3] = u; u >>= 8; x[3] = u; u >>= 8;
x[2] = u; u >>= 8; x[2] = u; u >>= 8;
@ -140,17 +75,17 @@ static void store_bigendian(unsigned char *x,uint32 u)
int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen) int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
{ {
uint32 state[8]; uint32_t state[8];
uint32 a; uint32_t a;
uint32 b; uint32_t b;
uint32 c; uint32_t c;
uint32 d; uint32_t d;
uint32 e; uint32_t e;
uint32 f; uint32_t f;
uint32 g; uint32_t g;
uint32 h; uint32_t h;
uint32 T1; uint32_t T1;
uint32 T2; uint32_t T2;
a = load_bigendian(statebytes + 0); state[0] = a; a = load_bigendian(statebytes + 0); state[0] = a;
b = load_bigendian(statebytes + 4); state[1] = b; b = load_bigendian(statebytes + 4); state[1] = b;
@ -162,22 +97,22 @@ int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned
h = load_bigendian(statebytes + 28); state[7] = h; h = load_bigendian(statebytes + 28); state[7] = h;
while (inlen >= 64) { while (inlen >= 64) {
uint32 w0 = load_bigendian(in + 0); uint32_t w0 = load_bigendian(in + 0);
uint32 w1 = load_bigendian(in + 4); uint32_t w1 = load_bigendian(in + 4);
uint32 w2 = load_bigendian(in + 8); uint32_t w2 = load_bigendian(in + 8);
uint32 w3 = load_bigendian(in + 12); uint32_t w3 = load_bigendian(in + 12);
uint32 w4 = load_bigendian(in + 16); uint32_t w4 = load_bigendian(in + 16);
uint32 w5 = load_bigendian(in + 20); uint32_t w5 = load_bigendian(in + 20);
uint32 w6 = load_bigendian(in + 24); uint32_t w6 = load_bigendian(in + 24);
uint32 w7 = load_bigendian(in + 28); uint32_t w7 = load_bigendian(in + 28);
uint32 w8 = load_bigendian(in + 32); uint32_t w8 = load_bigendian(in + 32);
uint32 w9 = load_bigendian(in + 36); uint32_t w9 = load_bigendian(in + 36);
uint32 w10 = load_bigendian(in + 40); uint32_t w10 = load_bigendian(in + 40);
uint32 w11 = load_bigendian(in + 44); uint32_t w11 = load_bigendian(in + 44);
uint32 w12 = load_bigendian(in + 48); uint32_t w12 = load_bigendian(in + 48);
uint32 w13 = load_bigendian(in + 52); uint32_t w13 = load_bigendian(in + 52);
uint32 w14 = load_bigendian(in + 56); uint32_t w14 = load_bigendian(in + 56);
uint32 w15 = load_bigendian(in + 60); uint32_t w15 = load_bigendian(in + 60);
F(w0 ,0x428a2f98) F(w0 ,0x428a2f98)
F(w1 ,0x71374491) F(w1 ,0x71374491)

View file

@ -9,8 +9,8 @@ CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc
ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc
CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++ CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++
STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip
NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32ZKNH #-fno-vectorize #-DUSE_EPI_CUSTOM
OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM OPT=-march=rv32ima -mabi=ilp32 -I. -I.. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM
#ALTCC=$(CC) #ALTCC=$(CC)
#NEWOPT=$(OPT) #NEWOPT=$(OPT)

View file

@ -1,97 +1,27 @@
#include "crypto_hashblocks.h" #include "crypto_hashblocks.h"
typedef unsigned long long uint64; #include <stdint.h>
#ifdef RV32ZKNH #ifdef RV32ZKNH
typedef unsigned int uint32_t; #include "new_instructions_support_k.h"
typedef unsigned long long uint64_t;
#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \
".endm\n");
#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \
".endm\n");
asm("#define reg_zero 0\n");
asm("#define reg_ra 1\n");
asm("#define reg_sp 2\n");
asm("#define reg_gp 3\n");
asm("#define reg_tp 4\n");
asm("#define reg_t0 5\n");
asm("#define reg_t1 6\n");
asm("#define reg_t2 7\n");
asm("#define reg_s0 8\n");
asm("#define reg_s1 9\n");
asm("#define reg_a0 10\n");
asm("#define reg_a1 11\n");
asm("#define reg_a2 12\n");
asm("#define reg_a3 13\n");
asm("#define reg_a4 14\n");
asm("#define reg_a5 15\n");
asm("#define reg_a6 16\n");
asm("#define reg_a7 17\n");
asm("#define reg_s2 18\n");
asm("#define reg_s3 19\n");
asm("#define reg_s4 20\n");
asm("#define reg_s5 21\n");
asm("#define reg_s6 22\n");
asm("#define reg_s7 23\n");
asm("#define reg_s8 24\n");
asm("#define reg_s9 25\n");
asm("#define reg_s10 26\n");
asm("#define reg_s11 27\n");
asm("#define reg_t3 28\n");
asm("#define reg_t4 29\n");
asm("#define reg_t5 30\n");
asm("#define reg_t6 31\n");
#define FUN1(NAME, ASNAME) \
static inline uint32_t NAME(uint32_t rs1) { \
uint32_t r; \
asm (#ASNAME " reg_%0, reg_%1\n" \
: "=r" (r) \
: "r" (rs1)); \
return r; \
}
#define FUN2(NAME, ASNAME) \
static inline uint32_t NAME(uint32_t rs1, uint32_t rs2) { \
uint32_t r; \
asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
: "=r" (r) \
: "r" (rs1), "r" (rs2)); \
return r; \
}
ASM2MACRO(SHA512SIG0L,0x54000033)
ASM2MACRO(SHA512SIG0H,0x5c000033)
ASM2MACRO(SHA512SIG1L,0x56000033)
ASM2MACRO(SHA512SIG1H,0x5e000033)
ASM2MACRO(SHA512SUM0R,0x50000033)
ASM2MACRO(SHA512SUM1R,0x52000033)
FUN2(sha512sig0l, SHA512SIG0L)
FUN2(sha512sig0h, SHA512SIG0H)
FUN2(sha512sig1l, SHA512SIG1L)
FUN2(sha512sig1h, SHA512SIG1H)
FUN2(sha512sum0r, SHA512SUM0R)
FUN2(sha512sum1r, SHA512SUM1R)
#endif #endif
static uint64 load_bigendian(const unsigned char *x) static uint64_t load_bigendian(const unsigned char *x)
{ {
return return
(uint64) (x[7]) \ (uint64_t) (x[7]) \
| (((uint64) (x[6])) << 8) \ | (((uint64_t) (x[6])) << 8) \
| (((uint64) (x[5])) << 16) \ | (((uint64_t) (x[5])) << 16) \
| (((uint64) (x[4])) << 24) \ | (((uint64_t) (x[4])) << 24) \
| (((uint64) (x[3])) << 32) \ | (((uint64_t) (x[3])) << 32) \
| (((uint64) (x[2])) << 40) \ | (((uint64_t) (x[2])) << 40) \
| (((uint64) (x[1])) << 48) \ | (((uint64_t) (x[1])) << 48) \
| (((uint64) (x[0])) << 56) | (((uint64_t) (x[0])) << 56)
; ;
} }
static void store_bigendian(unsigned char *x,uint64 u) static void store_bigendian(unsigned char *x,uint64_t u)
{ {
x[7] = u; u >>= 8; x[7] = u; u >>= 8;
x[6] = u; u >>= 8; x[6] = u; u >>= 8;
@ -155,17 +85,17 @@ static void store_bigendian(unsigned char *x,uint64 u)
int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen) int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
{ {
uint64 state[8]; uint64_t state[8];
uint64 a; uint64_t a;
uint64 b; uint64_t b;
uint64 c; uint64_t c;
uint64 d; uint64_t d;
uint64 e; uint64_t e;
uint64 f; uint64_t f;
uint64 g; uint64_t g;
uint64 h; uint64_t h;
uint64 T1; uint64_t T1;
uint64 T2; uint64_t T2;
a = load_bigendian(statebytes + 0); state[0] = a; a = load_bigendian(statebytes + 0); state[0] = a;
b = load_bigendian(statebytes + 8); state[1] = b; b = load_bigendian(statebytes + 8); state[1] = b;
@ -177,22 +107,22 @@ int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned
h = load_bigendian(statebytes + 56); state[7] = h; h = load_bigendian(statebytes + 56); state[7] = h;
while (inlen >= 128) { while (inlen >= 128) {
uint64 w0 = load_bigendian(in + 0); uint64_t w0 = load_bigendian(in + 0);
uint64 w1 = load_bigendian(in + 8); uint64_t w1 = load_bigendian(in + 8);
uint64 w2 = load_bigendian(in + 16); uint64_t w2 = load_bigendian(in + 16);
uint64 w3 = load_bigendian(in + 24); uint64_t w3 = load_bigendian(in + 24);
uint64 w4 = load_bigendian(in + 32); uint64_t w4 = load_bigendian(in + 32);
uint64 w5 = load_bigendian(in + 40); uint64_t w5 = load_bigendian(in + 40);
uint64 w6 = load_bigendian(in + 48); uint64_t w6 = load_bigendian(in + 48);
uint64 w7 = load_bigendian(in + 56); uint64_t w7 = load_bigendian(in + 56);
uint64 w8 = load_bigendian(in + 64); uint64_t w8 = load_bigendian(in + 64);
uint64 w9 = load_bigendian(in + 72); uint64_t w9 = load_bigendian(in + 72);
uint64 w10 = load_bigendian(in + 80); uint64_t w10 = load_bigendian(in + 80);
uint64 w11 = load_bigendian(in + 88); uint64_t w11 = load_bigendian(in + 88);
uint64 w12 = load_bigendian(in + 96); uint64_t w12 = load_bigendian(in + 96);
uint64 w13 = load_bigendian(in + 104); uint64_t w13 = load_bigendian(in + 104);
uint64 w14 = load_bigendian(in + 112); uint64_t w14 = load_bigendian(in + 112);
uint64 w15 = load_bigendian(in + 120); uint64_t w15 = load_bigendian(in + 120);
F(w0 ,0x428a2f98d728ae22ULL) F(w0 ,0x428a2f98d728ae22ULL)
F(w1 ,0x7137449123ef65cdULL) F(w1 ,0x7137449123ef65cdULL)

View file

@ -29,103 +29,16 @@
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
/* typedef uint32_t uint_xlen_t; */
/* #define XLEN 32 */
#include <rvintrin.h>
typedef uint32_t uint_xlen_t;
#define XLEN 32
#ifdef __riscv #ifdef __riscv
//when missing in toolchain
/*
 * FUN1(NAME, ASNAME): define `static inline uint_xlen_t NAME(uint_xlen_t rs1)`
 * which issues the one-source assembler macro ASNAME and returns its result.
 *
 * The asm template is "<ASNAME> reg_%0, reg_%1": every operand is printed as
 * the compiler-chosen register name with a "reg_" prefix, i.e. one of the
 * reg_* names this file emits further down.
 * The types are uint_xlen_t, matching the FUN2/FUN3 signatures, instead of a
 * hard-coded uint32_t.
 */
#define FUN1(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1) { \
    uint_xlen_t r; \
    asm (#ASNAME " reg_%0, reg_%1\n" \
         : "=r" (r) \
         : "r" (rs1)); \
    return r; \
}
/*
 * FUN2(NAME, ASNAME): define `static inline uint_xlen_t NAME(rs1, rs2)` which
 * issues the two-source assembler macro ASNAME and returns its result.
 *
 * The asm template is "<ASNAME> reg_%0, reg_%1, reg_%2": operands are printed
 * as "reg_" followed by the register name the compiler picked.
 * The result temporary has the same type as the return value (uint_xlen_t).
 */
#define FUN2(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \
    uint_xlen_t r; \
    asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
         : "=r" (r) \
         : "r" (rs1), "r" (rs2)); \
    return r; \
}
/*
 * FUN3(NAME, ASNAME): define `static inline uint_xlen_t NAME(rs1, rs2, rs3)`
 * which issues the three-source assembler macro ASNAME and returns its result.
 *
 * The asm template is "<ASNAME> reg_%0, reg_%1, reg_%2, reg_%3": operands are
 * printed as "reg_" followed by the register name the compiler picked.
 * The result temporary has the same type as the return value (uint_xlen_t).
 */
#define FUN3(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \
    uint_xlen_t r; \
    asm (#ASNAME " reg_%0, reg_%1, reg_%2, reg_%3\n" \
         : "=r" (r) \
         : "r" (rs1), "r" (rs2), "r" (rs3)); \
    return r; \
}
/*
 * Assembler-macro generators. Each ASMnMACRO(N, O) expands to a top-level
 * asm() that defines an assembler `.macro` named N whose body is a single
 * `.word`: the constant O OR-ed with the operand values shifted into place
 * (rd << 7, rs1 << 15, rs2 << 20, rs3 << 27).
 *   ASM1MACRO: operands rd, rs1
 *   ASM2MACRO: operands rd, rs1, rs2
 *   ASM3MACRO: operands rd, rs1, rs2, rs3
 * NOTE(review): the text following the backslash on the ASM1MACRO line
 * (an #include of new_instructions_support_b.h) looks like the other column
 * of a flattened diff view rather than part of the macro -- confirm.
 */
#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ #include "new_instructions_support_b.h"
".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \
".endm\n");
#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \
".endm\n");
#define ASM3MACRO(N, O) asm(".macro "#N" rd, rs1, rs2, rs3\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20) | (\\rs3 << 27) )\n" \
".endm\n");
/*
 * One top-level asm() per register name (zero, ra, sp, ... t6), each emitting
 * the text `#define reg_<name> <number>` into the assembly output, numbered
 * 0 through 31. The reg_* names are what the FUNn wrappers print as operands
 * ("reg_%0", ...), and the ASMnMACRO bodies shift those values into the
 * instruction word.
 * NOTE(review): these strings are not processed by the C preprocessor; whether
 * the assembler treats a line starting with '#' as a definition or as a
 * comment depends on the toolchain -- confirm that reg_* really resolve to
 * numbers in the build that uses this.
 */
asm("#define reg_zero 0\n");
asm("#define reg_ra 1\n");
asm("#define reg_sp 2\n");
asm("#define reg_gp 3\n");
asm("#define reg_tp 4\n");
asm("#define reg_t0 5\n");
asm("#define reg_t1 6\n");
asm("#define reg_t2 7\n");
asm("#define reg_s0 8\n");
asm("#define reg_s1 9\n");
asm("#define reg_a0 10\n");
asm("#define reg_a1 11\n");
asm("#define reg_a2 12\n");
asm("#define reg_a3 13\n");
asm("#define reg_a4 14\n");
asm("#define reg_a5 15\n");
asm("#define reg_a6 16\n");
asm("#define reg_a7 17\n");
asm("#define reg_s2 18\n");
asm("#define reg_s3 19\n");
asm("#define reg_s4 20\n");
asm("#define reg_s5 21\n");
asm("#define reg_s6 22\n");
asm("#define reg_s7 23\n");
asm("#define reg_s8 24\n");
asm("#define reg_s9 25\n");
asm("#define reg_s10 26\n");
asm("#define reg_s11 27\n");
asm("#define reg_t3 28\n");
asm("#define reg_t4 29\n");
asm("#define reg_t5 30\n");
asm("#define reg_t6 31\n");
/*
 * Two-source instructions declared by hand: each ASM2MACRO line registers the
 * assembler macro together with its base instruction word, and the FUN2 line
 * right below it exposes that macro as a static inline C function.
 */
ASM2MACRO(XPERM_N,0x28002033)
FUN2(xperm_n,XPERM_N)
ASM2MACRO(XPERM_B,0x28004033)
FUN2(xperm_b,XPERM_B)
ASM2MACRO(XPERM_H,0x28006033)
FUN2(xperm_h,XPERM_H)
ASM2MACRO(SH1ADD,0x20002033)
FUN2(sh1add,SH1ADD)
ASM2MACRO(SH2ADD,0x20004033)
FUN2(sh2add,SH2ADD)
ASM2MACRO(SH3ADD,0x20006033)
FUN2(sh3add,SH3ADD)
#define _rv64_clmul2(a,b) _rv64_clmul(a,b) #define _rv64_clmul2(a,b) _rv64_clmul(a,b)
#else #else
#include <rvintrin.h> // emulation
typedef uint32_t uint_xlen_t;
#define XLEN 32
uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2) uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2)
{ {
uint_xlen_t r = 0; uint_xlen_t r = 0;

245
test_p.c
View file

@ -13,252 +13,9 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
//#include <rvintrin.h>
typedef uint32_t uint_xlen_t;
#define XLEN 32
#ifdef __riscv #ifdef __riscv
//when missing in toolchain
/*
 * FUN1(NAME, ASNAME): define `static inline uint_xlen_t NAME(uint_xlen_t rs1)`
 * which issues the one-source assembler macro ASNAME and returns its result.
 *
 * The asm template is "<ASNAME> reg_%0, reg_%1": every operand is printed as
 * the compiler-chosen register name with a "reg_" prefix, i.e. one of the
 * reg_* names this file emits further down.
 * The types are uint_xlen_t, matching the FUN2/FUN3 signatures, instead of a
 * hard-coded uint32_t.
 */
#define FUN1(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1) { \
    uint_xlen_t r; \
    asm (#ASNAME " reg_%0, reg_%1\n" \
         : "=r" (r) \
         : "r" (rs1)); \
    return r; \
}
/*
 * FUN2(NAME, ASNAME): define `static inline uint_xlen_t NAME(rs1, rs2)` which
 * issues the two-source assembler macro ASNAME and returns its result.
 *
 * The asm template is "<ASNAME> reg_%0, reg_%1, reg_%2": operands are printed
 * as "reg_" followed by the register name the compiler picked.
 * The result temporary has the same type as the return value (uint_xlen_t).
 */
#define FUN2(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \
    uint_xlen_t r; \
    asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
         : "=r" (r) \
         : "r" (rs1), "r" (rs2)); \
    return r; \
}
/*
 * FUN3(NAME, ASNAME): define `static inline uint_xlen_t NAME(rs1, rs2, rs3)`
 * which issues the three-source assembler macro ASNAME and returns its result.
 *
 * The asm template is "<ASNAME> reg_%0, reg_%1, reg_%2, reg_%3": operands are
 * printed as "reg_" followed by the register name the compiler picked.
 * The result temporary has the same type as the return value (uint_xlen_t).
 */
#define FUN3(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \
    uint_xlen_t r; \
    asm (#ASNAME " reg_%0, reg_%1, reg_%2, reg_%3\n" \
         : "=r" (r) \
         : "r" (rs1), "r" (rs2), "r" (rs3)); \
    return r; \
}
/*
 * FUN3R(NAME, ASNAME): define `static inline uint_xlen_t NAME(rs1, rs2, rs3)`
 * for an instruction whose destination register doubles as its third input.
 * rs3 is copied into r, and r is handed to the asm as a read-write,
 * early-clobber operand ("+&r"), so the template prints only three operands:
 * "<ASNAME> reg_%0, reg_%1, reg_%2". The updated r is returned.
 */
#define FUN3R(NAME, ASNAME) \
static inline uint_xlen_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \
uint32_t r = rs3; \
asm (#ASNAME " reg_%0, reg_%1, reg_%2\n" \
: "+&r" (r) \
: "r" (rs1), "r" (rs2)); \
return r; \
}
/*
 * FUN3RI(NAME, ASNAME, IMM): define `static inline uint_xlen_t NAME<IMM>(rs1, rs3)`
 * (the function name is NAME with IMM pasted on, e.g. NAME0, NAME1, ...).
 * The immediate is stringified straight into the asm template, so one function
 * is generated per immediate value. As in FUN3R, r starts as rs3 and is passed
 * as a read-write, early-clobber operand ("+&r"); the updated r is returned.
 */
#define FUN3RI(NAME, ASNAME, IMM) \
static inline uint_xlen_t NAME##IMM(uint_xlen_t rs1, uint_xlen_t rs3) { \
uint32_t r = rs3; \
asm (#ASNAME " reg_%0, reg_%1, " #IMM "\n" \
: "+&r" (r) \
: "r" (rs1)); \
return r; \
}
/*
 * FUN2W(NAME, ASNAME): define `static inline uint64_t NAME(rs1, rs2)` for a
 * two-source instruction with a 64-bit result.
 * The destination is hard-coded to reg_t5; afterwards t5 is copied to r0 and
 * t6 to r1, both registers are listed as clobbers, and the function returns
 * r0 as the low 32 bits and r1 as the high 32 bits.
 * NOTE(review): t6 is read although only t5 is named in the template --
 * presumably the instruction writes the t5/t6 register pair; confirm against
 * the instruction's specification.
 */
#define FUN2W(NAME, ASNAME) \
static inline uint64_t NAME(uint_xlen_t rs1, uint_xlen_t rs2) { \
uint32_t r0, r1; \
asm (#ASNAME " reg_t5, reg_%2, reg_%3\n" \
"mv %0, t5\n" \
"mv %1, t6\n" \
: "=r" (r0), "=r" (r1) \
: "r" (rs1), "r" (rs2) \
: "t5", "t6"); \
return ((uint64_t)r0 | (((uint64_t)r1)<<32)); \
}
/*
 * Assembler-macro generators. Each one expands to a top-level asm() that
 * defines an assembler `.macro` named N whose body is a single `.word`: the
 * constant O OR-ed with the operand values shifted into place.
 *   ASM1MACRO:   rd << 7, rs1 << 15
 *   ASM2MACRO:   rd << 7, rs1 << 15, rs2 << 20
 *   ASM3MACRO:   rd << 7, rs1 << 15, rs2 << 20, rs3 << 27
 *   ASM3RMACRO:  alias of ASM2MACRO (used with the FUN3R wrappers, where the
 *                destination register is also the third input)
 *   ASM3RIMACRO: rd << 7, rs1 << 15, imm << 20
 * NOTE(review): the text following the backslash on the ASM1MACRO line
 * (an #include of new_instructions_support_p.h) looks like the other column
 * of a flattened diff view rather than part of the macro -- confirm.
 */
#define ASM1MACRO(N, O) asm(".macro "#N" rd, rs1\n" \ #include "new_instructions_support_p.h"
".word ("#O" | (\\rd << 7) | (\\rs1 << 15))\n" \
".endm\n");
#define ASM2MACRO(N, O) asm(".macro "#N" rd, rs1, rs2\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20))\n" \
".endm\n");
#define ASM3MACRO(N, O) asm(".macro "#N" rd, rs1, rs2, rs3\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\rs2 << 20) | (\\rs3 << 27) )\n" \
".endm\n");
#define ASM3RMACRO(N, O) ASM2MACRO(N, O)
#define ASM3RIMACRO(N, O) asm(".macro "#N" rd, rs1, imm\n" \
".word ("#O" | (\\rd << 7) | (\\rs1 << 15) | (\\imm << 20))\n" \
".endm\n");
/*
 * One top-level asm() per register name (zero, ra, sp, ... t6), each emitting
 * the text `#define reg_<name> <number>` into the assembly output, numbered
 * 0 through 31. The reg_* names are what the FUNn wrappers print as operands
 * ("reg_%0", ...), and the ASMnMACRO bodies shift those values into the
 * instruction word.
 * NOTE(review): these strings are not processed by the C preprocessor; whether
 * the assembler treats a line starting with '#' as a definition or as a
 * comment depends on the toolchain -- confirm that reg_* really resolve to
 * numbers in the build that uses this.
 */
asm("#define reg_zero 0\n");
asm("#define reg_ra 1\n");
asm("#define reg_sp 2\n");
asm("#define reg_gp 3\n");
asm("#define reg_tp 4\n");
asm("#define reg_t0 5\n");
asm("#define reg_t1 6\n");
asm("#define reg_t2 7\n");
asm("#define reg_s0 8\n");
asm("#define reg_s1 9\n");
asm("#define reg_a0 10\n");
asm("#define reg_a1 11\n");
asm("#define reg_a2 12\n");
asm("#define reg_a3 13\n");
asm("#define reg_a4 14\n");
asm("#define reg_a5 15\n");
asm("#define reg_a6 16\n");
asm("#define reg_a7 17\n");
asm("#define reg_s2 18\n");
asm("#define reg_s3 19\n");
asm("#define reg_s4 20\n");
asm("#define reg_s5 21\n");
asm("#define reg_s6 22\n");
asm("#define reg_s7 23\n");
asm("#define reg_s8 24\n");
asm("#define reg_s9 25\n");
asm("#define reg_s10 26\n");
asm("#define reg_s11 27\n");
asm("#define reg_t3 28\n");
asm("#define reg_t4 29\n");
asm("#define reg_t5 30\n");
asm("#define reg_t6 31\n");
/*
 * Instruction table: every entry is a pair. The ASMnMACRO line registers an
 * assembler macro together with its base instruction word, and the FUNn line
 * below it generates the `__rv__*` C wrapper that invokes that macro.
 * The second argument of each FUNn must name the macro declared just above
 * it; the spelling is kept identical to the declaration throughout.
 */

/* ---- operations whose names end in 8 ---- */
ASM2MACRO(ADD8,0x48000077)
FUN2(__rv__add8,ADD8)
ASM2MACRO(RADD8,0x08000077)
FUN2(__rv__radd8,RADD8)
ASM2MACRO(RSUB8,0x0a000077)
FUN2(__rv__rsub8,RSUB8)
ASM2MACRO(CMPEQ8,0x4e000077)
FUN2(__rv__cmpeq8,CMPEQ8)
ASM1MACRO(CLZ8,0xae100077)
FUN1(__rv__clz8,CLZ8)
ASM1MACRO(CLO8,0xae300077)
FUN1(__rv__clo8,CLO8)
ASM1MACRO(CLRS8,0xae000077)
FUN1(__rv__clrs8,CLRS8)
ASM1MACRO(SWAP8,0xad800077)
FUN1(__rv__swap8,SWAP8)
ASM2MACRO(SCMPLE8,0x1e000077)
FUN2(__rv__scmple8,SCMPLE8)
ASM2MACRO(SCMPLT8,0x0e000077)
FUN2(__rv__scmplt8,SCMPLT8)
ASM2MACRO(SLL8,0x5c000077)
FUN2(__rv__sll8,SLL8)
ASM2MACRO(SRL8,0x5a000077)
FUN2(__rv__srl8,SRL8)
ASM2MACRO(SRA8,0x58000077)
FUN2(__rv__sra8,SRA8)
ASM2MACRO(SMAX8,0x8a000077)
FUN2(__rv__smax8,SMAX8)
ASM2MACRO(SMIN8,0x88000077)
FUN2(__rv__smin8,SMIN8)
ASM2MACRO(SUB8,0x4a000077)
FUN2(__rv__sub8,SUB8)
ASM2MACRO(UCMPLE8,0x3e000077)
FUN2(__rv__ucmple8,UCMPLE8)
ASM2MACRO(UCMPLT8,0x2e000077)
FUN2(__rv__ucmplt8,UCMPLT8)
ASM2MACRO(UMAX8,0x9a000077)
FUN2(__rv__umax8,UMAX8)
ASM2MACRO(UMIN8,0x98000077)
FUN2(__rv__umin8,UMIN8)
ASM2MACRO(URADD8,0x28000077)
FUN2(__rv__uradd8,URADD8)
ASM2MACRO(URSUB8,0x2a000077)
FUN2(__rv__ursub8,URSUB8)

/* ---- operations whose names end in 16 ---- */
ASM2MACRO(ADD16,0x40000077)
FUN2(__rv__add16,ADD16)
ASM2MACRO(RADD16,0x00000077)
FUN2(__rv__radd16,RADD16)
ASM2MACRO(RSUB16,0x02000077)
FUN2(__rv__rsub16,RSUB16)
ASM2MACRO(CMPEQ16,0x4c000077)
FUN2(__rv__cmpeq16,CMPEQ16)
ASM1MACRO(CLZ16,0xae900077)
FUN1(__rv__clz16,CLZ16)
ASM1MACRO(CLO16,0xaeb00077)
FUN1(__rv__clo16,CLO16)
ASM1MACRO(CLRS16,0xae800077)
FUN1(__rv__clrs16,CLRS16)
ASM1MACRO(SWAP16,0xad900077)
FUN1(__rv__swap16,SWAP16)
ASM2MACRO(SCMPLE16,0x1c000077)
FUN2(__rv__scmple16,SCMPLE16)
ASM2MACRO(SCMPLT16,0x0c000077)
FUN2(__rv__scmplt16,SCMPLT16)
ASM2MACRO(SLL16,0x54000077)
FUN2(__rv__sll16,SLL16)
ASM2MACRO(SRL16,0x52000077)
FUN2(__rv__srl16,SRL16)
ASM2MACRO(SRA16,0x50000077)
FUN2(__rv__sra16,SRA16)
ASM2MACRO(SMAX16,0x82000077)
FUN2(__rv__smax16,SMAX16)
ASM2MACRO(SMIN16,0x80000077)
FUN2(__rv__smin16,SMIN16)
ASM2MACRO(SUB16,0x42000077)
FUN2(__rv__sub16,SUB16)
ASM2MACRO(UCMPLE16,0x3c000077)
FUN2(__rv__ucmple16,UCMPLE16)
ASM2MACRO(UCMPLT16,0x2c000077)
FUN2(__rv__ucmplt16,UCMPLT16)
ASM2MACRO(UMAX16,0x92000077)
FUN2(__rv__umax16,UMAX16)
ASM2MACRO(UMIN16,0x90000077)
FUN2(__rv__umin16,UMIN16)
ASM2MACRO(PKBB16,0x0e001077)
FUN2(__rv__pkbb16,PKBB16)
ASM2MACRO(PKBT16,0x1e001077)
FUN2(__rv__pkbt16,PKBT16)
ASM2MACRO(PKTB16,0x2e001077)
FUN2(__rv__pktb16,PKTB16)
ASM2MACRO(PKTT16,0x3e001077)
FUN2(__rv__pktt16,PKTT16)
ASM2MACRO(URADD16,0x20000077)
FUN2(__rv__uradd16,URADD16)
ASM2MACRO(URSUB16,0x22000077)
FUN2(__rv__ursub16,URSUB16)

/* ---- operations whose names end in W, and other two-source operations ---- */
ASM2MACRO(RADDW,0x20001077)
FUN2(__rv__raddw,RADDW)
ASM2MACRO(RSUBW,0x22001077)
FUN2(__rv__rsubw,RSUBW)
ASM2MACRO(URADDW,0x30001077)
FUN2(__rv__uraddw,URADDW)
ASM2MACRO(URSUBW,0x32001077)
FUN2(__rv__ursubw,URSUBW)
ASM2MACRO(AVE,0xe0000077)
FUN2(__rv__ave,AVE)
ASM2MACRO(PBSAD, 0xfc000077)
FUN2(__rv__pbsad, PBSAD)
/* Destination register is also the third input (FUN3R). */
ASM3RMACRO(PBSADA, 0xfe000077)
FUN3R(__rv__pbsada, PBSADA)
ASM2MACRO(BITREV,0xe6000077)
FUN2(__rv__bitrev,BITREV)
/* One wrapper per immediate: __rv__insb0 .. __rv__insb3 (FUN3RI pastes IMM onto the name). */
ASM3RIMACRO(INSB, 0xac000077)
FUN3RI(__rv__insb, INSB, 0)
FUN3RI(__rv__insb, INSB, 1)
FUN3RI(__rv__insb, INSB, 2)
FUN3RI(__rv__insb, INSB, 3)

/* ---- wrappers returning uint64_t (FUN2W) ---- */
ASM2MACRO(SMUL8,0xa8000077)
FUN2W(__rv__smul8,SMUL8)
ASM2MACRO(UMUL8,0xb8000077)
FUN2W(__rv__umul8,UMUL8)
ASM2MACRO(SMUL16,0xa0000077)
FUN2W(__rv__smul16,SMUL16)
ASM2MACRO(UMUL16,0xb0000077)
FUN2W(__rv__umul16,UMUL16)
ASM2MACRO(SMULx8,0xaa000077)
FUN2W(__rv__smulx8,SMULx8)
ASM2MACRO(UMULx8,0xba000077)
FUN2W(__rv__umulx8,UMULx8)
ASM2MACRO(SMULx16,0xa2000077)
FUN2W(__rv__smulx16,SMULx16)
ASM2MACRO(UMULx16,0xb2000077)
FUN2W(__rv__umulx16,UMULx16)
#else // !__riscv #else // !__riscv
typedef uint8_t uint4x8_t[4]; typedef uint8_t uint4x8_t[4];