mirror of
https://github.com/rdolbeau/VexRiscvBPluginGenerator.git
synced 2025-04-16 09:44:41 -04:00
Using P opcodes (and double-width/read-rs3-from-rd behavior) to try some custom Chacha-oriented instructions
This commit is contained in:
parent
bb182d099d
commit
29738d20b8
6 changed files with 145 additions and 8 deletions
|
@ -41,3 +41,29 @@ kernelrandombytes.o: random.cpp
|
|||
|
||||
cpucycles.o: riscv.c
|
||||
$(R5IMA_GCC) $< -march=rv32ima -mabi=ilp32 -I. -O1 -c -o $@
|
||||
|
||||
|
||||
# ENABLE_XAR variant of the ChaCha build.
# chacha.c is first compiled to assembly (chacha_XAR.S) with -DENABLE_XAR,
# and that assembly is then built into chacha_XAR.o; both steps use
# $(R5B_GCC) $(R5B_OPT). The two executables link chacha_XAR.o with either
# try.o or try_small.o using $(R5IMA_GXX) $(R5IMA_OPT).
chacha_XAR.S: chacha.c
|
||||
$(R5B_GCC) $(R5B_OPT) -DENABLE_XAR $< -S -o $@
|
||||
|
||||
chacha_XAR.o: chacha_XAR.S
|
||||
$(R5B_GCC) $(R5B_OPT) -DENABLE_XAR $< -c -o $@
|
||||
|
||||
chacha20_XAR: $(OBJs) chacha_XAR.o try.o $(SCLIBS)
|
||||
$(R5IMA_GXX) $(R5IMA_OPT) $^ -o $@
|
||||
|
||||
chacha20_XAR_small: $(OBJs) chacha_XAR.o try_small.o $(SCLIBS)
|
||||
$(R5IMA_GXX) $(R5IMA_OPT) $^ -o $@
|
||||
|
||||
|
||||
# ENABLE_CHACHA variant of the ChaCha build.
# Same two-step flow as a plain compile, but split so the intermediate
# assembly (chacha_CHACHA.S) is kept: chacha.c is compiled to assembly with
# -O3 -DENABLE_CHACHA appended after $(R5B_OPT), then built into
# chacha_CHACHA.o. The two executables link that object with either try.o or
# try_small.o using $(R5IMA_GXX) $(R5IMA_OPT).
chacha_CHACHA.S: chacha.c
|
||||
$(R5B_GCC) $(R5B_OPT) -O3 -DENABLE_CHACHA $< -S -o $@
|
||||
|
||||
chacha_CHACHA.o: chacha_CHACHA.S
|
||||
$(R5B_GCC) $(R5B_OPT) -O3 -DENABLE_CHACHA $< -c -o $@
|
||||
|
||||
chacha20_CHACHA: $(OBJs) chacha_CHACHA.o try.o $(SCLIBS)
|
||||
$(R5IMA_GXX) $(R5IMA_OPT) $^ -o $@
|
||||
|
||||
chacha20_CHACHA_small: $(OBJs) chacha_CHACHA.o try_small.o $(SCLIBS)
|
||||
$(R5IMA_GXX) $(R5IMA_OPT) $^ -o $@
|
||||
|
|
|
@ -21,12 +21,61 @@ Public domain.
|
|||
#define PLUS(v,w) (U32V((v) + (w)))
|
||||
#define PLUSONE(v) (PLUS((v),1))
|
||||
|
||||
#if !defined(ENABLE_XAR) && !defined(ENABLE_CHACHA)
|
||||
/*
 * Portable quarter round on the working state x[]:
 *   a += b; d = ROTATE(d ^ a, 16);
 *   c += d; b = ROTATE(b ^ c, 12);
 *   a += b; d = ROTATE(d ^ a,  8);
 *   c += d; b = ROTATE(b ^ c,  7);
 * a, b, c and d are indices into x[]. The expansion is a bare sequence of
 * statements that already ends in ';', so call sites use it without a
 * trailing semicolon.
 * NOTE(review): ROTATE and XOR are defined outside this hunk; presumably a
 * 32-bit left rotation and a bitwise xor - confirm.
 */
#define QUARTERROUND(a,b,c,d) \
|
||||
x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
|
||||
x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
|
||||
x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
|
||||
x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
|
||||
|
||||
/* In this configuration the numbered variants are plain aliases of QUARTERROUND. */
#define QUARTERROUND1(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND2(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND3(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND4(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND5(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND6(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND7(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND8(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#elif !defined(ENABLE_CHACHA)
|
||||
/*
 * Quarter round for ENABLE_XAR builds. The additions still go through PLUS,
 * but every xor-and-rotate step is a single __rv__xar() call taking the
 * freshly updated word, the rotation amount (16, 12, 8, 7) and the word
 * being replaced.
 * a, b, c and d are indices into x[]. The expansion already ends in ';',
 * so call sites use it without a trailing semicolon.
 * NOTE(review): __rv__xar is declared elsewhere; presumably
 * __rv__xar(v, n, w) yields (v ^ w) rotated left by n bits - confirm
 * against the XAR instruction definition.
 */
#define QUARTERROUND(a,b,c,d) \
|
||||
x[a] = PLUS(x[a],x[b]); x[d] = __rv__xar(x[a],16,x[d]); \
|
||||
x[c] = PLUS(x[c],x[d]); x[b] = __rv__xar(x[c],12,x[b]); \
|
||||
x[a] = PLUS(x[a],x[b]); x[d] = __rv__xar(x[a], 8,x[d]); \
|
||||
x[c] = PLUS(x[c],x[d]); x[b] = __rv__xar(x[c], 7,x[b]);
|
||||
|
||||
/* In this configuration the numbered variants are plain aliases of QUARTERROUND. */
#define QUARTERROUND1(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND2(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND3(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND4(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND5(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND6(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND7(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#define QUARTERROUND8(a,b,c,d) QUARTERROUND(a,b,c,d)
|
||||
#else
|
||||
/*
 * Quarter round for ENABLE_CHACHA builds, done with four custom
 * instructions on values pinned to explicitly named registers.
 *
 *   a,b,c,d      indices into x[]
 *   r0,r1,r2,r3  register names; x[a] is bound to r0, x[d] to r1,
 *                x[c] to r2 and x[b] to r3
 *
 * The asm operands are %0 = A, %1 = B, %2 = C, %3 = D, so the emitted
 * sequence is
 *   CHACHA16 A, B, D
 *   CHACHA12 C, D, B
 *   CHACHA8  A, B, D
 *   CHACHA7  C, D, B
 * All four operands are declared read-write and early-clobber ("+&r") and
 * are stored back to x[] afterwards.
 * NOTE(review): the third register of each instruction is treated by the C
 * code as updated although it is never the first operand; presumably the
 * hardware writes the rotated word into the register paired with the
 * destination (r0/r1 and r2/r3 are passed as adjacent even/odd registers)
 * - confirm against the CHACHA instruction definition.
 * The expansion is a braced block, so call sites need no trailing ';'.
 */
#define QUARTERROUNDx(a,b,c,d,r0,r1,r2,r3) \
|
||||
{ \
|
||||
register uint32_t A asm(""#r0) = x[a]; \
|
||||
register uint32_t D asm(""#r1) = x[d]; \
|
||||
register uint32_t C asm(""#r2) = x[c]; \
|
||||
register uint32_t B asm(""#r3) = x[b]; \
|
||||
asm("CHACHA16 reg_%0, reg_%1, reg_%3\n" \
|
||||
"CHACHA12 reg_%2, reg_%3, reg_%1\n" \
|
||||
"CHACHA8 reg_%0, reg_%1, reg_%3\n" \
|
||||
"CHACHA7 reg_%2, reg_%3, reg_%1\n" \
|
||||
: "+&r" (A), "+&r" (B), "+&r" (C), "+&r" (D)); \
|
||||
x[a] = A; x[b] = B; x[c] = C; x[d] = D; \
|
||||
}
|
||||
/*
 * One variant per quarter round of a double round, each with its own
 * register set. In the calls made by salsa20_wordtobyte, variants 5-8 keep
 * x[a] and x[c] in the same registers that variants 1-4 used for those
 * state words; only the registers holding x[b] and x[d] change.
 * NOTE(review): presumably this is meant to limit register moves between
 * the two groups of four - confirm.
 */
#define QUARTERROUND1(a,b,c,d) QUARTERROUNDx(a,b,c,d,t3,t4,t5,t6)
|
||||
#define QUARTERROUND2(a,b,c,d) QUARTERROUNDx(a,b,c,d,s8,s9,s10,s11)
|
||||
#define QUARTERROUND3(a,b,c,d) QUARTERROUNDx(a,b,c,d,s4,s5,s6,s7)
|
||||
#define QUARTERROUND4(a,b,c,d) QUARTERROUNDx(a,b,c,d,a6,a7,s2,s3)
|
||||
|
||||
#define QUARTERROUND5(a,b,c,d) QUARTERROUNDx(a,b,c,d,t3,t4,s6,s7)
|
||||
#define QUARTERROUND6(a,b,c,d) QUARTERROUNDx(a,b,c,d,s8,s9,s2,s3)
|
||||
#define QUARTERROUND7(a,b,c,d) QUARTERROUNDx(a,b,c,d,s4,s5,t5,t6)
|
||||
#define QUARTERROUND8(a,b,c,d) QUARTERROUNDx(a,b,c,d,a6,a7,s10,s11)
|
||||
#endif
|
||||
|
||||
static void salsa20_wordtobyte(u8 output[64],const u32 input[16])
|
||||
{
|
||||
u32 x[16];
|
||||
|
@ -34,14 +83,14 @@ static void salsa20_wordtobyte(u8 output[64],const u32 input[16])
|
|||
|
||||
for (i = 0;i < 16;++i) x[i] = input[i];
|
||||
for (i = ROUNDS;i > 0;i -= 2) {
|
||||
QUARTERROUND( 0, 4, 8,12)
|
||||
QUARTERROUND( 1, 5, 9,13)
|
||||
QUARTERROUND( 2, 6,10,14)
|
||||
QUARTERROUND( 3, 7,11,15)
|
||||
QUARTERROUND( 0, 5,10,15)
|
||||
QUARTERROUND( 1, 6,11,12)
|
||||
QUARTERROUND( 2, 7, 8,13)
|
||||
QUARTERROUND( 3, 4, 9,14)
|
||||
QUARTERROUND1( 0, 4, 8,12)
|
||||
QUARTERROUND2( 1, 5, 9,13)
|
||||
QUARTERROUND3( 2, 6,10,14)
|
||||
QUARTERROUND4( 3, 7,11,15)
|
||||
QUARTERROUND5( 0, 5,10,15)
|
||||
QUARTERROUND6( 1, 6,11,12)
|
||||
QUARTERROUND7( 2, 7, 8,13)
|
||||
QUARTERROUND8( 3, 4, 9,14)
|
||||
}
|
||||
for (i = 0;i < 16;++i) x[i] = PLUS(x[i],input[i]);
|
||||
for (i = 0;i < 16;++i) U32TO8_LITTLE(output + 4 * i,x[i]);
|
||||
|
|
24
data_Chacha64.txt
Normal file
24
data_Chacha64.txt
Normal file
|
@ -0,0 +1,24 @@
|
|||
//for vX.Y of P
|
||||
|
||||
// low-order bit of Rd (instruction bit 7) is 0 to ensure even-numbered Rd
|
||||
I CHACHA CHACHA 101--00----------000----01110111 chacha Zchacha
|
||||
|
||||
S CHACHA "fun_chacha(input(SRC1), input(SRC2), input(SRC3), input(INSTRUCTION)(28 downto 27))"
|
||||
|
||||
P """
|
||||
// Combinational function behind the CHACHA instruction.
//   rs1, rs2, rs3 : the three source operands
//   num           : 2-bit selector for the rotation amount
//                   (00 -> 16, 01 -> 12, 10 -> 8, 11 -> 7)
// Computes sum = rs3 + rs1 (resized to 32 bits) and
// rot = (sum ^ rs2) rotated left by the selected amount, then returns the
// concatenation rot ## sum.
// NOTE(review): presumably ## puts rot in the upper half and sum in the
// lower half of the result - confirm.
def fun_chacha(rs1: Bits, rs2: Bits, rs3: Bits, num: Bits) : Bits = {
|
||||
val rotv = (num).mux(
|
||||
B"2'b00" -> U(16),
|
||||
B"2'b01" -> U(12),
|
||||
B"2'b10" -> U( 8),
|
||||
B"2'b11" -> U( 7)
|
||||
)
|
||||
val a = rs3
|
||||
val b = rs1
|
||||
val d = rs2
|
||||
val sum = (a.asUInt + b.asUInt).asBits.resize(32)
|
||||
val xor = sum ^ d
|
||||
val rot = xor.rotateLeft(rotv)
|
||||
rot ## sum // return value
|
||||
}
|
||||
"""
|
11
data_xar.txt
Normal file
11
data_xar.txt
Normal file
|
@ -0,0 +1,11 @@
|
|||
I XAR XAR 1100100----------000-----1110111 xar Zbxar
|
||||
|
||||
S XAR "fun_xar(input(SRC1), input(SRC2), input(SRC3))"
|
||||
|
||||
P """
|
||||
// Combinational function behind the XAR instruction: xors rs1 with rs3,
// then rotates the result left by the amount held in the low five bits
// of rs2.
def fun_xar(rs1: Bits, rs2: Bits, rs3: Bits) : Bits = {
|
||||
val in = rs1 ^ rs3
|
||||
val r = in.rotateLeft(rs2(4 downto 0).asUInt)
|
||||
r // return value
|
||||
}
|
||||
"""
|
|
@ -73,6 +73,21 @@ typedef uint32_t uint_xlen_t;
|
|||
return ((uint64_t)r0 | (((uint64_t)r1)<<32)); \
|
||||
}
|
||||
|
||||
// ternary wide (64-bits output in R2n/R2n+1)
// FUN3Wx(NAME, ASNAME, r0, r1) defines
//   static inline uint64_t NAME(rs1, rs2, rs3)
// around the single instruction ASNAME. Two 32-bit locals are pinned to the
// registers named r0 and r1. rs3 is copied into r0 before the asm because
// r0 is both read and written by it ("+r"); r1 is output-only and does not
// appear in the asm template. The return value has r0 in the low 32 bits
// and r1 in the high 32 bits.
// NOTE(review): r0/r1 are expected to be an even/odd register pair, per the
// R2n/R2n+1 remark above - the macro itself does not check this.
|
||||
#define FUN3Wx(NAME, ASNAME, r0, r1) \
|
||||
static inline uint64_t NAME(uint_xlen_t rs1, uint_xlen_t rs2, uint_xlen_t rs3) { \
|
||||
register uint32_t r0 asm (""#r0), r1 asm (""#r1); \
|
||||
r0 = rs3; \
|
||||
asm (#ASNAME " reg_%0, reg_%2, reg_%3\n" \
|
||||
: "+r" (r0), "=r" (r1) \
|
||||
: "r" (rs1), "r" (rs2)); \
|
||||
return ((uint64_t)r0 | (((uint64_t)r1)<<32)); \
|
||||
}
|
||||
// Instantiations of FUN3Wx for fixed register pairs (t5/t6, t3/t4, s10/s11, s8/s9).
#define FUN3Wt5(NAME, ASNAME) FUN3Wx(NAME, ASNAME, t5, t6)
|
||||
#define FUN3Wt3(NAME, ASNAME) FUN3Wx(NAME, ASNAME, t3, t4)
|
||||
#define FUN3Ws10(NAME, ASNAME) FUN3Wx(NAME, ASNAME, s10, s11)
|
||||
#define FUN3Ws8(NAME, ASNAME) FUN3Wx(NAME, ASNAME, s8, s9)
|
||||
|
||||
// macro to build assembly macros to generate the proper
|
||||
// opcodes as .word macro
|
||||
// the translation from name to number is done by the
|
||||
|
|
|
@ -19,4 +19,16 @@ FUN2(_sh1add,_SH1ADD)
|
|||
FUN2(_sh2add,_SH2ADD)
|
||||
FUN2(_sh3add,_SH3ADD)
|
||||
|
||||
|
||||
// XAR custom instruction: an assembler macro for base encoding 0xc8000077
// plus the C wrapper __rv__xar().
// NOTE(review): ASM3RMACRO and FUN3R are defined elsewhere; presumably
// ASM3RMACRO builds an assembler macro that emits the instruction as a
// .word derived from the given base encoding - confirm.
#ifdef ENABLE_XAR
|
||||
ASM3RMACRO(XAR, 0xc8000077)
|
||||
FUN3R(__rv__xar, XAR)
|
||||
#endif
|
||||
// CHACHA custom instructions: four assembler macros whose base encodings
// differ only in bits 28:27 (top byte 0xa0, 0xa8, 0xb0, 0xb8). Only the
// assembler macros are defined here; no C wrapper function is generated.
// NOTE(review): presumably bits 28:27 select the rotation amount
// (16, 12, 8, 7 respectively) - confirm against the instruction definition.
#ifdef ENABLE_CHACHA
|
||||
ASM3RMACRO(CHACHA16,0xa0000077)
|
||||
ASM3RMACRO(CHACHA12,0xa8000077)
|
||||
ASM3RMACRO(CHACHA8, 0xb0000077)
|
||||
ASM3RMACRO(CHACHA7, 0xb8000077)
|
||||
#endif
|
||||
|
||||
#endif // __NEW_INSTRUCTION_SUPPORT_B_H__
|
||||
|
|
Loading…
Add table
Reference in a new issue