diff --git a/aeadaes256ocbtaglen128v1-rv32/Makefile b/aeadaes256ocbtaglen128v1-rv32/Makefile
new file mode 100644
index 0000000..f195c70
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/Makefile
@@ -0,0 +1,47 @@
+SRCs=encrypt.c try-anything.c verify.c
+OBJs=$(SRCs:.c=.o)
+SCLIBS=cpucycles.o kernelrandombytes.o
+
+COMPDIR=~dolbeau2/LITEX/buildroot-rv32/output/host
+ALTCOMPDIR=/opt/riscv64b
+
+CC=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-gcc
+ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc
+CXX=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-g++
+STRIP=$(COMPDIR)/bin/riscv32-buildroot-linux-gnu-strip
+NEWOPT=-march=rv32imab -mabi=ilp32 -I. -I.. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM
+OPT=-march=rv32ima -mabi=ilp32 -I. -I.. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM
+#ALTCC=$(CC)
+#NEWOPT=$(OPT)
+
+all: aeadaes256ocbtaglen128v1 aeadaes256ocbtaglen128v1_small
+
+clean:
+ rm -f $(OBJs) *.S try.o try_small.o encrypt.o aeadaes256ocbtaglen128v1 aeadaes256ocbtaglen128v1_small
+
+%.o: %.c
+ $(CC) $(OPT) $< -c -o $@
+
+try.o: try.c
+ $(CC) $(OPT) $< -c -o $@
+
+try_small.o: try.c
+ $(CC) $(OPT) $< -c -o $@ -DSMALL
+
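+# Only encrypt.c needs the B+K (bitmanip/crypto) instructions, which the
+# buildroot gcc does not understand: ALTCC lowers it to assembly and then
+# assembles encrypt.S, while all other objects and the final link go
+# through the plain rv32ima toolchain. The encrypt.S detour also leaves
+# the generated code easy to inspect.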
+encrypt.S: encrypt.c
+ $(ALTCC) $(NEWOPT) $< -S -o $@
+
+encrypt.o: encrypt.S
+ $(ALTCC) $(NEWOPT) $< -c -o $@
+
+aeadaes256ocbtaglen128v1: $(OBJs) encrypt.o try.o $(SCLIBS)
+ $(CXX) $(OPT) $^ -o $@
+
+aeadaes256ocbtaglen128v1_small: $(OBJs) encrypt.o try_small.o $(SCLIBS)
+ $(CXX) $(OPT) $^ -o $@
+
+kernelrandombytes.o: random.cpp
+ $(CXX) $(OPT) $< -c -o $@
+
+cpucycles.o: riscv.c
+ $(CC) $< -march=rv32ima -mabi=ilp32 -I. -O1 -c -o $@
diff --git a/aeadaes256ocbtaglen128v1-rv32/api.h b/aeadaes256ocbtaglen128v1-rv32/api.h
new file mode 100644
index 0000000..d507767
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NSECBYTES 0
+#define CRYPTO_NPUBBYTES 12
+#define CRYPTO_ABYTES 16
diff --git a/aeadaes256ocbtaglen128v1-rv32/cpucycles.h b/aeadaes256ocbtaglen128v1-rv32/cpucycles.h
new file mode 100644
index 0000000..ae1b7ba
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/cpucycles.h
@@ -0,0 +1,28 @@
+/*
+cpucycles riscv.h version 20190803
+D. J. Bernstein
+Romain Dolbeau
+Public domain.
+*/
+
+#ifndef CPUCYCLES_riscv_h
+#define CPUCYCLES_riscv_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_riscv(void);
+extern long long cpucycles_riscv_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "riscv"
+#define cpucycles cpucycles_riscv
+#define cpucycles_persecond cpucycles_riscv_persecond
+#endif
+
+#endif
diff --git a/aeadaes256ocbtaglen128v1-rv32/crypto_aead.h b/aeadaes256ocbtaglen128v1-rv32/crypto_aead.h
new file mode 100644
index 0000000..79bf8ec
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/crypto_aead.h
@@ -0,0 +1,17 @@
+#ifndef crypto_aead_H
+#define crypto_aead_H
+
+#include "crypto_aead_aeadaes256ocbtaglen128v1.h"
+
+#define crypto_aead_encrypt crypto_aead_aeadaes256ocbtaglen128v1_encrypt
+#define crypto_aead_decrypt crypto_aead_aeadaes256ocbtaglen128v1_decrypt
+#define crypto_aead_KEYBYTES crypto_aead_aeadaes256ocbtaglen128v1_KEYBYTES
+#define crypto_aead_NSECBYTES crypto_aead_aeadaes256ocbtaglen128v1_NSECBYTES
+#define crypto_aead_NPUBBYTES crypto_aead_aeadaes256ocbtaglen128v1_NPUBBYTES
+#define crypto_aead_ABYTES crypto_aead_aeadaes256ocbtaglen128v1_ABYTES
+#define crypto_aead_NOOVERLAP crypto_aead_aeadaes256ocbtaglen128v1_NOOVERLAP
+#define crypto_aead_PRIMITIVE "aeadaes256ocbtaglen128v1"
+#define crypto_aead_IMPLEMENTATION crypto_aead_aeadaes256ocbtaglen128v1_IMPLEMENTATION
+#define crypto_aead_VERSION crypto_aead_aeadaes256ocbtaglen128v1_VERSION
+
+#endif
diff --git a/aeadaes256ocbtaglen128v1-rv32/crypto_aead_aeadaes256ocbtaglen128v1.h b/aeadaes256ocbtaglen128v1-rv32/crypto_aead_aeadaes256ocbtaglen128v1.h
new file mode 100644
index 0000000..d9fcf49
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/crypto_aead_aeadaes256ocbtaglen128v1.h
@@ -0,0 +1,31 @@
+#ifndef crypto_aead_aeadaes256ocbtaglen128v1_H
+#define crypto_aead_aeadaes256ocbtaglen128v1_H
+
+#define crypto_aead_aeadaes256ocbtaglen128v1_rv32_KEYBYTES 32
+#define crypto_aead_aeadaes256ocbtaglen128v1_rv32_NSECBYTES 0
+#define crypto_aead_aeadaes256ocbtaglen128v1_rv32_NPUBBYTES 12
+#define crypto_aead_aeadaes256ocbtaglen128v1_rv32_ABYTES 16
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern int crypto_aead_aeadaes256ocbtaglen128v1_rv32_encrypt(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_aead_aeadaes256ocbtaglen128v1_rv32_decrypt(unsigned char *,unsigned long long *,unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_aead_aeadaes256ocbtaglen128v1_encrypt crypto_aead_aeadaes256ocbtaglen128v1_rv32_encrypt
+#define crypto_aead_aeadaes256ocbtaglen128v1_decrypt crypto_aead_aeadaes256ocbtaglen128v1_rv32_decrypt
+#define crypto_aead_aeadaes256ocbtaglen128v1_KEYBYTES crypto_aead_aeadaes256ocbtaglen128v1_rv32_KEYBYTES
+#define crypto_aead_aeadaes256ocbtaglen128v1_NSECBYTES crypto_aead_aeadaes256ocbtaglen128v1_rv32_NSECBYTES
+#define crypto_aead_aeadaes256ocbtaglen128v1_NPUBBYTES crypto_aead_aeadaes256ocbtaglen128v1_rv32_NPUBBYTES
+#define crypto_aead_aeadaes256ocbtaglen128v1_ABYTES crypto_aead_aeadaes256ocbtaglen128v1_rv32_ABYTES
+#define crypto_aead_aeadaes256ocbtaglen128v1_NOOVERLAP crypto_aead_aeadaes256ocbtaglen128v1_rv32_NOOVERLAP
+#define crypto_aead_aeadaes256ocbtaglen128v1_IMPLEMENTATION "crypto_aead/aeadaes256ocbtaglen128v1/dolbeau/aesenc-int"
+#ifndef crypto_aead_aeadaes256ocbtaglen128v1_rv32_VERSION
+#define crypto_aead_aeadaes256ocbtaglen128v1_rv32_VERSION "-"
+#endif
+#define crypto_aead_aeadaes256ocbtaglen128v1_VERSION crypto_aead_aeadaes256ocbtaglen128v1_rv32_VERSION
+
+#endif
diff --git a/aeadaes256ocbtaglen128v1-rv32/crypto_uint32.h b/aeadaes256ocbtaglen128v1-rv32/crypto_uint32.h
new file mode 100644
index 0000000..21020d7
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/crypto_uint32.h
@@ -0,0 +1,6 @@
+#ifndef crypto_uint32_h
+#define crypto_uint32_h
+
+typedef unsigned int crypto_uint32;
+
+#endif
diff --git a/aeadaes256ocbtaglen128v1-rv32/crypto_uint64.h b/aeadaes256ocbtaglen128v1-rv32/crypto_uint64.h
new file mode 100644
index 0000000..5aa0070
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/crypto_uint64.h
@@ -0,0 +1,6 @@
+#ifndef crypto_uint64_h
+#define crypto_uint64_h
+
+typedef unsigned long long crypto_uint64;
+
+#endif
diff --git a/aeadaes256ocbtaglen128v1-rv32/crypto_uint8.h b/aeadaes256ocbtaglen128v1-rv32/crypto_uint8.h
new file mode 100644
index 0000000..f17b77e
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/crypto_uint8.h
@@ -0,0 +1,6 @@
+#ifndef crypto_uint8_h
+#define crypto_uint8_h
+
+typedef unsigned char crypto_uint8;
+
+#endif
diff --git a/aeadaes256ocbtaglen128v1-rv32/crypto_verify.h b/aeadaes256ocbtaglen128v1-rv32/crypto_verify.h
new file mode 100644
index 0000000..c8d8513
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/crypto_verify.h
@@ -0,0 +1,12 @@
+#ifndef crypto_verify_H
+#define crypto_verify_H
+
+#include "crypto_verify_16.h"
+
+#define crypto_verify crypto_verify_16
+#define crypto_verify_BYTES crypto_verify_16_BYTES
+#define crypto_verify_PRIMITIVE "16"
+#define crypto_verify_IMPLEMENTATION crypto_verify_16_IMPLEMENTATION
+#define crypto_verify_VERSION crypto_verify_16_VERSION
+
+#endif
diff --git a/aeadaes256ocbtaglen128v1-rv32/crypto_verify_16.h b/aeadaes256ocbtaglen128v1-rv32/crypto_verify_16.h
new file mode 100644
index 0000000..4d21a68
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/crypto_verify_16.h
@@ -0,0 +1,22 @@
+#ifndef crypto_verify_16_H
+#define crypto_verify_16_H
+
+#define crypto_verify_16_ref_BYTES 16
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern int crypto_verify_16_ref(const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_verify_16 crypto_verify_16_ref
+#define crypto_verify_16_BYTES crypto_verify_16_ref_BYTES
+#define crypto_verify_16_IMPLEMENTATION "crypto_verify/16/ref"
+#ifndef crypto_verify_16_ref_VERSION
+#define crypto_verify_16_ref_VERSION "-"
+#endif
+#define crypto_verify_16_VERSION crypto_verify_16_ref_VERSION
+
+#endif
diff --git a/aeadaes256ocbtaglen128v1-rv32/encrypt.c b/aeadaes256ocbtaglen128v1-rv32/encrypt.c
new file mode 100644
index 0000000..b32f79e
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/encrypt.c
@@ -0,0 +1,796 @@
+/*
+// CAESAR OCB v1 somewhat optimised code
+// Info: http://www.cs.ucdavis.edu/~rogaway/ocb
+//
+// Written by Romain Dolbeau (romain@dolbeau.org),
+// based on the reference implementation by Ted Krovetz (ted@krovetz.net).
+//
+// Phillip Rogaway holds patents relevant to OCB. See the following for
+// his free patent grant: http://www.cs.ucdavis.edu/~rogaway/ocb/grant.htm
+//
+// This is free and unencumbered software released into the public domain.
+//
+// Anyone is free to copy, modify, publish, use, compile, sell, or
+// distribute this software, either in source code form or as a compiled
+// binary, for any purpose, commercial or non-commercial, and by any
+// means.
+//
+// In jurisdictions that recognize copyright laws, the author or authors
+// of this software dedicate any and all copyright interest in the
+// software to the public domain. We make this dedication for the benefit
+// of the public at large and to the detriment of our heirs and
+// successors. We intend this dedication to be an overt act of
+// relinquishment in perpetuity of all present and future rights to this
+// software under copyright law.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+// For more information, please refer to
+*/
+
+#include <string.h>
+
+#include "api.h"
+#include "crypto_aead.h"
+#define KEYBYTES CRYPTO_KEYBYTES
+#define NONCEBYTES CRYPTO_NPUBBYTES
+#define TAGBYTES CRYPTO_ABYTES
+
+#define ALIGN16 __attribute__((aligned(16)))
+#define ALIGN32 __attribute__((aligned(32)))
+#define ALIGN64 __attribute__((aligned(64)))
+#define _bswap64(a) __builtin_bswap64(a)
+#define _bswap(a) __builtin_bswap32(a)
+
+#define printv16c(p,v) \
+ { \
+ ALIGN16 unsigned char temp[16]; \
+    _mm_storeu_si128((__m128i*)temp, v); \
+ int z; \
+ printf("%8s:%8s = ",p,#v); \
+ for (z = 15 ; z >= 0 ; z--) { \
+ printf("%02hhx", temp[z]); \
+ if ((z%4)==0) printf(" "); \
+ } \
+ printf("\n"); \
+ }
+
+#include "m128_compat.h"
+
+#include "new_instructions_support_k.h"
+
+#define rotr(a,b) _rv32_ror(a,b)
+
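+/*
+  The aes32esi / aes32esmi (and aes32dsi / aes32dsmi) calls below are the
+  scalar AES instructions of the RV32 K (Zkn) extension, assumed here to be
+  exposed as intrinsics by new_instructions_support_k.h: each call runs the
+  AES S-box (plus a MixColumns column for the -m- forms) on one byte of its
+  second operand and XORs the rotated result into the first, so four calls
+  per word perform a full round update with no lookup tables.
+*/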
+static inline void aes256_Tsetkey_encrypt(const unsigned int key[], unsigned int *aes_edrk) {
+ unsigned int i = 0;
+ unsigned int rotl_aes_edrk;
+ unsigned int tmp8, tmp9, tmp10, tmp11;
+ unsigned int tmp12, tmp13, tmp14, tmp15;
+ unsigned int temp_lds;
+ unsigned int round = 0x00000001;
+
+ tmp8 = (key[0]);
+ aes_edrk[0] = tmp8;
+ tmp9 = (key[1]);
+ aes_edrk[1] = tmp9;
+ tmp10 = (key[2]);
+ aes_edrk[2] = tmp10;
+ tmp11 = (key[3]);
+ aes_edrk[3] = tmp11;
+ tmp12 = (key[4]);
+ aes_edrk[4] = tmp12;
+ tmp13 = (key[5]);
+ aes_edrk[5] = tmp13;
+ tmp14 = (key[6]);
+ aes_edrk[6] = tmp14;
+ tmp15 = (key[7]);
+ aes_edrk[7] = tmp15;
+
+ for( i = 8; i < 56; /* i+=8 */ )
+ {
+ tmp8 = tmp8 ^ round;
+ round = round << 1;
+ rotl_aes_edrk = rotr(tmp15,8);
+ tmp8 = aes32esi0(tmp8, rotl_aes_edrk);
+ tmp8 = aes32esi1(tmp8, rotl_aes_edrk);
+ tmp8 = aes32esi2(tmp8, rotl_aes_edrk);
+ tmp8 = aes32esi3(tmp8, rotl_aes_edrk);
+
+ aes_edrk[i++] = tmp8;
+ tmp9 = tmp9 ^ tmp8;
+ aes_edrk[i++] = tmp9;
+ tmp10 = tmp10 ^ tmp9;
+ aes_edrk[i++] = tmp10;
+ tmp11 = tmp11 ^ tmp10;
+ aes_edrk[i++] = tmp11;
+
+ tmp12 = aes32esi0(tmp12, tmp11);
+ tmp12 = aes32esi1(tmp12, tmp11);
+ tmp12 = aes32esi2(tmp12, tmp11);
+ tmp12 = aes32esi3(tmp12, tmp11);
+
+ aes_edrk[i++] = tmp12;
+ tmp13 = tmp13 ^ tmp12;
+ aes_edrk[i++] = tmp13;
+ tmp14 = tmp14 ^ tmp13;
+ aes_edrk[i++] = tmp14;
+ tmp15 = tmp15 ^ tmp14;
+ aes_edrk[i++] = tmp15;
+ }
+
+ tmp8 = tmp8 ^ round;
+ round = round << 1;
+ rotl_aes_edrk = rotr(tmp15,8);
+ tmp8 = aes32esi0(tmp8, rotl_aes_edrk);
+ tmp8 = aes32esi1(tmp8, rotl_aes_edrk);
+ tmp8 = aes32esi2(tmp8, rotl_aes_edrk);
+ tmp8 = aes32esi3(tmp8, rotl_aes_edrk);
+
+ aes_edrk[i++] = tmp8;
+ tmp9 = tmp9 ^ tmp8;
+ aes_edrk[i++] = tmp9;
+ tmp10 = tmp10 ^ tmp9;
+ aes_edrk[i++] = tmp10;
+ tmp11 = tmp11 ^ tmp10;
+ aes_edrk[i++] = tmp11;
+}
+
+static void aes256_key_enc2dec(unsigned int *erk, unsigned int *drk)
+{
+ int i, j;
+ // first and last unchanged (but swapped)
+ for (i = 0; i < 4; i++) {
+ drk[i] = erk[i+56];
+ drk[i+56] = erk[i];
+ }
+ // convert & revert order
+ for (i = 1; i < 14; i++) {
+ for (j = 0 ; j < 4 ; j++) {
+ unsigned int ek, dk;
+ ek = erk[i*4+j];
+
+ dk = 0;
+ dk = aes32esi0(dk, ek);
+ dk = aes32esi1(dk, ek);
+ dk = aes32esi2(dk, ek);
+ dk = aes32esi3(dk, ek);
+
+ ek = 0;
+ ek = aes32dsmi0(ek, dk);
+ ek = aes32dsmi1(ek, dk);
+ ek = aes32dsmi2(ek, dk);
+ ek = aes32dsmi3(ek, dk);
+
+ drk[56-4*i+j] = ek;
+ }
+ }
+}
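+
+/*
+  The esi-then-dsmi pairing above is the usual equivalent-inverse-cipher
+  trick: the four aes32esi steps rebuild SubBytes(ek) byte by byte, and the
+  four aes32dsmi steps then apply InvSubBytes followed by InvMixColumns;
+  InvSubBytes cancels the SubBytes, so what lands in drk is
+  InvMixColumns(ek), the form the decryption rounds below consume.
+*/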
+
+#define AES_ROUND1T(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
+ { \
+ X0 = aes32esmi0(TAB[I++],Y0); \
+ X0 = aes32esmi1(X0,Y1); \
+ X0 = aes32esmi2(X0,Y2); \
+ X0 = aes32esmi3(X0,Y3); \
+ X1 = aes32esmi0(TAB[I++],Y1); \
+ X1 = aes32esmi1(X1,Y2); \
+ X1 = aes32esmi2(X1,Y3); \
+ X1 = aes32esmi3(X1,Y0); \
+ X2 = aes32esmi0(TAB[I++],Y2); \
+ X2 = aes32esmi1(X2,Y3); \
+ X2 = aes32esmi2(X2,Y0); \
+ X2 = aes32esmi3(X2,Y1); \
+ X3 = aes32esmi0(TAB[I++],Y3); \
+ X3 = aes32esmi1(X3,Y0); \
+ X3 = aes32esmi2(X3,Y1); \
+ X3 = aes32esmi3(X3,Y2); \
+ }
+
+/* using the K + B instructions */
+static inline void aes256_1Tft_encrypt(const uint32_t *aes_edrk, const uint32_t *input, uint32_t *output)
+{
+ unsigned int X0, X1, X2, X3, Y0, Y1, Y2, Y3;
+ unsigned int i = 0, j = 0;
+ unsigned int l_aes_nr = 14;
+
+ X0 = ((input[0]) ^ aes_edrk[j++]);
+ X1 = ((input[1]) ^ aes_edrk[j++]);
+ X2 = ((input[2]) ^ aes_edrk[j++]);
+ X3 = ((input[3]) ^ aes_edrk[j++]);
+
+ for (i = 4 ; i < (l_aes_nr<<2) ; ) {
+
+ AES_ROUND1T(aes_edrk, i, Y0, Y1, Y2, Y3, X0, X1, X2, X3 );
+
+ X0=Y0;
+ X1=Y1;
+ X2=Y2;
+ X3=Y3;
+ }
+ /* last round */
+
+ Y0 = aes32esi0(aes_edrk[i], X0);
+ Y0 = aes32esi1(Y0, X1);
+ Y0 = aes32esi2(Y0, X2);
+ Y0 = aes32esi3(Y0, X3);
+ i++;
+ Y1 = aes32esi0(aes_edrk[i], X1);
+ Y1 = aes32esi1(Y1, X2);
+ Y1 = aes32esi2(Y1, X3);
+ Y1 = aes32esi3(Y1, X0);
+ i++;
+ Y2 = aes32esi0(aes_edrk[i], X2);
+ Y2 = aes32esi1(Y2, X3);
+ Y2 = aes32esi2(Y2, X0);
+ Y2 = aes32esi3(Y2, X1);
+ i++;
+ Y3 = aes32esi0(aes_edrk[i], X3);
+ Y3 = aes32esi1(Y3, X0);
+ Y3 = aes32esi2(Y3, X1);
+ Y3 = aes32esi3(Y3, X2);
+
+ output[0] = (Y0);
+ output[1] = (Y1);
+ output[2] = (Y2);
+ output[3] = (Y3);
+}
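+
+/*
+  aes_edrk holds the 15 four-word round keys back to back: the loop above
+  runs the 13 middle rounds with the MixColumns (-m-) forms, and the tail
+  uses plain aes32esi, matching the final AES round having no MixColumns.
+*/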
+
+
+#define AES_ROUND_DKT(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
+ { \
+ X0 = aes32dsmi0(TAB[I+0],Y0); \
+ X0 = aes32dsmi1(X0,Y3); \
+ X0 = aes32dsmi2(X0,Y2); \
+ X0 = aes32dsmi3(X0,Y1); \
+ X1 = aes32dsmi0(TAB[I+1],Y1); \
+ X1 = aes32dsmi1(X1,Y0); \
+ X1 = aes32dsmi2(X1,Y3); \
+ X1 = aes32dsmi3(X1,Y2); \
+ X2 = aes32dsmi0(TAB[I+2],Y2); \
+ X2 = aes32dsmi1(X2,Y1); \
+ X2 = aes32dsmi2(X2,Y0); \
+ X2 = aes32dsmi3(X2,Y3); \
+ X3 = aes32dsmi0(TAB[I+3],Y3); \
+ X3 = aes32dsmi1(X3,Y2); \
+ X3 = aes32dsmi2(X3,Y1); \
+ X3 = aes32dsmi3(X3,Y0); \
+ }
+
+void aes256_1Tft_decrypt(const unsigned int *aes_drk, const unsigned int *input, unsigned int *output)
+{
+ const unsigned int aes_nr = 14; // aes256
+ unsigned int X0, X1, X2, X3, Y0, Y1, Y2, Y3;
+ unsigned int i;
+
+ X0 = input[0]; X0 ^= aes_drk[0];
+ X1 = input[1]; X1 ^= aes_drk[1];
+ X2 = input[2]; X2 ^= aes_drk[2];
+ X3 = input[3]; X3 ^= aes_drk[3];
+
+  for (i = 1; i < aes_nr; i++) {
+    AES_ROUND_DKT(aes_drk, i*4, Y0, Y1, Y2, Y3, X0, X1, X2, X3);
+    X0 = Y0;
+    X1 = Y1;
+    X2 = Y2;
+    X3 = Y3;
+  }
+
+  /* last round, no InvMixColumns */
+  Y0 = aes32dsi0(aes_drk[4*aes_nr+0], X0);
+  Y0 = aes32dsi1(Y0, X3);
+  Y0 = aes32dsi2(Y0, X2);
+  Y0 = aes32dsi3(Y0, X1);
+  Y1 = aes32dsi0(aes_drk[4*aes_nr+1], X1);
+  Y1 = aes32dsi1(Y1, X0);
+  Y1 = aes32dsi2(Y1, X3);
+  Y1 = aes32dsi3(Y1, X2);
+  Y2 = aes32dsi0(aes_drk[4*aes_nr+2], X2);
+  Y2 = aes32dsi1(Y2, X1);
+  Y2 = aes32dsi2(Y2, X0);
+  Y2 = aes32dsi3(Y2, X3);
+  Y3 = aes32dsi0(aes_drk[4*aes_nr+3], X3);
+  Y3 = aes32dsi1(Y3, X2);
+  Y3 = aes32dsi2(Y3, X1);
+  Y3 = aes32dsi3(Y3, X0);
+
+  output[0] = (Y0);
+  output[1] = (Y1);
+  output[2] = (Y2);
+  output[3] = (Y3);
+}
+
+typedef unsigned char block[16];
+
+static inline void xor_block(block d, const block s1, const block s2) {
+  unsigned i;
+  for (i = 0; i < 16; i++)
+    d[i] = s1[i] ^ s2[i];
+}
+
+/* single-block helpers bridging the scalar 4-word AES above to the
+   __m128i code below */
+static inline __m128i aes256_1Tft__encrypt1_si128(const __m128i in, const __m128i *aes_key) {
+  ALIGN16 unsigned int ib[4], ob[4];
+  _mm_storeu_si128((__m128i*)ib, in);
+  aes256_1Tft_encrypt((const unsigned int *)aes_key, ib, ob);
+  return _mm_loadu_si128((const __m128i*)ob);
+}
+static inline __m128i aes256_1Tft__decrypt1_si128(const __m128i in, const __m128i *aes_key) {
+  ALIGN16 unsigned int ib[4], ob[4];
+  _mm_storeu_si128((__m128i*)ib, in);
+  aes256_1Tft_decrypt((const unsigned int *)aes_key, ib, ob);
+  return _mm_loadu_si128((const __m128i*)ob);
+}
+
+#if 0
+/* 8 bits, byte-per-byte doubling, slow */
+static inline void double_block(block d, const block s) {
+  unsigned i;
+  const unsigned char tmp = s[0];
+  for (i = 0; i < 15; i++)
+    d[i] = (s[i] << 1) | (s[i+1] >> 7);
+  d[15] = (s[15] << 1) ^ ((tmp >> 7) * 135);
+}
+#else
+#if 0
+/* 64 bits little-endian doubling, faster */
+static inline void double_block(unsigned long long *d, const unsigned long long* s) {
+ unsigned long long sl = _bswap64(s[1]), sh = _bswap64(s[0]);
+ unsigned long long sl1 = sl << 1;
+ unsigned long long sh1 = sh << 1;
+ sh1 |= sl>>63;
+ sl1 ^= (((long long)sh>>63) & 135);
+ d[1]=_bswap64(sl1);
+ d[0]=_bswap64(sh1);
+}
+#else
+/* 128 bits SSE, much faster */
+static inline __m128i double_block_si128_norev(const __m128i sv) {
+ const __m128i mask = _mm_set_epi32(135,1,1,1);
+ /* __m128i sv31 = _mm_srai_epi32(sv, 31); */
+ __m128i sv31 = wordsign128(sv);
+ __m128i sv31m = _mm_and_si128(sv31, mask);
+ /* __m128i sv31ms = _mm_shuffle_epi32(sv31m, _MM_SHUFFLE(2,1,0,3)); */
+ __m128i sv31ms = wordrotate1l128(sv31m);
+ __m128i sv1 = _mm_slli_epi32(sv, 1);
+ __m128i dv = _mm_xor_si128(sv31ms,sv1);
+ return dv;
+}
+static inline __m128i double_block_si128(const __m128i svr) {
+ /* const __m128i rev = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); */
+ /* __m128i sv = _mm_shuffle_epi8(svr, rev); */
+ __m128i sv = bytereverse128(svr);
+ __m128i dv = double_block_si128_norev(sv);
+ /* return _mm_shuffle_epi8(dv, rev); */
+ return bytereverse128(dv);
+}
+static inline void double_block(unsigned char *d, const unsigned char* s) {
+ __m128i sv = _mm_loadu_si128((const __m128i*)s);
+ __m128i dv = double_block_si128(sv);
+ _mm_storeu_si128((__m128i*)d,dv);
+}
+/* 128 bits SSE times 4 */
+static const unsigned short lk4[64] = {
+0x0000, 0x0086, 0x010c, 0x018a, 0x0218, 0x029e, 0x0314, 0x0392,
+0x0430, 0x04b6, 0x053c, 0x05ba, 0x0628, 0x06ae, 0x0724, 0x07a2,
+0x0860, 0x08e6, 0x096c, 0x09ea, 0x0a78, 0x0afe, 0x0b74, 0x0bf2,
+0x0c50, 0x0cd6, 0x0d5c, 0x0dda, 0x0e48, 0x0ece, 0x0f44, 0x0fc2,
+0x10c0, 0x1046, 0x11cc, 0x114a, 0x12d8, 0x125e, 0x13d4, 0x1352,
+0x14f0, 0x1476, 0x15fc, 0x157a, 0x16e8, 0x166e, 0x17e4, 0x1762,
+0x18a0, 0x1826, 0x19ac, 0x192a, 0x1ab8, 0x1a3e, 0x1bb4, 0x1b32,
+0x1c90, 0x1c16, 0x1d9c, 0x1d1a, 0x1e88, 0x1e0e, 0x1f84, 0x1f02
+};
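+/*
+  lk4[] folds the GF(2^128) reduction (p(x) = x^128 + x^7 + x^2 + x + 1,
+  so a carry bit reduces as 0x87) for up to 6 carry bits at once:
+  wordrotate1l128() moves each word's carries into the next word but also
+  wraps the top word's carries into the bottom one, so each entry is the
+  carry-less product c * 0x87 XOR c, the XOR with c cancelling that
+  wrapped-in contribution (e.g. lk4[1] = 0x87 ^ 0x01 = 0x0086,
+  lk4[3] = (0x87 ^ (0x87 << 1)) ^ 0x03 = 0x018a).
+*/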
+static inline __m128i double_block_2_si128_norev(const __m128i sv) {
+ const __m128i mask = _mm_set_epi32(3,3,3,3);
+ const int idx = _mm_extract_epi8(sv,15);
+ /* __m128i sv30x = _mm_insert_epi16(_mm_setzero_si128(),lk4[(idx&0xC0)>>6],0); */
+ __m128i sv30x = halfwordandzero(lk4[(idx&0xC0)>>6]);
+
+ __m128i sv30 = _mm_srli_epi32(sv, 30);
+ __m128i sv30m = _mm_and_si128(sv30, mask);
+ /* __m128i sv30ms = _mm_shuffle_epi32(sv30m, _MM_SHUFFLE(2,1,0,3)); */
+ __m128i sv30ms = wordrotate1l128(sv30m);
+ __m128i sv2 = _mm_slli_epi32(sv, 2);
+ __m128i dv = _mm_xor_si128(sv30ms,sv2);
+ __m128i final = _mm_xor_si128(dv, sv30x);
+ return final;
+}
+static inline __m128i double_block_3_si128_norev(const __m128i sv) {
+ const __m128i mask = _mm_set_epi32(7,7,7,7);
+ const int idx = _mm_extract_epi8(sv,15);
+ /* __m128i sv29x = _mm_insert_epi16(_mm_setzero_si128(),lk4[(idx&0xE0)>>5],0); */
+ __m128i sv29x = halfwordandzero(lk4[(idx&0xE0)>>5]);
+
+ __m128i sv29 = _mm_srli_epi32(sv, 29);
+ __m128i sv29m = _mm_and_si128(sv29, mask);
+ /* __m128i sv29ms = _mm_shuffle_epi32(sv29m, _MM_SHUFFLE(2,1,0,3)); */
+ __m128i sv29ms = wordrotate1l128(sv29m);
+ __m128i sv3 = _mm_slli_epi32(sv, 3);
+ __m128i dv = _mm_xor_si128(sv29ms,sv3);
+ __m128i final = _mm_xor_si128(dv, sv29x);
+ return final;
+}
+static inline __m128i double_block_4_si128_norev(const __m128i sv) {
+ const __m128i mask = _mm_set_epi32(15,15,15,15);
+ const int idx = _mm_extract_epi8(sv,15);
+ /* __m128i sv28x = _mm_insert_epi16(_mm_setzero_si128(),lk4[(idx&0xF0)>>4],0); */
+ __m128i sv28x = halfwordandzero(lk4[(idx&0xF0)>>4]);
+
+ __m128i sv28 = _mm_srli_epi32(sv, 28);
+ __m128i sv28m = _mm_and_si128(sv28, mask);
+ /* __m128i sv28ms = _mm_shuffle_epi32(sv28m, _MM_SHUFFLE(2,1,0,3)); */
+ __m128i sv28ms = wordrotate1l128(sv28m);
+ __m128i sv4 = _mm_slli_epi32(sv, 4);
+ __m128i dv = _mm_xor_si128(sv28ms,sv4);
+ __m128i final = _mm_xor_si128(dv, sv28x);
+ return final;
+}
+static inline __m128i double_block_5_si128_norev(const __m128i sv) {
+ const __m128i mask = _mm_set_epi32(31,31,31,31);
+ const int idx = _mm_extract_epi8(sv,15);
+ /* __m128i sv27x = _mm_insert_epi16(_mm_setzero_si128(),lk4[(idx&0xF8)>>3],0); */
+ __m128i sv27x = halfwordandzero(lk4[(idx&0xF8)>>3]);
+
+ __m128i sv27 = _mm_srli_epi32(sv, 27);
+ __m128i sv27m = _mm_and_si128(sv27, mask);
+ /* __m128i sv27ms = _mm_shuffle_epi32(sv27m, _MM_SHUFFLE(2,1,0,3)); */
+ __m128i sv27ms = wordrotate1l128(sv27m);
+ __m128i sv5 = _mm_slli_epi32(sv, 5);
+ __m128i dv = _mm_xor_si128(sv27ms,sv5);
+ __m128i final = _mm_xor_si128(dv, sv27x);
+ return final;
+}
+static inline __m128i double_block_6_si128_norev(const __m128i sv) {
+ const __m128i mask = _mm_set_epi32(63,63,63,63);
+ const int idx = _mm_extract_epi8(sv,15);
+ /* __m128i sv26x = _mm_insert_epi16(_mm_setzero_si128(),lk4[(idx&0xFC)>>2],0); */
+ __m128i sv26x = halfwordandzero(lk4[(idx&0xFC)>>2]);
+
+ __m128i sv26 = _mm_srli_epi32(sv, 26);
+ __m128i sv26m = _mm_and_si128(sv26, mask);
+ /* __m128i sv26ms = _mm_shuffle_epi32(sv26m, _MM_SHUFFLE(2,1,0,3)); */
+ __m128i sv26ms = wordrotate1l128(sv26m);
+ __m128i sv6 = _mm_slli_epi32(sv, 6);
+ __m128i dv = _mm_xor_si128(sv26ms,sv6);
+ __m128i final = _mm_xor_si128(dv, sv26x);
+ return final;
+}
+#endif
+#endif
+
+/* ------------------------------------------------------------------------- */
+static inline __m128i calc_L_i_si128(const __m128i ldollarvr, const unsigned j) {
+ /* const __m128i rev = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); */
+ /* __m128i ldollarv = _mm_shuffle_epi8(ldollarvr, rev); */
+ __m128i ldollarv = bytereverse128(ldollarvr);
+ unsigned i;
+ __m128i lv;
+ unsigned ntz = __builtin_ctz(j);/* printf("ntz = %u\n", ntz); */
+ switch(ntz) {
+ case 0:
+ lv = double_block_si128_norev(ldollarv);
+ break;
+ case 1:
+ lv = double_block_2_si128_norev(ldollarv);
+ break;
+ case 2:
+ lv = double_block_3_si128_norev(ldollarv);
+ break;
+ case 3:
+ lv = double_block_4_si128_norev(ldollarv);
+ break;
+ case 4:
+ lv = double_block_5_si128_norev(ldollarv);
+ break;
+ default:
+ lv = double_block_6_si128_norev(ldollarv);
+ for (i = 5; i < ntz ; i++)
+ lv = double_block_si128_norev(lv);
+ break;
+ }
+ /* return _mm_shuffle_epi8(lv, rev); */
+ return bytereverse128(lv);
+}
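+/*
+  OCB needs L_{ntz(j)} = double^{ntz(j)+1}(L_$): the switch above picks one
+  of the fused 1..6-fold doublers so the common small trailing-zero counts
+  cost a single pass, and deeper counts top up with single doublings.
+*/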
+static inline void calc_L_i(block l, const block ldollar, const unsigned i) {
+ __m128i ldollarv = _mm_loadu_si128((const __m128i*)ldollar);
+ __m128i lv = calc_L_i_si128(ldollarv, i);
+ _mm_storeu_si128((__m128i*)l,lv);
+}
+static inline void precompute_lv(__m128i prelv[32], const __m128i ldollarvr, const unsigned max) {
+ /* const __m128i rev = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); */
+ /* __m128i ldollarv = _mm_shuffle_epi8(ldollarvr, rev); */
+ __m128i ldollarv = bytereverse128(ldollarvr);
+ unsigned i;
+ __m128i lv = double_block_si128_norev(ldollarv);
+ for (i = 0 ; i < max-1 ; i++) {
+ /* prelv[i] = _mm_shuffle_epi8(lv, rev); */
+ prelv[i] = bytereverse128(lv);
+ lv = double_block_si128_norev(lv);
+ }
+ /* prelv[i] = _mm_shuffle_epi8(lv, rev); */
+  prelv[i] = bytereverse128(lv);
+}
+
+/* ------------------------------------------------------------------------- */
+
+static void hash(block result, const unsigned char *k,
+ unsigned char *a, unsigned abytes,
+ const __m128i lstar,
+ const __m128i prelv[32], const __m128i aes_key[15]) {
+ __m128i offset, sum, tmp;
+ unsigned i;
+
+ /* Process any whole blocks */
+ /* Sum_0 = zeros(128) */
+ sum = _mm_setzero_si128();
+ /* Offset_0 = zeros(128) */
+ offset = _mm_setzero_si128();
+ i=1;
+ for (; i<=abytes/16; i++, a = a + 16) {
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ tmp = prelv[__builtin_ctz(i)];
+ offset = _mm_xor_si128(offset, tmp);
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ tmp = _mm_xor_si128(offset, _mm_loadu_si128((const __m128i*)a));
+ tmp = aes256_1Tft__encrypt1_si128(tmp, aes_key);
+ sum = _mm_xor_si128(sum, tmp);
+ }
+
+ /* Process any final partial block; compute final hash value */
+
+ abytes = abytes % 16; /* Bytes in final block */
+ if (abytes > 0) {
+ /* Offset_* = Offset_m xor L_* */
+ offset = _mm_xor_si128(offset, lstar);
+ /* tmp = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_* */
+ unsigned char pad[16];
+ memset(pad, 0, 16);
+ memcpy(pad, a, abytes);
+ pad[abytes] = 0x80;
+ tmp = _mm_loadu_si128((const __m128i*)pad);
+ tmp = _mm_xor_si128(offset, tmp);
+ /* Sum = Sum_m xor ENCIPHER(K, tmp) */
+ tmp = aes256_1Tft__encrypt1_si128(tmp, aes_key);
+ sum = _mm_xor_si128(tmp, sum);
+ }
+
+ _mm_storeu_si128((__m128i*)result,sum);
+}
+
+/* ------------------------------------------------------------------------- */
+
+static int ocb_crypt(unsigned char *out, unsigned char *k, unsigned char *n,
+ unsigned char *a, unsigned abytes,
+ unsigned char *in, unsigned inbytes, int encrypting) {
+ __m128i prelv[32];
+ __m128i aes_decrypt_key[15];
+ __m128i aes_encrypt_key[15];
+ block ad_hash;
+ __m128i lstar, ldollar, sum, offset, ktop, pad, nonce, tag, tmp, outv;
+ block nonce_b, offset_b;
+ unsigned char stretch[24];
+ unsigned bottom, byteshift, bitshift, i, max;
+
+ /* Setup AES and strip ciphertext of its tag */
+ if ( ! encrypting ) {
+ if (inbytes < TAGBYTES) return -1;
+ inbytes -= TAGBYTES;
+ }
+ aes256_Tsetkey_encrypt(k, aes_encrypt_key);
+ if ( ! encrypting ) {
+ aes256_key_enc2dec(aes_encrypt_key, aes_decrypt_key);
+ }
+
+ /* Key-dependent variables */
+
+ /* L_* = ENCIPHER(K, zeros(128)) */
+ tmp = _mm_setzero_si128();
+ lstar = aes256_1Tft__encrypt1_si128(tmp, aes_encrypt_key);
+ /* L_$ = double(L_*) */
+ ldollar = double_block_si128(lstar);
+ max = abytes >= inbytes ? abytes/4 : inbytes/4;
+ max = (max < 2 ? 2 : max);
+  /* only precompute what's really needed:
+     count the leading zeros to find the leftmost bit set to one;
+     all trailing zeros lie to its right, so this bounds the largest
+     ntz(i) the block loops can encounter */
+ precompute_lv(prelv,ldollar,31-__builtin_clz(max));
+
+ /* Nonce-dependent and per-encryption variables */
+
+ /* Nonce = zeros(127-bitlen(N)) || 1 || N */
+ memset(nonce_b, 0, 16);
+ memcpy(&nonce_b[16-NONCEBYTES],n,NONCEBYTES);
+ nonce_b[0] = (unsigned char)(((TAGBYTES * 8) % 128) << 1);
+ nonce_b[16-NONCEBYTES-1] |= 0x01;
+ /* bottom = str2num(Nonce[123..128]) */
+ bottom = nonce_b[15] & 0x3F;
+ /* Ktop = ENCIPHER(K, Nonce[1..122] || zeros(6)) */
+ nonce_b[15] &= 0xC0;
+ nonce = _mm_loadu_si128((const __m128i*)nonce_b);
+ ktop = aes256_1Tft__encrypt1_si128(nonce, aes_encrypt_key);
+ /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */
+ _mm_storeu_si128((__m128i*)stretch, ktop);
+ _mm_storel_epi64((__m128i*)(stretch+16), _mm_xor_si128(_mm_srli_si128(ktop,1), ktop));
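+  /* _mm_srli_si128(ktop,1) shifts the register right by one byte, lining
+     bytes 1..8 of Ktop up with bytes 0..7, so the 8 bytes stored above are
+     Ktop[1..64] xor Ktop[9..72] exactly as the spec line requires */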
+ /* Offset_0 = Stretch[1+bottom..128+bottom] */
+ byteshift = bottom/8;
+ bitshift = bottom%8;
+ if (bitshift != 0)
+ for (i=0; i<16; i++)
+ offset_b[i] = (stretch[i+byteshift] << bitshift) |
+ (stretch[i+byteshift+1] >> (8-bitshift));
+ else
+ for (i=0; i<16; i++)
+ offset_b[i] = stretch[i+byteshift];
+ offset = _mm_loadu_si128((const __m128i*)offset_b);
+ /* Checksum_0 = zeros(128) */
+  sum = _mm_setzero_si128();
+
+ /* Hash associated data */
+ hash(ad_hash, k, a, abytes, lstar, prelv, aes_encrypt_key);
+
+ /* Process any whole blocks */
+ i=1;
+ if (encrypting) {
+
+ for (; i<=inbytes/16; i++, in=in+16, out=out+16) {
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ tmp = prelv[__builtin_ctz(i)];
+
+ offset = _mm_xor_si128(offset, tmp);
+ tmp = _mm_xor_si128(offset, _mm_loadu_si128((const __m128i*)in));
+
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ sum = _mm_xor_si128(_mm_loadu_si128((const __m128i*)in), sum);
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ tmp = aes256_1Tft__encrypt1_si128(tmp, aes_encrypt_key);
+ outv = _mm_xor_si128(offset, tmp);
+ _mm_storeu_si128((__m128i*)out, outv);
+ }
+ } else {
+
+ for (; i<=inbytes/16; i++, in=in+16, out=out+16) {
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ tmp= prelv[__builtin_ctz(i)];
+ offset = _mm_xor_si128(offset, tmp);
+ tmp = _mm_xor_si128(offset, _mm_loadu_si128((const __m128i*)in));
+
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ tmp = aes256_1Tft__decrypt1_si128(tmp, aes_decrypt_key);
+ outv = _mm_xor_si128(offset, tmp);
+ _mm_storeu_si128((__m128i*)out, outv);
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ sum = _mm_xor_si128(outv, sum);
+ }
+ }
+
+ /* Process any final partial block and compute raw tag */
+
+ inbytes = inbytes % 16; /* Bytes in final block */
+ if (inbytes > 0) {
+ /* Offset_* = Offset_m xor L_* */
+ offset = _mm_xor_si128(offset, lstar);
+ /* Pad = ENCIPHER(K, Offset_*) */
+ pad = aes256_1Tft__encrypt1_si128(offset, aes_encrypt_key);
+
+ if (encrypting) {
+ /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
+ unsigned char tmp_b[16];
+ unsigned char pad_b[16];
+ memset(tmp_b, 0, 16);
+ memcpy(tmp_b, in, inbytes);
+ tmp_b[inbytes] = 0x80;
+ tmp = _mm_loadu_si128((const __m128i*)tmp_b);
+ sum = _mm_xor_si128(tmp, sum);
+ /* C_* = P_* xor Pad[1..bitlen(P_*)] */
+ pad = _mm_xor_si128(tmp, pad);
+ _mm_storeu_si128((__m128i*)pad_b, pad);
+ memcpy(out, pad_b, inbytes);
+ out = out + inbytes;
+ } else {
+ /* P_* = C_* xor Pad[1..bitlen(C_*)] */
+ unsigned char tmp_b[16];
+ unsigned char pad_b[16];
+ _mm_storeu_si128((__m128i*)pad_b, pad);
+ memcpy(tmp_b, pad_b, 16);
+ memcpy(tmp_b, in, inbytes);
+ xor_block(tmp_b,pad_b,tmp_b);
+ tmp_b[inbytes] = 0x80;
+ memcpy(out, tmp_b, inbytes);
+ tmp = _mm_loadu_si128((const __m128i*)tmp_b);
+ /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
+ sum = _mm_xor_si128(tmp, sum);
+ in = in + inbytes;
+ }
+ }
+
+ /* Tag = ENCIPHER(K, Checksum xor Offset xor L_$) xor HASH(K,A) */
+ tmp = _mm_xor_si128(sum, offset);
+ tmp = _mm_xor_si128(tmp, ldollar);
+ tag = aes256_1Tft__encrypt1_si128(tmp, aes_encrypt_key);
+ tag = _mm_xor_si128(_mm_loadu_si128((const __m128i*)ad_hash), tag);
+
+ if (encrypting) {
+ unsigned char tag_b[16];
+ _mm_storeu_si128((__m128i*)tag_b, tag);
+ memcpy(out, tag_b, TAGBYTES);
+ return 0;
+ } else {
+ unsigned char tag_b[16];
+ _mm_storeu_si128((__m128i*)tag_b, tag);
+ return (memcmp(in,tag_b,TAGBYTES) ? -1 : 0); /* Check for validity */
+ }
+}
+
+/* ------------------------------------------------------------------------- */
+
+#define OCB_ENCRYPT 1
+#define OCB_DECRYPT 0
+
+void ocb_encrypt(unsigned char *c, unsigned char *k, unsigned char *n,
+ unsigned char *a, unsigned abytes,
+ unsigned char *p, unsigned pbytes) {
+ ocb_crypt(c, k, n, a, abytes, p, pbytes, OCB_ENCRYPT);
+}
+
+/* ------------------------------------------------------------------------- */
+
+int ocb_decrypt(unsigned char *p, unsigned char *k, unsigned char *n,
+ unsigned char *a, unsigned abytes,
+ unsigned char *c, unsigned cbytes) {
+ return ocb_crypt(p, k, n, a, abytes, c, cbytes, OCB_DECRYPT);
+}
+
+/* ------------------------------------------------------------------------- */
+
+int crypto_aead_encrypt(
+unsigned char *c,unsigned long long *clen,
+const unsigned char *m,unsigned long long mlen,
+const unsigned char *ad,unsigned long long adlen,
+const unsigned char *nsec,
+const unsigned char *npub,
+const unsigned char *k
+)
+{
+ *clen = mlen + TAGBYTES;
+ ocb_crypt(c, (unsigned char *)k, (unsigned char *)npub, (unsigned char *)ad,
+ adlen, (unsigned char *)m, mlen, OCB_ENCRYPT);
+ return 0;
+}
+
+int crypto_aead_decrypt(
+unsigned char *m,unsigned long long *mlen,
+unsigned char *nsec,
+const unsigned char *c,unsigned long long clen,
+const unsigned char *ad,unsigned long long adlen,
+const unsigned char *npub,
+const unsigned char *k
+)
+{
+ *mlen = clen - TAGBYTES;
+ return ocb_crypt(m, (unsigned char *)k, (unsigned char *)npub,
+ (unsigned char *)ad, adlen, (unsigned char *)c, clen, OCB_DECRYPT);
+}
+
diff --git a/aeadaes256ocbtaglen128v1-rv32/kernelrandombytes.h b/aeadaes256ocbtaglen128v1-rv32/kernelrandombytes.h
new file mode 100644
index 0000000..2248f60
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/kernelrandombytes.h
@@ -0,0 +1,14 @@
+#ifndef kernelrandombytes_h
+#define kernelrandombytes_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void kernelrandombytes(unsigned char *,unsigned long long);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/aeadaes256ocbtaglen128v1-rv32/random.cpp b/aeadaes256ocbtaglen128v1-rv32/random.cpp
new file mode 100644
index 0000000..53fe546
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/random.cpp
@@ -0,0 +1,19 @@
+#include <random>
+#include <functional>
+
+std::default_random_engine generator;
+std::uniform_int_distribution distribution(0,255);
+auto rbyte = std::bind ( distribution, generator );
+
+extern "C" {
+ void kernelrandombytes(unsigned char *x,unsigned long long xlen)
+ {
+ while (xlen > 0) {
+ *x = rbyte();
+ x++;
+ xlen--;
+ }
+ }
+}
diff --git a/aeadaes256ocbtaglen128v1-rv32/riscv.c b/aeadaes256ocbtaglen128v1-rv32/riscv.c
new file mode 100644
index 0000000..2ed0c0b
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/riscv.c
@@ -0,0 +1,83 @@
+/*
+cpucycles/riscv.c version 20190803
+D. J. Bernstein
+Romain Dolbeau
+Public domain.
+*/
+
+#include <time.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+long long cpucycles_riscv(void)
+{
+ long long result;
+#if defined(__riscv_xlen)
+#if __riscv_xlen == 64
+ asm volatile("rdcycle %0" : "=r" (result));
+#elif __riscv_xlen == 32
+ unsigned int l, h, h2;
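+  /* hi/lo/hi read: retry until both rdcycleh samples agree, so the
+     assembled 64-bit count is consistent across a low-word rollover */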
+ asm volatile( "start:\n"
+ "rdcycleh %0\n"
+ "rdcycle %1\n"
+ "rdcycleh %2\n"
+ "bne %0, %2, start\n"
+ : "=r" (h), "=r" (l), "=r" (h2));
+
+ result = (((unsigned long long)h)<<32) | ((unsigned long long)l);
+#else
+#error "unknown __riscv_xlen"
+#endif
+#else // __riscv_xlen
+#error "__riscv_xlen required for RISC-V support"
+#endif // __riscv_xlen
+ return result;
+}
+
+static long long microseconds(void)
+{
+ struct timeval t;
+ gettimeofday(&t,(struct timezone *) 0);
+ return t.tv_sec * (long long) 1000000 + t.tv_usec;
+}
+
+static double guessfreq(void)
+{
+ long long tb0; long long us0;
+ long long tb1; long long us1;
+
+ tb0 = cpucycles_riscv();
+ us0 = microseconds();
+ do {
+ tb1 = cpucycles_riscv();
+ us1 = microseconds();
+ } while (us1 - us0 < 10000 || tb1 - tb0 < 1000);
+ if (tb1 <= tb0) return 0;
+ tb1 -= tb0;
+ us1 -= us0;
+ return ((double) tb1) / (0.000001 * (double) us1);
+}
+
+static long long cpufrequency = 0;
+
+static void init(void)
+{
+ double guess1;
+ double guess2;
+ int loop;
+
+ for (loop = 0;loop < 100;++loop) {
+ guess1 = guessfreq();
+ guess2 = guessfreq();
+ if (guess1 > 1.01 * guess2) continue;
+ if (guess2 > 1.01 * guess1) continue;
+ cpufrequency = 0.5 * (guess1 + guess2);
+ break;
+ }
+}
+
+long long cpucycles_riscv_persecond(void)
+{
+ if (!cpufrequency) init();
+ return cpufrequency;
+}
diff --git a/aeadaes256ocbtaglen128v1-rv32/try-anything.c b/aeadaes256ocbtaglen128v1-rv32/try-anything.c
new file mode 100644
index 0000000..84517c4
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/try-anything.c
@@ -0,0 +1,323 @@
+/*
+ * try-anything.c version 20190729
+ * D. J. Bernstein
+ * Some portions adapted from TweetNaCl by Bernstein, Janssen, Lange, Schwabe.
+ * Public domain.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include "kernelrandombytes.h"
+#include "cpucycles.h"
+#include "crypto_uint8.h"
+#include "crypto_uint32.h"
+#include "crypto_uint64.h"
+#include "try.h"
+
+typedef crypto_uint8 u8;
+typedef crypto_uint32 u32;
+typedef crypto_uint64 u64;
+
+#define FOR(i,n) for (i = 0;i < n;++i)
+
+static u32 L32(u32 x,int c) { return (x << c) | ((x&0xffffffff) >> (32 - c)); }
+
+static u32 ld32(const u8 *x)
+{
+ u32 u = x[3];
+ u = (u<<8)|x[2];
+ u = (u<<8)|x[1];
+ return (u<<8)|x[0];
+}
+
+static void st32(u8 *x,u32 u)
+{
+ int i;
+ FOR(i,4) { x[i] = u; u >>= 8; }
+}
+
+static const u8 sigma[17] = "expand 32-byte k";
+
+static void core(u8 *out,const u8 *in,const u8 *k)
+{
+ u32 w[16],x[16],y[16],t[4];
+ int i,j,m;
+
+ FOR(i,4) {
+ x[5*i] = ld32(sigma+4*i);
+ x[1+i] = ld32(k+4*i);
+ x[6+i] = ld32(in+4*i);
+ x[11+i] = ld32(k+16+4*i);
+ }
+
+ FOR(i,16) y[i] = x[i];
+
+ FOR(i,20) {
+ FOR(j,4) {
+ FOR(m,4) t[m] = x[(5*j+4*m)%16];
+ t[1] ^= L32(t[0]+t[3], 7);
+ t[2] ^= L32(t[1]+t[0], 9);
+ t[3] ^= L32(t[2]+t[1],13);
+ t[0] ^= L32(t[3]+t[2],18);
+ FOR(m,4) w[4*j+(j+m)%4] = t[m];
+ }
+ FOR(m,16) x[m] = w[m];
+ }
+
+ FOR(i,16) st32(out + 4 * i,x[i] + y[i]);
+}
+
+static void salsa20(u8 *c,u64 b,const u8 *n,const u8 *k)
+{
+ u8 z[16],x[64];
+ u32 u,i;
+ if (!b) return;
+ FOR(i,16) z[i] = 0;
+ FOR(i,8) z[i] = n[i];
+ while (b >= 64) {
+ core(x,z,k);
+ FOR(i,64) c[i] = x[i];
+ u = 1;
+ for (i = 8;i < 16;++i) {
+ u += (u32) z[i];
+ z[i] = u;
+ u >>= 8;
+ }
+ b -= 64;
+ c += 64;
+ }
+ if (b) {
+ core(x,z,k);
+ FOR(i,b) c[i] = x[i];
+ }
+}
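+
+/*
+  core() and salsa20() above are the Salsa20 functions adapted from
+  TweetNaCl; they serve purely as a deterministic stream generator so that
+  test inputs, canaries and checksums are reproducible from run to run.
+*/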
+
+static void increment(u8 *n)
+{
+ if (!++n[0])
+ if (!++n[1])
+ if (!++n[2])
+ if (!++n[3])
+ if (!++n[4])
+ if (!++n[5])
+ if (!++n[6])
+ if (!++n[7])
+ ;
+}
+
+static void testvector(unsigned char *x,unsigned long long xlen)
+{
+ const static unsigned char testvector_k[33] = "generate inputs for test vectors";
+ static unsigned char testvector_n[8];
+ salsa20(x,xlen,testvector_n,testvector_k);
+ increment(testvector_n);
+}
+
+unsigned long long myrandom(void)
+{
+ unsigned char x[8];
+ unsigned long long result;
+ testvector(x,8);
+ result = x[7];
+ result = (result<<8)|x[6];
+ result = (result<<8)|x[5];
+ result = (result<<8)|x[4];
+ result = (result<<8)|x[3];
+ result = (result<<8)|x[2];
+ result = (result<<8)|x[1];
+ result = (result<<8)|x[0];
+ return result;
+}
+
+static void canary(unsigned char *x,unsigned long long xlen)
+{
+ const static unsigned char canary_k[33] = "generate pad to catch overwrites";
+ static unsigned char canary_n[8];
+ salsa20(x,xlen,canary_n,canary_k);
+ increment(canary_n);
+}
+
+void double_canary(unsigned char *x2,unsigned char *x,unsigned long long xlen)
+{
+ canary(x - 16,16);
+ canary(x + xlen,16);
+ memcpy(x2 - 16,x - 16,16);
+ memcpy(x2 + xlen,x + xlen,16);
+}
+
+void input_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
+{
+ testvector(x,xlen);
+ canary(x - 16,16);
+ canary(x + xlen,16);
+ memcpy(x2 - 16,x - 16,xlen + 32);
+}
+
+void input_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
+{
+ if (memcmp(x2 - 16,x - 16,xlen + 32)) {
+ fprintf(stderr,"%s overwrites input\n",fun);
+ exit(111);
+ }
+}
+
+void output_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
+{
+ canary(x - 16,xlen + 32);
+ memcpy(x2 - 16,x - 16,xlen + 32);
+}
+
+void output_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
+{
+ if (memcmp(x2 - 16,x - 16,16)) {
+ fprintf(stderr,"%s writes before output\n",fun);
+ exit(111);
+ }
+ if (memcmp(x2 + xlen,x + xlen,16)) {
+ fprintf(stderr,"%s writes after output\n",fun);
+ exit(111);
+ }
+}
+
+static unsigned char checksum_state[64];
+static char checksum_hex[65];
+
+void checksum(const unsigned char *x,unsigned long long xlen)
+{
+ u8 block[16];
+ int i;
+ while (xlen >= 16) {
+ core(checksum_state,x,checksum_state);
+ x += 16;
+ xlen -= 16;
+ }
+ FOR(i,16) block[i] = 0;
+ FOR(i,xlen) block[i] = x[i];
+ block[xlen] = 1;
+ checksum_state[0] ^= 1;
+ core(checksum_state,block,checksum_state);
+}
+
+static void printword(const char *s)
+{
+ if (!*s) putchar('-');
+ while (*s) {
+ if (*s == ' ') putchar('_');
+ else if (*s == '\t') putchar('_');
+ else if (*s == '\r') putchar('_');
+ else if (*s == '\n') putchar('_');
+ else putchar(*s);
+ ++s;
+ }
+ putchar(' ');
+}
+
+static void printnum(long long x)
+{
+ printf("%lld ",x);
+}
+
+void fail(const char *why)
+{
+ fprintf(stderr,"%s\n",why);
+ exit(111);
+}
+
+unsigned char *alignedcalloc(unsigned long long len)
+{
+ unsigned char *x = (unsigned char *) calloc(1,len + 256);
+ long long i;
+ if (!x) fail("out of memory");
+ /* will never deallocate so shifting is ok */
+ for (i = 0;i < len + 256;++i) x[i] = random();
+ x += 64;
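+  /* 63 & -x is the distance to the next 64-byte boundary, so x ends up
+     64-byte aligned with at least 64 bytes of slack behind it */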
+ x += 63 & (-(unsigned long) x);
+ for (i = 0;i < len;++i) x[i] = 0;
+ return x;
+}
+
+#define TIMINGS 63
+static long long cycles[TIMINGS + 1];
+
+void limits()
+{
+#ifdef RLIM_INFINITY
+ struct rlimit r;
+ r.rlim_cur = 0;
+ r.rlim_max = 0;
+#ifdef RLIMIT_NOFILE
+ setrlimit(RLIMIT_NOFILE,&r);
+#endif
+#ifdef RLIMIT_NPROC
+ setrlimit(RLIMIT_NPROC,&r);
+#endif
+#ifdef RLIMIT_CORE
+ setrlimit(RLIMIT_CORE,&r);
+#endif
+#endif
+}
+
+static unsigned char randombyte[1];
+
+int main()
+{
+ long long i;
+ long long j;
+ long long abovej;
+ long long belowj;
+ long long checksumcycles;
+ long long cyclespersecond;
+
+ cycles[0] = cpucycles();
+ cycles[1] = cpucycles();
+ cyclespersecond = cpucycles_persecond();
+
+ kernelrandombytes(randombyte,1);
+ preallocate();
+ limits();
+
+ allocate();
+ srandom(getpid());
+
+ cycles[0] = cpucycles();
+ test();
+ cycles[1] = cpucycles();
+ checksumcycles = cycles[1] - cycles[0];
+
+ predoit();
+ for (i = 0;i <= TIMINGS;++i) {
+ cycles[i] = cpucycles();
+ }
+ for (i = 0;i <= TIMINGS;++i) {
+ cycles[i] = cpucycles();
+ doit();
+ }
+ for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+ for (j = 0;j < TIMINGS;++j) {
+ belowj = 0;
+ for (i = 0;i < TIMINGS;++i) if (cycles[i] < cycles[j]) ++belowj;
+ abovej = 0;
+ for (i = 0;i < TIMINGS;++i) if (cycles[i] > cycles[j]) ++abovej;
+ if (belowj * 2 < TIMINGS && abovej * 2 < TIMINGS) break;
+ }
+
+ for (i = 0;i < 32;++i) {
+ checksum_hex[2 * i] = "0123456789abcdef"[15 & (checksum_state[i] >> 4)];
+ checksum_hex[2 * i + 1] = "0123456789abcdef"[15 & checksum_state[i]];
+ }
+ checksum_hex[2 * i] = 0;
+
+ printword(checksum_hex);
+ printnum(cycles[j]);
+ printnum(checksumcycles);
+ printnum(cyclespersecond);
+ printword(primitiveimplementation);
+ printf("\n");
+ return 0;
+}
diff --git a/aeadaes256ocbtaglen128v1-rv32/try.c b/aeadaes256ocbtaglen128v1-rv32/try.c
new file mode 100644
index 0000000..687a21d
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/try.c
@@ -0,0 +1,242 @@
+/*
+ * crypto_aead/try.c version 20200406
+ * D. J. Bernstein
+ * Public domain.
+ * Auto-generated by trygen.py; do not edit.
+ */
+
+#include "crypto_aead.h"
+#include "try.h"
+
+const char *primitiveimplementation = crypto_aead_IMPLEMENTATION;
+
+#define TUNE_BYTES 1536
+#ifdef SMALL
+#define MAXTEST_BYTES 128
+#else
+#define MAXTEST_BYTES 4096
+#endif
+#ifdef SMALL
+#define LOOPS 64
+#else
+#define LOOPS 512
+#endif
+
+static unsigned char *k;
+static unsigned char *s;
+static unsigned char *p;
+static unsigned char *a;
+static unsigned char *m;
+static unsigned char *c;
+static unsigned char *t;
+static unsigned char *r;
+static unsigned char *k2;
+static unsigned char *s2;
+static unsigned char *p2;
+static unsigned char *a2;
+static unsigned char *m2;
+static unsigned char *c2;
+static unsigned char *t2;
+static unsigned char *r2;
+#define klen crypto_aead_KEYBYTES
+#define slen crypto_aead_NSECBYTES
+#define plen crypto_aead_NPUBBYTES
+unsigned long long alen;
+unsigned long long mlen;
+unsigned long long clen;
+unsigned long long tlen;
+#define rlen crypto_aead_NSECBYTES
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+ unsigned long long alloclen = 0;
+ if (alloclen < TUNE_BYTES) alloclen = TUNE_BYTES;
+ if (alloclen < MAXTEST_BYTES + crypto_aead_ABYTES) alloclen = MAXTEST_BYTES + crypto_aead_ABYTES;
+ if (alloclen < crypto_aead_KEYBYTES) alloclen = crypto_aead_KEYBYTES;
+ if (alloclen < crypto_aead_NSECBYTES) alloclen = crypto_aead_NSECBYTES;
+ if (alloclen < crypto_aead_NPUBBYTES) alloclen = crypto_aead_NPUBBYTES;
+ if (alloclen < crypto_aead_NSECBYTES) alloclen = crypto_aead_NSECBYTES;
+ k = alignedcalloc(alloclen);
+ s = alignedcalloc(alloclen);
+ p = alignedcalloc(alloclen);
+ a = alignedcalloc(alloclen);
+ m = alignedcalloc(alloclen);
+ c = alignedcalloc(alloclen);
+ t = alignedcalloc(alloclen);
+ r = alignedcalloc(alloclen);
+ k2 = alignedcalloc(alloclen);
+ s2 = alignedcalloc(alloclen);
+ p2 = alignedcalloc(alloclen);
+ a2 = alignedcalloc(alloclen);
+ m2 = alignedcalloc(alloclen);
+ c2 = alignedcalloc(alloclen);
+ t2 = alignedcalloc(alloclen);
+ r2 = alignedcalloc(alloclen);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+ crypto_aead_encrypt(c,&clen,m,TUNE_BYTES,a,TUNE_BYTES,s,p,k);
+ crypto_aead_decrypt(t,&tlen,r,c,clen,a,TUNE_BYTES,p,k);
+}
+
+void test(void)
+{
+ unsigned long long loop;
+
+ for (loop = 0;loop < LOOPS;++loop) {
+ mlen = myrandom() % (MAXTEST_BYTES + 1);
+ alen = myrandom() % (MAXTEST_BYTES + 1);
+
+ clen = mlen + crypto_aead_ABYTES;
+ output_prepare(c2,c,clen);
+ input_prepare(m2,m,mlen);
+ input_prepare(a2,a,alen);
+ input_prepare(s2,s,slen);
+ input_prepare(p2,p,plen);
+ input_prepare(k2,k,klen);
+ if (crypto_aead_encrypt(c,&clen,m,mlen,a,alen,s,p,k) != 0) fail("crypto_aead_encrypt returns nonzero");
+ if (clen < mlen) fail("crypto_aead_encrypt returns smaller output than input");
+ if (clen > mlen + crypto_aead_ABYTES) fail("crypto_aead_encrypt returns more than crypto_aead_ABYTES extra bytes");
+ checksum(c,clen);
+ output_compare(c2,c,clen,"crypto_aead_encrypt");
+ input_compare(m2,m,mlen,"crypto_aead_encrypt");
+ input_compare(a2,a,alen,"crypto_aead_encrypt");
+ input_compare(s2,s,slen,"crypto_aead_encrypt");
+ input_compare(p2,p,plen,"crypto_aead_encrypt");
+ input_compare(k2,k,klen,"crypto_aead_encrypt");
+
+ double_canary(c2,c,clen);
+ double_canary(m2,m,mlen);
+ double_canary(a2,a,alen);
+ double_canary(s2,s,slen);
+ double_canary(p2,p,plen);
+ double_canary(k2,k,klen);
+ if (crypto_aead_encrypt(c2,&clen,m2,mlen,a2,alen,s2,p2,k2) != 0) fail("crypto_aead_encrypt returns nonzero");
+ if (memcmp(c2,c,clen) != 0) fail("crypto_aead_encrypt is nondeterministic");
+
+#if crypto_aead_NOOVERLAP == 1
+#else
+ double_canary(c2,c,clen);
+ double_canary(m2,m,mlen);
+ double_canary(a2,a,alen);
+ double_canary(s2,s,slen);
+ double_canary(p2,p,plen);
+ double_canary(k2,k,klen);
+ if (crypto_aead_encrypt(m2,&clen,m2,mlen,a,alen,s,p,k) != 0) fail("crypto_aead_encrypt with m=c overlap returns nonzero");
+ if (memcmp(m2,c,clen) != 0) fail("crypto_aead_encrypt does not handle m=c overlap");
+ memcpy(m2,m,mlen);
+ if (crypto_aead_encrypt(a2,&clen,m,mlen,a2,alen,s,p,k) != 0) fail("crypto_aead_encrypt with a=c overlap returns nonzero");
+ if (memcmp(a2,c,clen) != 0) fail("crypto_aead_encrypt does not handle a=c overlap");
+ memcpy(a2,a,alen);
+ if (crypto_aead_encrypt(s2,&clen,m,mlen,a,alen,s2,p,k) != 0) fail("crypto_aead_encrypt with s=c overlap returns nonzero");
+ if (memcmp(s2,c,clen) != 0) fail("crypto_aead_encrypt does not handle s=c overlap");
+ memcpy(s2,s,slen);
+ if (crypto_aead_encrypt(p2,&clen,m,mlen,a,alen,s,p2,k) != 0) fail("crypto_aead_encrypt with p=c overlap returns nonzero");
+ if (memcmp(p2,c,clen) != 0) fail("crypto_aead_encrypt does not handle p=c overlap");
+ memcpy(p2,p,plen);
+ if (crypto_aead_encrypt(k2,&clen,m,mlen,a,alen,s,p,k2) != 0) fail("crypto_aead_encrypt with k=c overlap returns nonzero");
+ if (memcmp(k2,c,clen) != 0) fail("crypto_aead_encrypt does not handle k=c overlap");
+ memcpy(k2,k,klen);
+#endif
+
+ tlen = clen;
+ output_prepare(t2,t,tlen);
+ output_prepare(r2,r,rlen);
+ memcpy(c2,c,clen);
+ double_canary(c2,c,clen);
+ memcpy(a2,a,alen);
+ double_canary(a2,a,alen);
+ memcpy(p2,p,plen);
+ double_canary(p2,p,plen);
+ memcpy(k2,k,klen);
+ double_canary(k2,k,klen);
+ if (crypto_aead_decrypt(t,&tlen,r,c,clen,a,alen,p,k) != 0) fail("crypto_aead_decrypt returns nonzero");
+ if (tlen != mlen) fail("crypto_aead_decrypt does not match mlen");
+ if (memcmp(t,m,mlen) != 0) fail("crypto_aead_decrypt does not match m");
+ if (memcmp(r,s,slen) != 0) fail("crypto_aead_decrypt does not match s");
+ checksum(t,tlen);
+ checksum(r,rlen);
+ output_compare(t2,t,clen,"crypto_aead_decrypt");
+ output_compare(r2,r,rlen,"crypto_aead_decrypt");
+ input_compare(c2,c,clen,"crypto_aead_decrypt");
+ input_compare(a2,a,alen,"crypto_aead_decrypt");
+ input_compare(p2,p,plen,"crypto_aead_decrypt");
+ input_compare(k2,k,klen,"crypto_aead_decrypt");
+
+ double_canary(t2,t,tlen);
+ double_canary(r2,r,rlen);
+ double_canary(c2,c,clen);
+ double_canary(a2,a,alen);
+ double_canary(p2,p,plen);
+ double_canary(k2,k,klen);
+ if (crypto_aead_decrypt(t2,&tlen,r2,c2,clen,a2,alen,p2,k2) != 0) fail("crypto_aead_decrypt returns nonzero");
+ if (memcmp(t2,t,tlen) != 0) fail("crypto_aead_decrypt is nondeterministic");
+ if (memcmp(r2,r,rlen) != 0) fail("crypto_aead_decrypt is nondeterministic");
+
+#if crypto_aead_NOOVERLAP == 1
+#else
+ double_canary(t2,t,tlen);
+ double_canary(r2,r,rlen);
+ double_canary(c2,c,clen);
+ double_canary(a2,a,alen);
+ double_canary(p2,p,plen);
+ double_canary(k2,k,klen);
+ if (crypto_aead_decrypt(c2,&tlen,r,c2,clen,a,alen,p,k) != 0) fail("crypto_aead_decrypt with c=t overlap returns nonzero");
+ if (memcmp(c2,t,tlen) != 0) fail("crypto_aead_decrypt does not handle c=t overlap");
+ memcpy(c2,c,clen);
+ if (crypto_aead_decrypt(a2,&tlen,r,c,clen,a2,alen,p,k) != 0) fail("crypto_aead_decrypt with a=t overlap returns nonzero");
+ if (memcmp(a2,t,tlen) != 0) fail("crypto_aead_decrypt does not handle a=t overlap");
+ memcpy(a2,a,alen);
+ if (crypto_aead_decrypt(p2,&tlen,r,c,clen,a,alen,p2,k) != 0) fail("crypto_aead_decrypt with p=t overlap returns nonzero");
+ if (memcmp(p2,t,tlen) != 0) fail("crypto_aead_decrypt does not handle p=t overlap");
+ memcpy(p2,p,plen);
+ if (crypto_aead_decrypt(k2,&tlen,r,c,clen,a,alen,p,k2) != 0) fail("crypto_aead_decrypt with k=t overlap returns nonzero");
+ if (memcmp(k2,t,tlen) != 0) fail("crypto_aead_decrypt does not handle k=t overlap");
+ memcpy(k2,k,klen);
+#endif
+
+#if crypto_aead_NOOVERLAP == 1
+#else
+ double_canary(t2,t,tlen);
+ double_canary(r2,r,rlen);
+ double_canary(c2,c,clen);
+ double_canary(a2,a,alen);
+ double_canary(p2,p,plen);
+ double_canary(k2,k,klen);
+ if (crypto_aead_decrypt(t,&tlen,c2,c2,clen,a,alen,p,k) != 0) fail("crypto_aead_decrypt with c=r overlap returns nonzero");
+ if (memcmp(c2,r,rlen) != 0) fail("crypto_aead_decrypt does not handle c=r overlap");
+ memcpy(c2,c,clen);
+ if (crypto_aead_decrypt(t,&tlen,a2,c,clen,a2,alen,p,k) != 0) fail("crypto_aead_decrypt with a=r overlap returns nonzero");
+ if (memcmp(a2,r,rlen) != 0) fail("crypto_aead_decrypt does not handle a=r overlap");
+ memcpy(a2,a,alen);
+ if (crypto_aead_decrypt(t,&tlen,p2,c,clen,a,alen,p2,k) != 0) fail("crypto_aead_decrypt with p=r overlap returns nonzero");
+ if (memcmp(p2,r,rlen) != 0) fail("crypto_aead_decrypt does not handle p=r overlap");
+ memcpy(p2,p,plen);
+ if (crypto_aead_decrypt(t,&tlen,k2,c,clen,a,alen,p,k2) != 0) fail("crypto_aead_decrypt with k=r overlap returns nonzero");
+ if (memcmp(k2,r,rlen) != 0) fail("crypto_aead_decrypt does not handle k=r overlap");
+ memcpy(k2,k,klen);
+#endif
+
+ c[myrandom() % clen] += 1 + (myrandom() % 255);
+ if (crypto_aead_decrypt(t,&tlen,r,c,clen,a,alen,p,k) == 0)
+ if ((tlen != mlen) || (memcmp(t,m,mlen) != 0) || (memcmp(r,s,slen) != 0))
+ fail("crypto_aead_decrypt allows trivial forgeries");
+ c[myrandom() % clen] += 1 + (myrandom() % 255);
+ if (crypto_aead_decrypt(t,&tlen,r,c,clen,a,alen,p,k) == 0)
+ if ((tlen != mlen) || (memcmp(t,m,mlen) != 0) || (memcmp(r,s,slen) != 0))
+ fail("crypto_aead_decrypt allows trivial forgeries");
+ c[myrandom() % clen] += 1 + (myrandom() % 255);
+ if (crypto_aead_decrypt(t,&tlen,r,c,clen,a,alen,p,k) == 0)
+ if ((tlen != mlen) || (memcmp(t,m,mlen) != 0) || (memcmp(r,s,slen) != 0))
+ fail("crypto_aead_decrypt allows trivial forgeries");
+ }
+}
diff --git a/aeadaes256ocbtaglen128v1-rv32/try.h b/aeadaes256ocbtaglen128v1-rv32/try.h
new file mode 100644
index 0000000..47db359
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/try.h
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+/* provided by try.c: */
+extern const char *primitiveimplementation;
+extern void preallocate(void);
+extern void allocate(void);
+extern void test(void);
+extern void predoit(void);
+extern void doit(void);
+
+/* provided by try-anything.c: */
+extern void fail(const char *);
+extern unsigned char *alignedcalloc(unsigned long long);
+extern void checksum(const unsigned char *,unsigned long long);
+extern void double_canary(unsigned char *,unsigned char *,unsigned long long);
+extern void input_prepare(unsigned char *,unsigned char *,unsigned long long);
+extern void output_prepare(unsigned char *,unsigned char *,unsigned long long);
+extern void input_compare(const unsigned char *,const unsigned char *,unsigned long long,const char *);
+extern void output_compare(const unsigned char *,const unsigned char *,unsigned long long,const char *);
+extern unsigned long long myrandom(void);
diff --git a/aeadaes256ocbtaglen128v1-rv32/verify.c b/aeadaes256ocbtaglen128v1-rv32/verify.c
new file mode 100644
index 0000000..d356060
--- /dev/null
+++ b/aeadaes256ocbtaglen128v1-rv32/verify.c
@@ -0,0 +1,24 @@
+#include "crypto_verify.h"
+
+int crypto_verify(const unsigned char *x,const unsigned char *y)
+{
+ unsigned int differentbits = 0;
+#define F(i) differentbits |= x[i] ^ y[i];
+ F(0)
+ F(1)
+ F(2)
+ F(3)
+ F(4)
+ F(5)
+ F(6)
+ F(7)
+ F(8)
+ F(9)
+ F(10)
+ F(11)
+ F(12)
+ F(13)
+ F(14)
+ F(15)
+ return (1 & ((differentbits - 1) >> 8)) - 1;
+}
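+
+/*
+  differentbits accumulates in the low 8 bits, so (differentbits - 1) >> 8
+  has its low bit set exactly when the inputs match; the expression thus
+  returns 0 on equality and -1 otherwise without a secret-dependent branch.
+*/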
diff --git a/aes256decrypt-rv32/amd64cpuinfo.c b/aes256decrypt-rv32/amd64cpuinfo.c
deleted file mode 100644
index 076e7eb..0000000
--- a/aes256decrypt-rv32/amd64cpuinfo.c
+++ /dev/null
@@ -1,17 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include "osfreq.c"
-
-long long cpucycles_riscv(void)
-{
- unsigned long long result;
- asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
- : "=a" (result) :: "%rdx");
- return result;
-}
-
-long long cpucycles_riscv_persecond(void)
-{
- return osfreq();
-}
diff --git a/aes256decrypt-rv32/osfreq.c b/aes256decrypt-rv32/osfreq.c
deleted file mode 100644
index b705fa4..0000000
--- a/aes256decrypt-rv32/osfreq.c
+++ /dev/null
@@ -1,93 +0,0 @@
-static double osfreq(void)
-{
- FILE *f;
- char *x;
- double result;
- int s;
-
- f = fopen("/etc/cpucyclespersecond", "r");
- if (f) {
- s = fscanf(f,"%lf",&result);
- fclose(f);
- if (s > 0) return result;
- }
-
- f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed", "r");
- if (f) {
- s = fscanf(f,"%lf",&result);
- fclose(f);
- if (s > 0) return 1000.0 * result;
- }
-
- f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq", "r");
- if (f) {
- s = fscanf(f,"%lf",&result);
- fclose(f);
- if (s > 0) return 1000.0 * result;
- }
-
- f = fopen("/sys/devices/system/cpu/cpu0/clock_tick", "r");
- if (f) {
- s = fscanf(f,"%lf",&result);
- fclose(f);
- if (s > 0) return result;
- }
-
- f = fopen("/proc/cpuinfo","r");
- if (f) {
- for (;;) {
- s = fscanf(f,"cpu MHz : %lf",&result);
- if (s > 0) break;
- if (s == 0) s = fscanf(f,"%*[^\n]\n");
- if (s < 0) { result = 0; break; }
- }
- fclose(f);
- if (result) return 1000000.0 * result;
- }
-
- f = fopen("/proc/cpuinfo","r");
- if (f) {
- for (;;) {
- s = fscanf(f,"clock : %lf",&result);
- if (s > 0) break;
- if (s == 0) s = fscanf(f,"%*[^\n]\n");
- if (s < 0) { result = 0; break; }
- }
- fclose(f);
- if (result) return 1000000.0 * result;
- }
-
- f = popen("sysctl hw.cpufrequency 2>/dev/null","r");
- if (f) {
- s = fscanf(f,"hw.cpufrequency: %lf",&result);
- pclose(f);
- if (s > 0) if (result > 0) return result;
- }
-
- f = popen("/usr/sbin/lsattr -E -l proc0 -a frequency 2>/dev/null","r");
- if (f) {
- s = fscanf(f,"frequency %lf",&result);
- pclose(f);
- if (s > 0) return result;
- }
-
- f = popen("/usr/sbin/psrinfo -v 2>/dev/null","r");
- if (f) {
- for (;;) {
- s = fscanf(f," The %*s processor operates at %lf MHz",&result);
- if (s > 0) break;
- if (s == 0) s = fscanf(f,"%*[^\n]\n");
- if (s < 0) { result = 0; break; }
- }
- pclose(f);
- if (result) return 1000000.0 * result;
- }
-
- x = getenv("cpucyclespersecond");
- if (x) {
- s = sscanf(x,"%lf",&result);
- if (s > 0) return result;
- }
-
- return 0;
-}
diff --git a/aes256gcmv1standalone-rv32/encrypt.c b/aes256gcmv1standalone-rv32/encrypt.c
index e1e9965..cf45407 100644
--- a/aes256gcmv1standalone-rv32/encrypt.c
+++ b/aes256gcmv1standalone-rv32/encrypt.c
@@ -249,190 +249,9 @@ static inline int64_t _rv64_clmulh(int64_t rs1, int64_t rs2)
/* this is basically Supercop's crypto_aead/aes256gcmv1/dolbeau/aesenc-int,
but without the unrolling.
- So we have a thin compatibility layer to SSE's __m128i data format
- and associated instructions to support GHASH & the full algo.
*/
-/* ouch */
-typedef struct {
- uint64_t l;
- uint64_t h;
-} __m128i;
-
-//#define _mm_loadu_si128(a) (*(const __m128i*)a)
-static inline __m128i _mm_loadu_si128(const __m128i *ptr) {
- __m128i r;
- r.l = ((const uint64_t*)ptr)[0];
- r.h = ((const uint64_t*)ptr)[1];
- return r;
-}
-
-//#define _mm_storeu_si128(x,a) (*(__m128i*)x)=a
-static inline void _mm_storeu_si128(__m128i *ptr, const __m128i data) {
- ((uint64_t*)ptr)[0] = data.l;
- ((uint64_t*)ptr)[1] = data.h;
-}
-
-static inline __m128i _mm_clmulepi64_si128(const __m128i a, const __m128i b, const int x) {
- __m128i r;
- switch (x) {
- case 0x00:
- r.l = _rv64_clmul(a.l, b.l);
- r.h = _rv64_clmulh(a.l, b.l);
- break;
- case 0x01:
- r.l = _rv64_clmul(a.l, b.h);
- r.h = _rv64_clmulh(a.l, b.h);
- break;
- case 0x10:
- r.l = _rv64_clmul(a.h, b.l);
- r.h = _rv64_clmulh(a.h, b.l);
- break;
- case 0x11:
- r.l = _rv64_clmul(a.h, b.h);
- r.h = _rv64_clmulh(a.h, b.h);
- break;
- }
- return r;
-}
-
-/*
-static inline __m128i (const __m128i a, const __m128i b) {
- __m128i r;
- return r;
-}
-*/
-static inline __m128i _mm_xor_si128(const __m128i a, const __m128i b) {
- __m128i r;
- r.l = a.l ^ b.l;
- r.h = a.h ^ b.h;
- return r;
-}
-static inline __m128i _mm_or_si128(const __m128i a, const __m128i b) {
- __m128i r;
- r.l = a.l | b.l;
- r.h = a.h | b.h;
- return r;
-}
-static inline __m128i _mm_and_si128(const __m128i a, const __m128i b) {
- __m128i r;
- r.l = a.l & b.l;
- r.h = a.h & b.h;
- return r;
-}
-static inline __m128i _mm_slli_si128(const __m128i a, const int b) {
- __m128i r;
- switch (b) {
- case 4:
- r.l = a.l << 32;
- r.h = a.h << 32 | a.l >> 32;
- break;
- case 8:
- r.l = 0;
- r.h = a.l;
- break;
- case 12:
- r.l = 0;
- r.h = a.l << 32;
- break;
- }
- return r;
-}
-static inline __m128i _mm_srli_si128(const __m128i a, const int b) {
- __m128i r;
- switch (b) {
- case 4:
- r.l = a.l >> 32 | a.h << 32;
- r.h = a.h >> 32;
- break;
- case 8:
- r.l = a.h;
- r.h = 0;
- break;
- case 12:
- r.l = a.h >> 32;
- r.h = 0;
- break;
- }
- return r;
-}
-static inline __m128i _mm_srli_epi32(const __m128i a, const int b) {
- __m128i r;
- r.l = ((a.l & 0x00000000FFFFFFFFull) >> b) | (((a.l & 0xFFFFFFFF00000000ull) >> b) & 0xFFFFFFFF00000000ull);
- r.h = ((a.h & 0x00000000FFFFFFFFull) >> b) | (((a.h & 0xFFFFFFFF00000000ull) >> b) & 0xFFFFFFFF00000000ull);
- return r;
-}
-static inline __m128i _mm_slli_epi32(const __m128i a, const int b) {
- __m128i r;
- r.l = (((a.l & 0x00000000FFFFFFFFull) << b) & 0x00000000FFFFFFFFull) | ((a.l & 0xFFFFFFFF00000000ull) << b);
- r.h = (((a.h & 0x00000000FFFFFFFFull) << b) & 0x00000000FFFFFFFFull) | ((a.h & 0xFFFFFFFF00000000ull) << b);
- return r;
-}
-static inline __m128i _mm_insert_epi64(const __m128i a, const uint64_t x, const int b) {
- __m128i r;
- if (b == 0) {
- r.l = x;
- r.h = a.h;
- } else {
- r.l = a.l;
- r.h = x;
- }
- return r;
-}
-static inline __m128i _mm_setzero_si128(void) {
- __m128i r;
- r.l = 0;
- r.h = 0;
- return r;
-}
-static inline __m128i _mm_set1_epi32(const uint32_t x) {
- __m128i r;
- r.l = x | ((uint64_t)x) << 32;
- r.h = x | ((uint64_t)x) << 32;
- return r;
-}
-
-static inline uint64_t bytereverse64(const uint64_t a) {
- uint64_t r;
- r = (uint32_t)_rv32_grev((a>>32), 24) | (((uint64_t)_rv32_grev((a&0xFFFFFFFF), 24))<<32);
- return r;
-}
-static inline __m128i bytereverse128(const __m128i a) {
- __m128i r;
- r.l = bytereverse64(a.h);
- r.h = bytereverse64(a.l);
- return r;
-}
-
-static inline uint64_t bitreverse64(const uint64_t a) {
- uint64_t r;
- r = (uint32_t)_rv32_grev((a&0xFFFFFFFF), 7) | (((uint64_t)_rv32_grev((a>>32), 7))<<32);
- return r;
-}
-static inline __m128i bitreverse128(const __m128i a) {
- __m128i r;
- r.l = bitreverse64(a.l);
- r.h = bitreverse64(a.h);
- return r;
-}
-
-static inline uint64_t wordreverse64(const uint64_t a) {
- uint64_t r;
- r = (a>>32)|(a<<32);
- return r;
-}
-static inline __m128i wordreverse128(const __m128i a) {
- __m128i r;
- r.l = wordreverse64(a.h);
- r.h = wordreverse64(a.l);
- return r;
-}
-static inline __m128i doublewordreverse128(const __m128i a) {
- __m128i r;
- r.l = a.h;
- r.h = a.l;
- return r;
-}
+#include "m128_compat.h"
static inline void addmul_rv(unsigned char *c,
const unsigned char *a, int xlen,
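
Nothing in the hunk above changes behaviour: the inline helpers that used to precede addmul_rv() move verbatim into the new header. As a hypothetical illustration of how GHASH-style code consumes this layer (the clmul128/lo/hi names are ours, not the patch's), a 128x128 -> 256-bit carry-less multiply decomposes over the 64-bit halves like so:

    /* Sketch only: schoolbook carry-less multiply via the compat layer. */
    #include "m128_compat.h"

    static inline void clmul128(__m128i a, __m128i b, __m128i *lo, __m128i *hi)
    {
      __m128i t0  = _mm_clmulepi64_si128(a, b, 0x00);   /* a.l x b.l */
      __m128i t1  = _mm_clmulepi64_si128(a, b, 0x01);   /* a.l x b.h */
      __m128i t2  = _mm_clmulepi64_si128(a, b, 0x10);   /* a.h x b.l */
      __m128i t3  = _mm_clmulepi64_si128(a, b, 0x11);   /* a.h x b.h */
      __m128i mid = _mm_xor_si128(t1, t2);              /* middle terms */
      /* fold the middle 128 bits across the low/high halves */
      *lo = _mm_xor_si128(t0, _mm_slli_si128(mid, 8));
      *hi = _mm_xor_si128(t3, _mm_srli_si128(mid, 8));
    }
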
diff --git a/m128_compat.h b/m128_compat.h
new file mode 100644
index 0000000..2f79b51
--- /dev/null
+++ b/m128_compat.h
@@ -0,0 +1,241 @@
+/*
+ * A thin compatibility layer to SSE's __m128i data format
+ * and associated instructions to support GHASH & the full algo.
+*/
+
+#ifndef __M128_COMPAT_H__
+#define __M128_COMPAT_H__
+
+#include "new_instructions_support_b.h"
+
+#include <stdint.h>
+#include <stdio.h> /* fprintf in the unimplemented-shift diagnostics */
+
+/* ouch */
+typedef struct {
+ uint64_t l;
+ uint64_t h;
+} __m128i;
+
+//#define _mm_loadu_si128(a) (*(const __m128i*)a)
+static inline __m128i _mm_loadu_si128(const __m128i *ptr) {
+ __m128i r;
+ r.l = ((const uint64_t*)ptr)[0];
+ r.h = ((const uint64_t*)ptr)[1];
+ return r;
+}
+
+//#define _mm_storeu_si128(x,a) (*(__m128i*)x)=a
+static inline void _mm_storeu_si128(__m128i *ptr, const __m128i data) {
+ ((uint64_t*)ptr)[0] = data.l;
+ ((uint64_t*)ptr)[1] = data.h;
+}
+static inline void _mm_store_si128(__m128i *ptr, const __m128i data) {
+ ((uint64_t*)ptr)[0] = data.l;
+ ((uint64_t*)ptr)[1] = data.h;
+}
+static inline void _mm_storel_epi64 (__m128i *ptr, const __m128i data) {
+ ((uint64_t*)ptr)[0] = data.l;
+}
+
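+/* Note: imm8 is decoded below with bit 0 selecting b's half and bit 4
+ * selecting a's half -- the mirror image of Intel's PCLMULQDQ, which
+ * keys a off imm8[0] and b off imm8[4]. GHASH XORs the two middle
+ * products together, so the swap is harmless for this code base. */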
+static inline __m128i _mm_clmulepi64_si128(const __m128i a, const __m128i b, const int x) {
+ __m128i r;
+ switch (x) {
+ case 0x00:
+ r.l = _rv64_clmul(a.l, b.l);
+ r.h = _rv64_clmulh(a.l, b.l);
+ break;
+ case 0x01:
+ r.l = _rv64_clmul(a.l, b.h);
+ r.h = _rv64_clmulh(a.l, b.h);
+ break;
+ case 0x10:
+ r.l = _rv64_clmul(a.h, b.l);
+ r.h = _rv64_clmulh(a.h, b.l);
+ break;
+ case 0x11:
+ r.l = _rv64_clmul(a.h, b.h);
+ r.h = _rv64_clmulh(a.h, b.h);
+ break;
+ }
+ return r;
+}
+
+static inline __m128i _mm_xor_si128(const __m128i a, const __m128i b) {
+ __m128i r;
+ r.l = a.l ^ b.l;
+ r.h = a.h ^ b.h;
+ return r;
+}
+static inline __m128i _mm_or_si128(const __m128i a, const __m128i b) {
+ __m128i r;
+ r.l = a.l | b.l;
+ r.h = a.h | b.h;
+ return r;
+}
+static inline __m128i _mm_and_si128(const __m128i a, const __m128i b) {
+ __m128i r;
+ r.l = a.l & b.l;
+ r.h = a.h & b.h;
+ return r;
+}
+static inline __m128i _mm_slli_si128(const __m128i a, const int b) {
+ __m128i r;
+ switch (b) {
+ case 4:
+ r.l = a.l << 32;
+ r.h = a.h << 32 | a.l >> 32;
+ break;
+ case 8:
+ r.l = 0;
+ r.h = a.l;
+ break;
+ case 12:
+ r.l = 0;
+ r.h = a.l << 32;
+ break;
+ default:
+ fprintf(stderr, "%s: %d unimplemented\n", __PRETTY_FUNCTION__, b);
+ break;
+ }
+ return r;
+}
+static inline __m128i _mm_srli_si128(const __m128i a, const int b) {
+ __m128i r;
+ switch (b) {
+ case 1:
+ r.l = a.l >> 8 | a.h << 56;
+ r.h = a.h >> 8;
+ break;
+ case 4:
+ r.l = a.l >> 32 | a.h << 32;
+ r.h = a.h >> 32;
+ break;
+ case 8:
+ r.l = a.h;
+ r.h = 0;
+ break;
+ case 12:
+ r.l = a.h >> 32;
+ r.h = 0;
+ break;
+ default:
+ fprintf(stderr, "%s: %d unimplemented\n", __PRETTY_FUNCTION__, b);
+ break;
+ }
+ return r;
+}
+static inline __m128i _mm_srli_epi32(const __m128i a, const int b) {
+ __m128i r;
+ r.l = ((a.l & 0x00000000FFFFFFFFull) >> b) | (((a.l & 0xFFFFFFFF00000000ull) >> b) & 0xFFFFFFFF00000000ull);
+ r.h = ((a.h & 0x00000000FFFFFFFFull) >> b) | (((a.h & 0xFFFFFFFF00000000ull) >> b) & 0xFFFFFFFF00000000ull);
+ return r;
+}
+static inline __m128i _mm_slli_epi32(const __m128i a, const int b) {
+ __m128i r;
+ r.l = (((a.l & 0x00000000FFFFFFFFull) << b) & 0x00000000FFFFFFFFull) | ((a.l & 0xFFFFFFFF00000000ull) << b);
+ r.h = (((a.h & 0x00000000FFFFFFFFull) << b) & 0x00000000FFFFFFFFull) | ((a.h & 0xFFFFFFFF00000000ull) << b);
+ return r;
+}
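+/* _mm_srai_epi32 stays disabled below: the int32_t casts truncate the
+ * high lane to zero, and the sign extension of the low lane would leak
+ * into the neighbouring lane, so the commented version was never
+ * correct. */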
+/* static inline __m128i _mm_srai_epi32(const __m128i a, const int b) { */
+/* __m128i r; */
+/* r.l = (((int32_t)(a.l & 0x00000000FFFFFFFFull)) >> b) | ((((int32_t)(a.l & 0xFFFFFFFF00000000ull)) >> b) & 0xFFFFFFFF00000000ull); */
+/* r.h = (((int32_t)(a.h & 0x00000000FFFFFFFFull)) >> b) | ((((int32_t)(a.h & 0xFFFFFFFF00000000ull)) >> b) & 0xFFFFFFFF00000000ull); */
+/* return r; */
+/* } */
+static inline __m128i _mm_insert_epi64(const __m128i a, const uint64_t x, const int b) {
+ __m128i r;
+ if (b == 0) {
+ r.l = x;
+ r.h = a.h;
+ } else {
+ r.l = a.l;
+ r.h = x;
+ }
+ return r;
+}
+static inline __m128i _mm_setzero_si128(void) {
+ __m128i r;
+ r.l = 0;
+ r.h = 0;
+ return r;
+}
+static inline __m128i _mm_set1_epi32(const uint32_t x) {
+ __m128i r;
+ r.l = x | ((uint64_t)x) << 32;
+ r.h = x | ((uint64_t)x) << 32;
+ return r;
+}
+static inline __m128i _mm_set_epi32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) {
+ __m128i r;
+ r.l = (uint64_t)e0 | ((uint64_t)e1) << 32;
+ r.h = (uint64_t)e2 | ((uint64_t)e3) << 32;
+ return r;
+}
+/* non-intel stuff, used to replace some common use cases */
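+/* grev constants used below: 24 (0b11000) byte-reverses each 32-bit
+ * word, 7 (0b00111) reverses the bits inside each byte. So
+ * bytereverse64 is a full 64-bit byte swap (it also exchanges the
+ * halves), while bitreverse64 only reflects bits within bytes -- the
+ * per-byte reflection GHASH-style code wants. */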
+static inline uint64_t bytereverse64(const uint64_t a) {
+ uint64_t r;
+ r = (uint32_t)_rv32_grev((a>>32), 24) | (((uint64_t)_rv32_grev((a&0xFFFFFFFF), 24))<<32);
+ return r;
+}
+static inline __m128i bytereverse128(const __m128i a) {
+ __m128i r;
+ r.l = bytereverse64(a.h);
+ r.h = bytereverse64(a.l);
+ return r;
+}
+
+static inline uint64_t bitreverse64(const uint64_t a) {
+ uint64_t r;
+ r = (uint32_t)_rv32_grev((a&0xFFFFFFFF), 7) | (((uint64_t)_rv32_grev((a>>32), 7))<<32);
+ return r;
+}
+static inline __m128i bitreverse128(const __m128i a) {
+ __m128i r;
+ r.l = bitreverse64(a.l);
+ r.h = bitreverse64(a.h);
+ return r;
+}
+
+static inline uint64_t wordreverse64(const uint64_t a) {
+ uint64_t r;
+ r = (a>>32)|(a<<32);
+ return r;
+}
+static inline __m128i wordreverse128(const __m128i a) {
+ __m128i r;
+ r.l = wordreverse64(a.h);
+ r.h = wordreverse64(a.l);
+ return r;
+}
+static inline __m128i doublewordreverse128(const __m128i a) {
+ __m128i r;
+ r.l = a.h;
+ r.h = a.l;
+ return r;
+}
+static inline __m128i wordrotate1l128(const __m128i a) {
+ __m128i r;
+ /* i.e. epi32 _MM_SHUFFLE(2,1,0,3) */
+ r.l = (a.h >> 32) | (a.l << 32);
+ r.h = (a.l >> 32) | (a.h << 32);
+ return r;
+}
+static inline __m128i halfwordandzero(const uint16_t a) {
+ __m128i r;
+ r.l = a;
+ r.h = 0;
+ return r;
+}
+static inline __m128i wordsign128(const __m128i a) {
+ __m128i r;
+ r.l = (a.l & 0x0000000080000000ull ? 0x00000000FFFFFFFFull : 0) | (a.l & 0x8000000000000000ull ? 0xFFFFFFFF00000000ull : 0);
+ r.h = (a.h & 0x0000000080000000ull ? 0x00000000FFFFFFFFull : 0) | (a.h & 0x8000000000000000ull ? 0xFFFFFFFF00000000ull : 0);
+ return r;
+}
+#endif // __M128_COMPAT_H__
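
A small, hypothetical smoke test (not part of the patch; it assumes new_instructions_support_b.h compiles for your target) exercising the pure shift/shuffle helpers, which need no bitmanip intrinsics:

    #include <assert.h>
    #include <string.h>
    #include "m128_compat.h"

    int main(void)
    {
      unsigned char buf[16], out[16];
      for (int i = 0; i < 16; i++) buf[i] = (unsigned char)i;

      __m128i x = _mm_loadu_si128((const __m128i *)buf);

      /* shifting left then right by the same byte count clears the
         shifted-out low bytes, matching real SSE2 behaviour */
      __m128i y = _mm_srli_si128(_mm_slli_si128(x, 8), 8);
      assert(y.l == x.l && y.h == 0);

      /* doublewordreverse128 is an involution */
      __m128i z = doublewordreverse128(doublewordreverse128(x));
      _mm_storeu_si128((__m128i *)out, z);
      assert(memcmp(buf, out, 16) == 0);

      return 0;
    }
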