Add some crypto codes to test B

This commit is contained in:
Romain Dolbeau 2020-11-05 09:17:16 -05:00
parent ad3a64482b
commit 0b6fecdeb8
41 changed files with 3858 additions and 2 deletions

View file

@ -11,11 +11,11 @@ The generated plugin is for RV32 only. It doesn't yet support all instructions;
* CLMUL*
* BDEP/BEXT/BFP
* CRC32*
* Three-operands instrutioncs (CMIX, CMOV, FS[RL]*)
* Three-operands instructions (CMIX, CMOV, FS[RL]*)
There is no support for 'partial' instructions, i.e. implementing only a subset of the functionality of one instruction. So `grev` is supported, but `rev8` alone isn't. Subextensions are defined without 'partial' instructions - so Zbb doesn't have `rev8` or `orc.B`.
This has received limited testing in a [Linux-on-Litex-VexRiscv](https://github.com/litex-hub/linux-on-litex-vexriscv) SoC. YMMV.
This has received limited testing in a [Linux-on-Litex-VexRiscv](https://github.com/litex-hub/linux-on-litex-vexriscv) SoC. YMMV. See below for some tests.
Also, the implementations of the instructions in SpinalHDL are written for functionality, and not tuned or optimized in any way for performance/area/... (file usage.txt has some numbers).
@ -34,3 +34,9 @@ Will generate a plugin supporting Zbb (using the full version of `grev` and `gor
```
./gen_plugin BitManipAll data_bitmanip.txt '*' > BitManipAllPlugin.scala
```
## Test codes
test_b.c is a small synthetic test for RV32IMAB Linux, to check B instructions with various test patterns. See the comments in the file for how to use it.
[aes256ctrstandalone-rv32] and [chacha20standalone-rv32] are stand-alone codes extracted from the [Supercop](http://bench.cr.yp.to/supercop.html) benchmark (similar to https://github.com/rdolbeau/EPI-test-codes-vector/). They should give the same results (checksum) as the version in Supercop, and can be compiled for RV32IMA or RV32IMAB. From B, they mostly rely on the rotation instructions (although the B toolchain also generates other instructions, in particular those from Zba).

View file

@ -0,0 +1,44 @@
# Builds the stand-alone aes256ctr test programs (aes256ctr, aes256ctr_small)
# for RV32 with a riscv64-unknown-elf cross toolchain.

SRCs=riscv32.c try-anything.c
OBJs=$(SRCs:.c=.o)
# Support objects: cycle counter (riscv.c) and random byte source (random.cpp).
SCLIBS=cpucycles.o kernelrandombytes.o

# Toolchain locations. Adjust to the local installation.
COMPDIR=/home/dolbeau/LITEX/riscv64-unknown-elf-gcc-8.3.0-2019.08.0-x86_64-linux-ubuntu14
ALTCOMPDIR=/opt/riscv64b
CC=$(COMPDIR)/bin/riscv64-unknown-elf-gcc
ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc
# The last assignment wins: C sources are compiled with ALTCC.
# Delete this line to fall back to the compiler under COMPDIR.
CC=$(ALTCC)
CXX=$(COMPDIR)/bin/riscv64-unknown-elf-g++
STRIP=$(COMPDIR)/bin/riscv64-unknown-elf-strip

# NEWOPT: flags for the code under test (rv32imab, defines RV32B).
# OPT: baseline rv32ima flags, also used for C++ and for linking.
NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 -DRV32B #-fno-vectorize #-DUSE_EPI_CUSTOM
OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM
# The last assignment wins: NEWOPT currently equals OPT (no B extension).
# Delete this line to build the code under test for rv32imab.
NEWOPT=$(OPT)

# These targets do not produce files of the same name.
.PHONY: all clean

all: aes256ctr aes256ctr_small

# Remove every object and binary this Makefile can produce, including SCLIBS.
clean:
	rm -f $(OBJs) $(SCLIBS) try.o try_small.o riscv32.o aes256ctr aes256ctr_small

%.o: %.c
	$(CC) $(NEWOPT) $< -c -o $@

try.o: try.c
	$(CC) $(NEWOPT) $< -c -o $@

# Same source as try.o, compiled with -DSMALL.
try_small.o: try.c
	$(CC) $(NEWOPT) $< -c -o $@ -DSMALL

riscv32.o: riscv32.c
	$(CC) $(NEWOPT) $< -c -o $@

# Linked with the C++ driver because kernelrandombytes.o is C++.
aes256ctr: $(OBJs) riscv32.o try.o $(SCLIBS)
	$(CXX) $(OPT) $^ -o $@

aes256ctr_small: $(OBJs) riscv32.o try_small.o $(SCLIBS)
	$(CXX) $(OPT) $^ -o $@

kernelrandombytes.o: random.cpp
	$(CXX) $(OPT) $< -c -o $@

# The cycle counter is always built for plain rv32ima at -O1.
cpucycles.o: riscv.c
	$(CC) $< -march=rv32ima -mabi=ilp32 -I. -O1 -c -o $@

View file

@ -0,0 +1,4 @@
/* Byte-size parameters of the primitive.
   NOTE(review): presumably consumed by the try-anything.c test harness,
   which is not visible here - confirm before changing any value. */
#define CRYPTO_OUTPUTBYTES 16
#define CRYPTO_INPUTBYTES 16
#define CRYPTO_KEYBYTES 32
#define CRYPTO_CONSTBYTES 0

View file

@ -0,0 +1,28 @@
/*
cpucycles riscv.h version 20190803
D. J. Bernstein
Romain Dolbeau
Public domain.
*/
#ifndef CPUCYCLES_riscv_h
#define CPUCYCLES_riscv_h
#ifdef __cplusplus
extern "C" {
#endif
/* Current value of the RISC-V cycle counter. */
extern long long cpucycles_riscv(void);
/* Estimated number of counter ticks per second. */
extern long long cpucycles_riscv_persecond(void);
#ifdef __cplusplus
}
#endif
/* Publish this implementation under the generic cpucycles names, unless
   another implementation has already claimed them. */
#ifndef cpucycles_implementation
#define cpucycles_implementation "riscv"
#define cpucycles cpucycles_riscv
#define cpucycles_persecond cpucycles_riscv_persecond
#endif
#endif

View file

@ -0,0 +1,18 @@
#ifndef crypto_stream_H
#define crypto_stream_H
#include "crypto_stream_aes256ctr.h"
/* Map the generic crypto_stream names onto the aes256ctr primitive, so that
   code written against crypto_stream* resolves to crypto_stream_aes256ctr*. */
#define crypto_stream crypto_stream_aes256ctr
#define crypto_stream_xor crypto_stream_aes256ctr_xor
#define crypto_stream_beforenm crypto_stream_aes256ctr_beforenm
#define crypto_stream_afternm crypto_stream_aes256ctr_afternm
#define crypto_stream_xor_afternm crypto_stream_aes256ctr_xor_afternm
#define crypto_stream_KEYBYTES crypto_stream_aes256ctr_KEYBYTES
#define crypto_stream_NONCEBYTES crypto_stream_aes256ctr_NONCEBYTES
#define crypto_stream_BEFORENMBYTES crypto_stream_aes256ctr_BEFORENMBYTES
#define crypto_stream_PRIMITIVE "aes256ctr"
#define crypto_stream_IMPLEMENTATION crypto_stream_aes256ctr_IMPLEMENTATION
#define crypto_stream_VERSION crypto_stream_aes256ctr_VERSION
#endif

View file

@ -0,0 +1,33 @@
#ifndef crypto_stream_aes256ctr_H
#define crypto_stream_aes256ctr_H
#define crypto_stream_aes256ctr_rv32_KEYBYTES 32
#define crypto_stream_aes256ctr_rv32_NONCEBYTES 16
#ifdef __cplusplus
extern "C" {
#endif
extern int crypto_stream_aes256ctr_rv32(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
extern int crypto_stream_aes256ctr_rv32_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
extern int crypto_stream_aes256ctr_rv32_beforenm(unsigned char *,const unsigned char *);
extern int crypto_stream_aes256ctr_rv32_afternm(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
extern int crypto_stream_aes256ctr_rv32_xor_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
#ifdef __cplusplus
}
#endif
#define crypto_stream_aes256ctr crypto_stream_aes256ctr_rv32
#define crypto_stream_aes256ctr_xor crypto_stream_aes256ctr_rv32_xor
#define crypto_stream_aes256ctr_beforenm crypto_stream_aes256ctr_rv32_beforenm
#define crypto_stream_aes256ctr_afternm crypto_stream_aes256ctr_rv32_afternm
#define crypto_stream_aes256ctr_xor_afternm crypto_stream_aes256ctr_rv32_xor_afternm
#define crypto_stream_aes256ctr_KEYBYTES crypto_stream_aes256ctr_rv32_KEYBYTES
#define crypto_stream_aes256ctr_NONCEBYTES crypto_stream_aes256ctr_rv32_NONCEBYTES
#define crypto_stream_aes256ctr_BEFORENMBYTES crypto_stream_aes256ctr_rv32_BEFORENMBYTES
#define crypto_stream_aes256ctr_IMPLEMENTATION "crypto_stream/aes256ctr/rv32"
#ifndef crypto_stream_aes256ctr_rv32_VERSION
#define crypto_stream_aes256ctr_rv32_VERSION "-"
#endif
#define crypto_stream_aes256ctr_VERSION crypto_stream_aes256ctr_rv32_VERSION
#endif

View file

@ -0,0 +1,6 @@
#ifndef crypto_uint32_h
#define crypto_uint32_h
/* Unsigned integer type intended to be 32 bits wide.
   Assumes `unsigned int` is 32 bits on the target - TODO confirm. */
typedef unsigned int crypto_uint32;
#endif

View file

@ -0,0 +1,6 @@
#ifndef crypto_uint64_h
#define crypto_uint64_h
/* Unsigned integer type intended to be 64 bits wide.
   Assumes `unsigned long long` is 64 bits on the target - TODO confirm. */
typedef unsigned long long crypto_uint64;
#endif

View file

@ -0,0 +1,6 @@
#ifndef crypto_uint8_h
#define crypto_uint8_h
/* Unsigned integer type intended to be 8 bits wide (one byte). */
typedef unsigned char crypto_uint8;
#endif

View file

@ -0,0 +1 @@
Romain Dolbeau

View file

@ -0,0 +1,14 @@
#ifndef kernelrandombytes_h
#define kernelrandombytes_h
#ifdef __cplusplus
extern "C" {
#endif
/* Fill the buffer (first argument) with the requested number of bytes
   (second argument). Declared with C linkage so C and C++ callers agree. */
extern void kernelrandombytes(unsigned char *,unsigned long long);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,19 @@
#include <random>
#include <functional>
// Default-constructed engine: it is never seeded from an external source,
// so the generated byte sequence is the same on every run.
std::default_random_engine generator;
// std::uniform_int_distribution is only specified for short, int, long,
// long long and their unsigned variants; instantiating it with a character
// type is undefined behaviour. Draw an unsigned int in [0,255] instead and
// narrow it when storing.
std::uniform_int_distribution<unsigned int> distribution(0,255);
// std::bind stores its own copies of the distribution and the engine.
auto rbyte = std::bind ( distribution, generator );

extern "C" {

/*
 * Fill x[0 .. xlen-1] with pseudo-random bytes.
 *
 * x    : destination buffer, at least xlen bytes long.
 * xlen : number of bytes to write; 0 leaves the buffer untouched.
 *
 * Despite the name, the bytes come from a default-seeded C++ engine and not
 * from the operating system, so they must not be used as secret material.
 */
void kernelrandombytes(unsigned char *x,unsigned long long xlen)
{
  for (unsigned long long pos = 0; pos < xlen; ++pos) {
    x[pos] = static_cast<unsigned char>(rbyte());
  }
}

}

View file

@ -0,0 +1,83 @@
/*
cpucycles/riscv.c version 20190803
D. J. Bernstein
Romain Dolbeau
Public domain.
*/
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>
/*
 * Read the RISC-V cycle counter.
 *
 * On RV64 a single rdcycle yields the whole counter. On RV32 the counter is
 * read as two 32-bit halves (rdcycleh / rdcycle); the high half is sampled
 * before and after the low half and the sequence is retried until both high
 * samples agree, so a carry between the two reads cannot produce a torn value.
 *
 * Returns the counter value as a long long.
 */
long long cpucycles_riscv(void)
{
  long long result;
#if defined(__riscv_xlen)
#if __riscv_xlen == 64
  asm volatile("rdcycle %0" : "=r" (result));
#elif __riscv_xlen == 32
  unsigned int l, h, h2;
  /* A numeric local label ("1:" / "1b") is used instead of a named one:
     a named label becomes a real assembler symbol and is emitted twice if
     the compiler inlines or clones this function, which fails to assemble. */
  asm volatile( "1:\n"
                "rdcycleh %0\n"
                "rdcycle %1\n"
                "rdcycleh %2\n"
                "bne %0, %2, 1b\n"
                : "=r" (h), "=r" (l), "=r" (h2));
  result = (((unsigned long long)h)<<32) | ((unsigned long long)l);
#else
#error "unknown __riscv_xlen"
#endif
#else // __riscv_xlen
#error "__riscv_xlen required for RISC-V support"
#endif // __riscv_xlen
  return result;
}
/* Current time of day from gettimeofday(), expressed in microseconds. */
static long long microseconds(void)
{
  struct timeval now;
  long long total;

  gettimeofday(&now, NULL);
  total = now.tv_sec * (long long) 1000000;
  total += now.tv_usec;
  return total;
}
/*
 * Estimate the counter frequency in ticks per second.
 *
 * Busy-waits until at least 10000 microseconds and at least 1000 counter
 * ticks have elapsed, then divides elapsed ticks by elapsed seconds.
 * Returns 0 if the counter did not advance.
 */
static double guessfreq(void)
{
  const long long ticks_begin = cpucycles_riscv();
  const long long usec_begin = microseconds();
  long long ticks_end;
  long long usec_end;

  for (;;) {
    ticks_end = cpucycles_riscv();
    usec_end = microseconds();
    if (!(usec_end - usec_begin < 10000 || ticks_end - ticks_begin < 1000)) {
      break;
    }
  }

  if (ticks_end <= ticks_begin) {
    return 0;
  }

  const long long ticks = ticks_end - ticks_begin;
  const long long usecs = usec_end - usec_begin;
  return ((double) ticks) / (0.000001 * (double) usecs);
}
/* Cached frequency estimate; 0 means "not measured yet". */
static long long cpufrequency = 0;

/*
 * Measure the frequency: take pairs of estimates, up to 100 times, and accept
 * the first pair whose members are within 1% of each other, storing their
 * mean. If no pair agrees, cpufrequency is left unchanged.
 */
static void init(void)
{
  int attempt = 0;

  while (attempt < 100) {
    const double first = guessfreq();
    const double second = guessfreq();

    ++attempt;
    if (!(first > 1.01 * second) && !(second > 1.01 * first)) {
      cpufrequency = 0.5 * (first + second);
      return;
    }
  }
}
/*
 * Counter ticks per second. The value is measured on the first call and
 * cached; while the cache holds 0 every call measures again.
 */
long long cpucycles_riscv_persecond(void)
{
  if (cpufrequency == 0) {
    init();
  }
  return cpufrequency;
}

View file

@ -0,0 +1,245 @@
/*
rv32.c
AES-CTR
Romain Dolbeau
Public Domain
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "crypto_stream.h"
#include "stdaes-common.h"
#include <stdint.h>
/* Byte-order reversal of a 64-bit / 32-bit value, via the compiler builtins. */
#define _bswap64(a) __builtin_bswap64(a)
#define _bswap(a) __builtin_bswap32(a)
/*
 * Encrypt one 16-byte block with AES-256, using the four-table round macro
 * AES_ROUND4.
 *
 * output   : receives the four result words.
 * input    : four input words; they are consumed pairwise swapped
 *            (1,0,3,2) and byte-reversed before the first round-key XOR.
 * aes_edrk : expanded encryption round keys; words 0..59 are read.
 *
 * Fourteen rounds are run: the initial round-key XOR, thirteen table rounds,
 * and a final round built from the FSb table only.
 */
static inline void aes256_4ft_encrypt(uint32_t *output, const uint32_t *input, const uint32_t *aes_edrk)
{
  /* Bit positions of the four bytes taken in the final round, in the order
     they are applied to the rotating state words. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  enum { LANE0 = 24, LANE1 = 16, LANE2 = 8, LANE3 = 0 };
#else
  enum { LANE0 = 0, LANE1 = 8, LANE2 = 16, LANE3 = 24 };
#endif
  /* Substitute the byte of w at bit position lane and keep it in place. */
#define AES256_4FT_SUB(w, lane) (FSb[((w) >> (lane)) & 0xFF] << (lane))

  const uint32_t rounds = 14;
  uint32_t s0, s1, s2, s3; /* current state */
  uint32_t t0, t1, t2, t3; /* next state */
  uint32_t rk = 4;         /* next round-key word; AES_ROUND4 advances it by 4 */

  /* Initial round-key addition. */
  s0 = _bswap(input[1]) ^ aes_edrk[0];
  s1 = _bswap(input[0]) ^ aes_edrk[1];
  s2 = _bswap(input[3]) ^ aes_edrk[2];
  s3 = _bswap(input[2]) ^ aes_edrk[3];

  /* Table rounds: the macro writes the new state into t0..t3. */
  while (rk < (rounds << 2)) {
    AES_ROUND4(aes_edrk, rk, t0, t1, t2, t3, s0, s1, s2, s3 );
    s0 = t0;
    s1 = t1;
    s2 = t2;
    s3 = t3;
  }

  /* Final round: byte substitution and round key only. */
  t0 = aes_edrk[rk] ^
       AES256_4FT_SUB(s0, LANE0) ^ AES256_4FT_SUB(s1, LANE1) ^
       AES256_4FT_SUB(s2, LANE2) ^ AES256_4FT_SUB(s3, LANE3);
  t1 = aes_edrk[1+rk] ^
       AES256_4FT_SUB(s1, LANE0) ^ AES256_4FT_SUB(s2, LANE1) ^
       AES256_4FT_SUB(s3, LANE2) ^ AES256_4FT_SUB(s0, LANE3);
  t2 = aes_edrk[2+rk] ^
       AES256_4FT_SUB(s2, LANE0) ^ AES256_4FT_SUB(s3, LANE1) ^
       AES256_4FT_SUB(s0, LANE2) ^ AES256_4FT_SUB(s1, LANE3);
  t3 = aes_edrk[3+rk] ^
       AES256_4FT_SUB(s3, LANE0) ^ AES256_4FT_SUB(s0, LANE1) ^
       AES256_4FT_SUB(s1, LANE2) ^ AES256_4FT_SUB(s2, LANE3);
#undef AES256_4FT_SUB

  output[0] = t0;
  output[1] = t1;
  output[2] = t2;
  output[3] = t3;
}
/*
 * Encrypt one 16-byte block with AES-256, using the single-table round macro
 * AES_ROUND1.
 *
 * output   : receives the four result words.
 * input    : four input words; they are consumed pairwise swapped
 *            (1,0,3,2) and byte-reversed before the first round-key XOR.
 * aes_edrk : expanded encryption round keys; words 0..59 are read.
 *
 * Same structure as the four-table variant: initial round-key XOR, thirteen
 * table rounds, and a final round built from the FSb table only.
 */
static inline void aes256_1ft_encrypt(uint32_t *output, const uint32_t *input, const uint32_t *aes_edrk)
{
  /* Bit positions of the four bytes taken in the final round, in the order
     they are applied to the rotating state words. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  enum { LANE0 = 24, LANE1 = 16, LANE2 = 8, LANE3 = 0 };
#else
  enum { LANE0 = 0, LANE1 = 8, LANE2 = 16, LANE3 = 24 };
#endif
  /* Substitute the byte of w at bit position lane and keep it in place. */
#define AES256_1FT_SUB(w, lane) (FSb[((w) >> (lane)) & 0xFF] << (lane))

  const unsigned int rounds = 14;
  unsigned int s0, s1, s2, s3; /* current state */
  unsigned int t0, t1, t2, t3; /* next state */
  unsigned int rk = 4;         /* next round-key word; AES_ROUND1 advances it by 4 */

  /* Initial round-key addition. */
  s0 = _bswap(input[1]) ^ aes_edrk[0];
  s1 = _bswap(input[0]) ^ aes_edrk[1];
  s2 = _bswap(input[3]) ^ aes_edrk[2];
  s3 = _bswap(input[2]) ^ aes_edrk[3];

  /* Table rounds: the macro writes the new state into t0..t3. */
  while (rk < (rounds << 2)) {
    AES_ROUND1(aes_edrk, rk, t0, t1, t2, t3, s0, s1, s2, s3 );
    s0 = t0;
    s1 = t1;
    s2 = t2;
    s3 = t3;
  }

  /* Final round: byte substitution and round key only. */
  t0 = aes_edrk[rk] ^
       AES256_1FT_SUB(s0, LANE0) ^ AES256_1FT_SUB(s1, LANE1) ^
       AES256_1FT_SUB(s2, LANE2) ^ AES256_1FT_SUB(s3, LANE3);
  t1 = aes_edrk[1+rk] ^
       AES256_1FT_SUB(s1, LANE0) ^ AES256_1FT_SUB(s2, LANE1) ^
       AES256_1FT_SUB(s3, LANE2) ^ AES256_1FT_SUB(s0, LANE3);
  t2 = aes_edrk[2+rk] ^
       AES256_1FT_SUB(s2, LANE0) ^ AES256_1FT_SUB(s3, LANE1) ^
       AES256_1FT_SUB(s0, LANE2) ^ AES256_1FT_SUB(s1, LANE3);
  t3 = aes_edrk[3+rk] ^
       AES256_1FT_SUB(s3, LANE0) ^ AES256_1FT_SUB(s0, LANE1) ^
       AES256_1FT_SUB(s1, LANE2) ^ AES256_1FT_SUB(s2, LANE3);
#undef AES256_1FT_SUB

  output[0] = t0;
  output[1] = t1;
  output[2] = t2;
  output[3] = t3;
}
/*
 * Write outlen bytes of AES-256-CTR keystream to out.
 *
 * out    : destination buffer, at least outlen bytes long.
 * outlen : number of keystream bytes to produce; 0 writes nothing.
 * n      : 16-byte initial counter block (nonce), no alignment required.
 * k      : 32-byte key.
 *
 * Always returns 0.
 */
int crypto_stream(
uint8_t *out,
uint64_t outlen,
const uint8_t *n,
const uint8_t *k
)
{
  uint32_t rkeys[64];
  uint64_t n2[2];

  aes256_setkey_encrypt(k, rkeys);

  /* n2 is in byte-reversed (i.e., native little endian)
     order to make increment/testing easier.
     The nonce is copied with memcpy: n is a byte pointer with no alignment
     guarantee, so it must not be dereferenced as a uint64_t. */
  memcpy(n2, n, sizeof n2);
  n2[0] = _bswap64(n2[0]);
  n2[1] = _bswap64(n2[1]);

  while (outlen > 0) {
    uint32_t ctr[4];   /* counter block, as the words the cipher reads */
    uint32_t block[4]; /* one keystream block, as the words the cipher writes */
    const size_t take = (outlen < sizeof block) ? (size_t) outlen : sizeof block;

    /* Same in-memory layout as n2, but with the type the cipher expects. */
    memcpy(ctr, n2, sizeof ctr);
    aes256_4ft_encrypt(block, ctr, rkeys);

    /* 128-bit increment: n2[1] is the low half, carry into n2[0]. */
    n2[1]++;
    if (n2[1] == 0)
      n2[0]++;

    /* The last block may be partial. */
    memcpy(out, block, take);
    out += take;
    outlen -= take;
  }
  return 0;
}
/*
 * XOR inlen bytes of in with AES-256-CTR keystream and store them in out.
 *
 * out   : destination buffer, at least inlen bytes long; may be the same
 *         buffer as in.
 * in    : source buffer, inlen bytes long.
 * inlen : number of bytes to process; 0 writes nothing.
 * n     : 16-byte initial counter block (nonce), no alignment required.
 * k     : 32-byte key.
 *
 * Always returns 0.
 */
int crypto_stream_xor(
uint8_t *out,
const uint8_t *in,
uint64_t inlen,
const uint8_t *n,
const uint8_t *k
)
{
  uint32_t rkeys[64];
  uint64_t n2[2];

  aes256_setkey_encrypt(k, rkeys);

  /* n2 is in byte-reversed (i.e., native little endian)
     order to make increment/testing easier.
     The nonce is copied with memcpy: n is a byte pointer with no alignment
     guarantee, so it must not be dereferenced as a uint64_t. */
  memcpy(n2, n, sizeof n2);
  n2[0] = _bswap64(n2[0]);
  n2[1] = _bswap64(n2[1]);

  while (inlen > 0) {
    uint32_t ctr[4];    /* counter block, as the words the cipher reads */
    uint32_t block[4];  /* one keystream block, as the words the cipher writes */
    uint8_t ks[sizeof block]; /* the same keystream block, viewed as bytes */
    const size_t take = (inlen < sizeof block) ? (size_t) inlen : sizeof block;
    size_t j;

    /* Same in-memory layout as n2, but with the type the cipher expects. */
    memcpy(ctr, n2, sizeof ctr);
    aes256_4ft_encrypt(block, ctr, rkeys);
    memcpy(ks, block, sizeof ks);

    /* 128-bit increment: n2[1] is the low half, carry into n2[0]. */
    n2[1]++;
    if (n2[1] == 0)
      n2[0]++;

    /* The last block may be partial. */
    for (j = 0; j < take; j++)
      out[j] = in[j] ^ ks[j];

    out += take;
    in += take;
    inlen -= take;
  }
  return 0;
}

View file

@ -0,0 +1,888 @@
/*
common.h version $Date: 2020/03/23 14:00:49 $
Romain Dolbeau
Public Domain
*/
/* f_FSb_32__1(x): substitute the two upper bytes of x (bits 31..24 and
   23..16) through FSb, each result kept at its original bit position.
   f_FSb_32__2(x): the same for the two lower bytes (bits 15..8 and 7..0).
   XOR-ing both gives x with every byte replaced by its FSb value.
   x is evaluated twice in each macro. */
#define f_FSb_32__1(x) ((FSb[((x) >> 24) &0xFF] << 24) ^ \
(FSb[((x) >> 16) &0xFF] << 16))
#define f_FSb_32__2(x) ((FSb[((x) >> 8) &0xFF] << 8 ) ^ \
(FSb[((x) ) &0xFF] & 0xFF))
#define FT \
V(C6,63,63,A5), V(F8,7C,7C,84), V(EE,77,77,99), V(F6,7B,7B,8D), \
V(FF,F2,F2,0D), V(D6,6B,6B,BD), V(DE,6F,6F,B1), V(91,C5,C5,54), \
V(60,30,30,50), V(02,01,01,03), V(CE,67,67,A9), V(56,2B,2B,7D), \
V(E7,FE,FE,19), V(B5,D7,D7,62), V(4D,AB,AB,E6), V(EC,76,76,9A), \
V(8F,CA,CA,45), V(1F,82,82,9D), V(89,C9,C9,40), V(FA,7D,7D,87), \
V(EF,FA,FA,15), V(B2,59,59,EB), V(8E,47,47,C9), V(FB,F0,F0,0B), \
V(41,AD,AD,EC), V(B3,D4,D4,67), V(5F,A2,A2,FD), V(45,AF,AF,EA), \
V(23,9C,9C,BF), V(53,A4,A4,F7), V(E4,72,72,96), V(9B,C0,C0,5B), \
V(75,B7,B7,C2), V(E1,FD,FD,1C), V(3D,93,93,AE), V(4C,26,26,6A), \
V(6C,36,36,5A), V(7E,3F,3F,41), V(F5,F7,F7,02), V(83,CC,CC,4F), \
V(68,34,34,5C), V(51,A5,A5,F4), V(D1,E5,E5,34), V(F9,F1,F1,08), \
V(E2,71,71,93), V(AB,D8,D8,73), V(62,31,31,53), V(2A,15,15,3F), \
V(08,04,04,0C), V(95,C7,C7,52), V(46,23,23,65), V(9D,C3,C3,5E), \
V(30,18,18,28), V(37,96,96,A1), V(0A,05,05,0F), V(2F,9A,9A,B5), \
V(0E,07,07,09), V(24,12,12,36), V(1B,80,80,9B), V(DF,E2,E2,3D), \
V(CD,EB,EB,26), V(4E,27,27,69), V(7F,B2,B2,CD), V(EA,75,75,9F), \
V(12,09,09,1B), V(1D,83,83,9E), V(58,2C,2C,74), V(34,1A,1A,2E), \
V(36,1B,1B,2D), V(DC,6E,6E,B2), V(B4,5A,5A,EE), V(5B,A0,A0,FB), \
V(A4,52,52,F6), V(76,3B,3B,4D), V(B7,D6,D6,61), V(7D,B3,B3,CE), \
V(52,29,29,7B), V(DD,E3,E3,3E), V(5E,2F,2F,71), V(13,84,84,97), \
V(A6,53,53,F5), V(B9,D1,D1,68), V(00,00,00,00), V(C1,ED,ED,2C), \
V(40,20,20,60), V(E3,FC,FC,1F), V(79,B1,B1,C8), V(B6,5B,5B,ED), \
V(D4,6A,6A,BE), V(8D,CB,CB,46), V(67,BE,BE,D9), V(72,39,39,4B), \
V(94,4A,4A,DE), V(98,4C,4C,D4), V(B0,58,58,E8), V(85,CF,CF,4A), \
V(BB,D0,D0,6B), V(C5,EF,EF,2A), V(4F,AA,AA,E5), V(ED,FB,FB,16), \
V(86,43,43,C5), V(9A,4D,4D,D7), V(66,33,33,55), V(11,85,85,94), \
V(8A,45,45,CF), V(E9,F9,F9,10), V(04,02,02,06), V(FE,7F,7F,81), \
V(A0,50,50,F0), V(78,3C,3C,44), V(25,9F,9F,BA), V(4B,A8,A8,E3), \
V(A2,51,51,F3), V(5D,A3,A3,FE), V(80,40,40,C0), V(05,8F,8F,8A), \
V(3F,92,92,AD), V(21,9D,9D,BC), V(70,38,38,48), V(F1,F5,F5,04), \
V(63,BC,BC,DF), V(77,B6,B6,C1), V(AF,DA,DA,75), V(42,21,21,63), \
V(20,10,10,30), V(E5,FF,FF,1A), V(FD,F3,F3,0E), V(BF,D2,D2,6D), \
V(81,CD,CD,4C), V(18,0C,0C,14), V(26,13,13,35), V(C3,EC,EC,2F), \
V(BE,5F,5F,E1), V(35,97,97,A2), V(88,44,44,CC), V(2E,17,17,39), \
V(93,C4,C4,57), V(55,A7,A7,F2), V(FC,7E,7E,82), V(7A,3D,3D,47), \
V(C8,64,64,AC), V(BA,5D,5D,E7), V(32,19,19,2B), V(E6,73,73,95), \
V(C0,60,60,A0), V(19,81,81,98), V(9E,4F,4F,D1), V(A3,DC,DC,7F), \
V(44,22,22,66), V(54,2A,2A,7E), V(3B,90,90,AB), V(0B,88,88,83), \
V(8C,46,46,CA), V(C7,EE,EE,29), V(6B,B8,B8,D3), V(28,14,14,3C), \
V(A7,DE,DE,79), V(BC,5E,5E,E2), V(16,0B,0B,1D), V(AD,DB,DB,76), \
V(DB,E0,E0,3B), V(64,32,32,56), V(74,3A,3A,4E), V(14,0A,0A,1E), \
V(92,49,49,DB), V(0C,06,06,0A), V(48,24,24,6C), V(B8,5C,5C,E4), \
V(9F,C2,C2,5D), V(BD,D3,D3,6E), V(43,AC,AC,EF), V(C4,62,62,A6), \
V(39,91,91,A8), V(31,95,95,A4), V(D3,E4,E4,37), V(F2,79,79,8B), \
V(D5,E7,E7,32), V(8B,C8,C8,43), V(6E,37,37,59), V(DA,6D,6D,B7), \
V(01,8D,8D,8C), V(B1,D5,D5,64), V(9C,4E,4E,D2), V(49,A9,A9,E0), \
V(D8,6C,6C,B4), V(AC,56,56,FA), V(F3,F4,F4,07), V(CF,EA,EA,25), \
V(CA,65,65,AF), V(F4,7A,7A,8E), V(47,AE,AE,E9), V(10,08,08,18), \
V(6F,BA,BA,D5), V(F0,78,78,88), V(4A,25,25,6F), V(5C,2E,2E,72), \
V(38,1C,1C,24), V(57,A6,A6,F1), V(73,B4,B4,C7), V(97,C6,C6,51), \
V(CB,E8,E8,23), V(A1,DD,DD,7C), V(E8,74,74,9C), V(3E,1F,1F,21), \
V(96,4B,4B,DD), V(61,BD,BD,DC), V(0D,8B,8B,86), V(0F,8A,8A,85), \
V(E0,70,70,90), V(7C,3E,3E,42), V(71,B5,B5,C4), V(CC,66,66,AA), \
V(90,48,48,D8), V(06,03,03,05), V(F7,F6,F6,01), V(1C,0E,0E,12), \
V(C2,61,61,A3), V(6A,35,35,5F), V(AE,57,57,F9), V(69,B9,B9,D0), \
V(17,86,86,91), V(99,C1,C1,58), V(3A,1D,1D,27), V(27,9E,9E,B9), \
V(D9,E1,E1,38), V(EB,F8,F8,13), V(2B,98,98,B3), V(22,11,11,33), \
V(D2,69,69,BB), V(A9,D9,D9,70), V(07,8E,8E,89), V(33,94,94,A7), \
V(2D,9B,9B,B6), V(3C,1E,1E,22), V(15,87,87,92), V(C9,E9,E9,20), \
V(87,CE,CE,49), V(AA,55,55,FF), V(50,28,28,78), V(A5,DF,DF,7A), \
V(03,8C,8C,8F), V(59,A1,A1,F8), V(09,89,89,80), V(1A,0D,0D,17), \
V(65,BF,BF,DA), V(D7,E6,E6,31), V(84,42,42,C6), V(D0,68,68,B8), \
V(82,41,41,C3), V(29,99,99,B0), V(5A,2D,2D,77), V(1E,0F,0F,11), \
V(7B,B0,B0,CB), V(A8,54,54,FC), V(6D,BB,BB,D6), V(2C,16,16,3A)
/* FT0..FT3: four 256-entry tables, all expanded from the single FT list.
   V is redefined before each expansion so that the four bytes a,b,c,d of
   every entry are pasted into one hexadecimal constant in a different order.
   In the big-endian variants each table is the previous one with its bytes
   rotated by one position (abcd, dabc, cdab, bcda); every little-endian
   variant is the byte-reversal of the matching big-endian one. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##a##b##c##d
#else
#define V(a,b,c,d) 0x##d##c##b##a
#endif
static unsigned int FT0[256] = { FT };
#undef V
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##d##a##b##c
#else
#define V(a,b,c,d) 0x##c##b##a##d
#endif
static unsigned int FT1[256] = { FT };
#undef V
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##c##d##a##b
#else
#define V(a,b,c,d) 0x##b##a##d##c
#endif
static unsigned int FT2[256] = { FT };
#undef V
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##b##c##d##a
#else
#define V(a,b,c,d) 0x##a##d##c##b
#endif
static unsigned int FT3[256] = { FT };
#undef V
/* The list macro is only needed to build the tables above. */
#undef FT
#define FSbData \
{ \
0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, \
0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, \
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, \
0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, \
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, \
0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, \
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, \
0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, \
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, \
0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, \
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, \
0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, \
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, \
0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, \
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, \
0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, \
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, \
0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, \
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, \
0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, \
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, \
0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, \
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, \
0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, \
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, \
0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, \
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, \
0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, \
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, \
0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, \
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, \
0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 \
}
/* AES forward S-box: one entry per byte value, each stored in an unsigned int
   so that a looked-up value can be shifted to any byte position directly. */
static unsigned int FSb[256] = FSbData;
#undef FSbData
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* Big-endian AES table round using FT0 only; the other three table values
   are obtained by rotating FT0 entries with rotr().
   TAB    : round-key array.
   I      : index into TAB; must be a modifiable lvalue, advanced by 4.
   X0..X3 : receive the new state words (outputs).
   Y0..Y3 : current state words (inputs, each evaluated several times).
   NOTE(review): rotr is not defined in the portion of the file visible
   here - confirm it is available wherever this macro is expanded. */
#define AES_ROUND1(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
X0 = FT0[( Y0 >> 24 ) &0xFF ]; \
X0 ^= rotr(FT0[( Y1 >> 16 ) &0xFF ],8); \
X0 ^= rotr(FT0[( Y2 >> 8 ) &0xFF ],16); \
X0 ^= rotr(FT0[( Y3 ) &0xFF ],24); \
\
X1 = FT0[( Y1 >> 24 ) &0xFF ]; \
X1 ^= rotr(FT0[( Y2 >> 16 ) &0xFF ],8); \
X1 ^= rotr(FT0[( Y3 >> 8 ) &0xFF ],16); \
X1 ^= rotr(FT0[( Y0 ) &0xFF ],24); \
\
X2 = FT0[( Y2 >> 24 ) &0xFF ]; \
X2 ^= rotr(FT0[( Y3 >> 16 ) &0xFF ],8); \
X2 ^= rotr(FT0[( Y0 >> 8 ) &0xFF ],16); \
X2 ^= rotr(FT0[( Y1 ) &0xFF ],24); \
\
X3 = FT0[( Y3 >> 24 ) &0xFF ]; \
X3 ^= rotr(FT0[( Y0 >> 16 ) &0xFF ],8); \
X3 ^= rotr(FT0[( Y1 >> 8 ) &0xFF ],16); \
X3 ^= rotr(FT0[( Y2 ) &0xFF ],24); \
\
X0 ^= TAB[I++]; \
X1 ^= TAB[I++]; \
X2 ^= TAB[I++]; \
X3 ^= TAB[I++]; \
}
/* Big-endian AES table round using FT0 and FT1; the remaining two table
   values are obtained by rotating FT0/FT1 entries by 16 bits with rotr().
   TAB    : round-key array.
   I      : index into TAB; must be a modifiable lvalue, advanced by 4.
   X0..X3 : receive the new state words (outputs).
   Y0..Y3 : current state words (inputs, each evaluated several times).
   NOTE(review): rotr is not defined in the portion of the file visible
   here - confirm it is available wherever this macro is expanded. */
#define AES_ROUND2(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
X0 = FT0[( Y0 >> 24 ) &0xFF ]; \
X0 ^= FT1[( Y1 >> 16 ) &0xFF ]; \
X0 ^= rotr(FT0[( Y2 >> 8 ) &0xFF ],16); \
X0 ^= rotr(FT1[( Y3 ) &0xFF ],16); \
\
X1 = FT0[( Y1 >> 24 ) &0xFF ]; \
X1 ^= FT1[( Y2 >> 16 ) &0xFF ]; \
X1 ^= rotr(FT0[( Y3 >> 8 ) &0xFF ],16); \
X1 ^= rotr(FT1[( Y0 ) &0xFF ],16); \
\
X2 = FT0[( Y2 >> 24 ) &0xFF ]; \
X2 ^= FT1[( Y3 >> 16 ) &0xFF ]; \
X2 ^= rotr(FT0[( Y0 >> 8 ) &0xFF ],16); \
X2 ^= rotr(FT1[( Y1 ) &0xFF ],16); \
\
X3 = FT0[( Y3 >> 24 ) &0xFF ]; \
X3 ^= FT1[( Y0 >> 16 ) &0xFF ]; \
X3 ^= rotr(FT0[( Y1 >> 8 ) &0xFF ],16); \
X3 ^= rotr(FT1[( Y2 ) &0xFF ],16); \
\
X0 ^= TAB[I++]; \
X1 ^= TAB[I++]; \
X2 ^= TAB[I++]; \
X3 ^= TAB[I++]; \
}
/* Big-endian AES table round using all four tables FT0..FT3, one lookup per
   state byte and no rotations.
   TAB    : round-key array.
   I      : index into TAB; must be a modifiable lvalue, advanced by 4.
   X0..X3 : receive the new state words (outputs).
   Y0..Y3 : current state words (inputs, each evaluated several times). */
#define AES_ROUND4(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
X0 = FT0[( Y0 >> 24 ) &0xFF ]; \
X0 ^= FT1[( Y1 >> 16 ) &0xFF ]; \
X0 ^= FT2[( Y2 >> 8 ) &0xFF ]; \
X0 ^= FT3[( Y3 ) &0xFF ]; \
\
X1 = FT0[( Y1 >> 24 ) &0xFF ]; \
X1 ^= FT1[( Y2 >> 16 ) &0xFF ]; \
X1 ^= FT2[( Y3 >> 8 ) &0xFF ]; \
X1 ^= FT3[( Y0 ) &0xFF ]; \
\
X2 = FT0[( Y2 >> 24 ) &0xFF ]; \
X2 ^= FT1[( Y3 >> 16 ) &0xFF ]; \
X2 ^= FT2[( Y0 >> 8 ) &0xFF ]; \
X2 ^= FT3[( Y1 ) &0xFF ]; \
\
X3 = FT0[( Y3 >> 24 ) &0xFF ]; \
X3 ^= FT1[( Y0 >> 16 ) &0xFF ]; \
X3 ^= FT2[( Y1 >> 8 ) &0xFF ]; \
X3 ^= FT3[( Y2 ) &0xFF ]; \
\
X0 ^= TAB[I++]; \
X1 ^= TAB[I++]; \
X2 ^= TAB[I++]; \
X3 ^= TAB[I++]; \
}
#else
/* Little-endian AES table round using FT0 only; the other three table values
   are obtained by rotating FT0 entries with rotr().
   TAB    : round-key array.
   I      : index into TAB; must be a modifiable lvalue, advanced by 4.
   X0..X3 : receive the new state words (outputs).
   Y0..Y3 : current state words (inputs, each evaluated several times).
   NOTE(review): rotr is not defined in the portion of the file visible
   here - confirm it is available wherever this macro is expanded. */
#define AES_ROUND1(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
X0 = FT0[( Y0 ) &0xFF ]; \
X0 ^= rotr(FT0[( Y1 >> 8 ) &0xFF ],24); \
X0 ^= rotr(FT0[( Y2 >> 16 ) &0xFF ],16); \
X0 ^= rotr(FT0[( Y3 >> 24 ) &0xFF ],8); \
\
X1 = FT0[( Y1 ) &0xFF ]; \
X1 ^= rotr(FT0[( Y2 >> 8 ) &0xFF ],24); \
X1 ^= rotr(FT0[( Y3 >> 16 ) &0xFF ],16); \
X1 ^= rotr(FT0[( Y0 >> 24 ) &0xFF ],8); \
\
X2 = FT0[( Y2 ) &0xFF ]; \
X2 ^= rotr(FT0[( Y3 >> 8 ) &0xFF ],24); \
X2 ^= rotr(FT0[( Y0 >> 16 ) &0xFF ],16); \
X2 ^= rotr(FT0[( Y1 >> 24 ) &0xFF ],8); \
\
X3 = FT0[( Y3 ) &0xFF ]; \
X3 ^= rotr(FT0[( Y0 >> 8 ) &0xFF ],24); \
X3 ^= rotr(FT0[( Y1 >> 16 ) &0xFF ],16); \
X3 ^= rotr(FT0[( Y2 >> 24 ) &0xFF ],8); \
\
X0 ^= (TAB[I++]); \
X1 ^= (TAB[I++]); \
X2 ^= (TAB[I++]); \
X3 ^= (TAB[I++]); \
}
/* Little-endian AES table round using FT0 and FT1; the remaining two table
   values are obtained by rotating FT0/FT1 entries by 16 bits with rotr().
   TAB    : round-key array.
   I      : index into TAB; must be a modifiable lvalue, advanced by 4.
   X0..X3 : receive the new state words (outputs).
   Y0..Y3 : current state words (inputs, each evaluated several times).
   NOTE(review): rotr is not defined in the portion of the file visible
   here - confirm it is available wherever this macro is expanded. */
#define AES_ROUND2(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
X0 = FT0[( Y0 ) &0xFF ]; \
X0 ^= FT1[( Y1 >> 8 ) &0xFF ]; \
X0 ^= rotr(FT0[( Y2 >> 16 ) &0xFF ],16); \
X0 ^= rotr(FT1[( Y3 >> 24 ) &0xFF ],16); \
\
X1 = FT0[( Y1 ) &0xFF ]; \
X1 ^= FT1[( Y2 >> 8 ) &0xFF ]; \
X1 ^= rotr(FT0[( Y3 >> 16 ) &0xFF ],16); \
X1 ^= rotr(FT1[( Y0 >> 24 ) &0xFF ],16); \
\
X2 = FT0[( Y2 ) &0xFF ]; \
X2 ^= FT1[( Y3 >> 8 ) &0xFF ]; \
X2 ^= rotr(FT0[( Y0 >> 16 ) &0xFF ],16); \
X2 ^= rotr(FT1[( Y1 >> 24 ) &0xFF ],16); \
\
X3 = FT0[( Y3 ) &0xFF ]; \
X3 ^= FT1[( Y0 >> 8 ) &0xFF ]; \
X3 ^= rotr(FT0[( Y1 >> 16 ) &0xFF ],16); \
X3 ^= rotr(FT1[( Y2 >> 24 ) &0xFF ],16); \
\
X0 ^= (TAB[I++]); \
X1 ^= (TAB[I++]); \
X2 ^= (TAB[I++]); \
X3 ^= (TAB[I++]); \
}
/* Little-endian AES table round using all four tables FT0..FT3, one lookup
   per state byte and no rotations.
   TAB    : round-key array.
   I      : index into TAB; must be a modifiable lvalue, advanced by 4.
   X0..X3 : receive the new state words (outputs).
   Y0..Y3 : current state words (inputs, each evaluated several times). */
#define AES_ROUND4(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
X0 = FT0[( Y0 ) &0xFF ]; \
X0 ^= FT1[( Y1 >> 8 ) &0xFF ]; \
X0 ^= FT2[( Y2 >> 16 ) &0xFF ]; \
X0 ^= FT3[( Y3 >> 24 ) &0xFF ]; \
\
X1 = FT0[( Y1 ) &0xFF ]; \
X1 ^= FT1[( Y2 >> 8 ) &0xFF ]; \
X1 ^= FT2[( Y3 >> 16 ) &0xFF ]; \
X1 ^= FT3[( Y0 >> 24 ) &0xFF ]; \
\
X2 = FT0[( Y2 ) &0xFF ]; \
X2 ^= FT1[( Y3 >> 8 ) &0xFF ]; \
X2 ^= FT2[( Y0 >> 16 ) &0xFF ]; \
X2 ^= FT3[( Y1 >> 24 ) &0xFF ]; \
\
X3 = FT0[( Y3 ) &0xFF ]; \
X3 ^= FT1[( Y0 >> 8 ) &0xFF ]; \
X3 ^= FT2[( Y1 >> 16 ) &0xFF ]; \
X3 ^= FT3[( Y2 >> 24 ) &0xFF ]; \
\
X0 ^= (TAB[I++]); \
X1 ^= (TAB[I++]); \
X2 ^= (TAB[I++]); \
X3 ^= (TAB[I++]); \
}
#endif
#define RSbData \
{ \
0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, \
0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB, \
0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87, \
0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, \
0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, \
0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E, \
0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, \
0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, \
0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, \
0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, \
0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA, \
0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, \
0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, \
0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06, \
0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, \
0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, \
0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA, \
0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, \
0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, \
0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E, \
0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, \
0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, \
0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, \
0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, \
0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, \
0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, \
0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, \
0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, \
0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, \
0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, \
0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, \
0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D \
}
/* AES inverse S-box: one entry per byte value, stored as unsigned int like
   the forward table FSb. */
static unsigned int RSb[256] = RSbData;
#undef RSbData
#define RT \
V(51,F4,A7,50), V(7E,41,65,53), V(1A,17,A4,C3), V(3A,27,5E,96), \
V(3B,AB,6B,CB), V(1F,9D,45,F1), V(AC,FA,58,AB), V(4B,E3,03,93), \
V(20,30,FA,55), V(AD,76,6D,F6), V(88,CC,76,91), V(F5,02,4C,25), \
V(4F,E5,D7,FC), V(C5,2A,CB,D7), V(26,35,44,80), V(B5,62,A3,8F), \
V(DE,B1,5A,49), V(25,BA,1B,67), V(45,EA,0E,98), V(5D,FE,C0,E1), \
V(C3,2F,75,02), V(81,4C,F0,12), V(8D,46,97,A3), V(6B,D3,F9,C6), \
V(03,8F,5F,E7), V(15,92,9C,95), V(BF,6D,7A,EB), V(95,52,59,DA), \
V(D4,BE,83,2D), V(58,74,21,D3), V(49,E0,69,29), V(8E,C9,C8,44), \
V(75,C2,89,6A), V(F4,8E,79,78), V(99,58,3E,6B), V(27,B9,71,DD), \
V(BE,E1,4F,B6), V(F0,88,AD,17), V(C9,20,AC,66), V(7D,CE,3A,B4), \
V(63,DF,4A,18), V(E5,1A,31,82), V(97,51,33,60), V(62,53,7F,45), \
V(B1,64,77,E0), V(BB,6B,AE,84), V(FE,81,A0,1C), V(F9,08,2B,94), \
V(70,48,68,58), V(8F,45,FD,19), V(94,DE,6C,87), V(52,7B,F8,B7), \
V(AB,73,D3,23), V(72,4B,02,E2), V(E3,1F,8F,57), V(66,55,AB,2A), \
V(B2,EB,28,07), V(2F,B5,C2,03), V(86,C5,7B,9A), V(D3,37,08,A5), \
V(30,28,87,F2), V(23,BF,A5,B2), V(02,03,6A,BA), V(ED,16,82,5C), \
V(8A,CF,1C,2B), V(A7,79,B4,92), V(F3,07,F2,F0), V(4E,69,E2,A1), \
V(65,DA,F4,CD), V(06,05,BE,D5), V(D1,34,62,1F), V(C4,A6,FE,8A), \
V(34,2E,53,9D), V(A2,F3,55,A0), V(05,8A,E1,32), V(A4,F6,EB,75), \
V(0B,83,EC,39), V(40,60,EF,AA), V(5E,71,9F,06), V(BD,6E,10,51), \
V(3E,21,8A,F9), V(96,DD,06,3D), V(DD,3E,05,AE), V(4D,E6,BD,46), \
V(91,54,8D,B5), V(71,C4,5D,05), V(04,06,D4,6F), V(60,50,15,FF), \
V(19,98,FB,24), V(D6,BD,E9,97), V(89,40,43,CC), V(67,D9,9E,77), \
V(B0,E8,42,BD), V(07,89,8B,88), V(E7,19,5B,38), V(79,C8,EE,DB), \
V(A1,7C,0A,47), V(7C,42,0F,E9), V(F8,84,1E,C9), V(00,00,00,00), \
V(09,80,86,83), V(32,2B,ED,48), V(1E,11,70,AC), V(6C,5A,72,4E), \
V(FD,0E,FF,FB), V(0F,85,38,56), V(3D,AE,D5,1E), V(36,2D,39,27), \
V(0A,0F,D9,64), V(68,5C,A6,21), V(9B,5B,54,D1), V(24,36,2E,3A), \
V(0C,0A,67,B1), V(93,57,E7,0F), V(B4,EE,96,D2), V(1B,9B,91,9E), \
V(80,C0,C5,4F), V(61,DC,20,A2), V(5A,77,4B,69), V(1C,12,1A,16), \
V(E2,93,BA,0A), V(C0,A0,2A,E5), V(3C,22,E0,43), V(12,1B,17,1D), \
V(0E,09,0D,0B), V(F2,8B,C7,AD), V(2D,B6,A8,B9), V(14,1E,A9,C8), \
V(57,F1,19,85), V(AF,75,07,4C), V(EE,99,DD,BB), V(A3,7F,60,FD), \
V(F7,01,26,9F), V(5C,72,F5,BC), V(44,66,3B,C5), V(5B,FB,7E,34), \
V(8B,43,29,76), V(CB,23,C6,DC), V(B6,ED,FC,68), V(B8,E4,F1,63), \
V(D7,31,DC,CA), V(42,63,85,10), V(13,97,22,40), V(84,C6,11,20), \
V(85,4A,24,7D), V(D2,BB,3D,F8), V(AE,F9,32,11), V(C7,29,A1,6D), \
V(1D,9E,2F,4B), V(DC,B2,30,F3), V(0D,86,52,EC), V(77,C1,E3,D0), \
V(2B,B3,16,6C), V(A9,70,B9,99), V(11,94,48,FA), V(47,E9,64,22), \
V(A8,FC,8C,C4), V(A0,F0,3F,1A), V(56,7D,2C,D8), V(22,33,90,EF), \
V(87,49,4E,C7), V(D9,38,D1,C1), V(8C,CA,A2,FE), V(98,D4,0B,36), \
V(A6,F5,81,CF), V(A5,7A,DE,28), V(DA,B7,8E,26), V(3F,AD,BF,A4), \
V(2C,3A,9D,E4), V(50,78,92,0D), V(6A,5F,CC,9B), V(54,7E,46,62), \
V(F6,8D,13,C2), V(90,D8,B8,E8), V(2E,39,F7,5E), V(82,C3,AF,F5), \
V(9F,5D,80,BE), V(69,D0,93,7C), V(6F,D5,2D,A9), V(CF,25,12,B3), \
V(C8,AC,99,3B), V(10,18,7D,A7), V(E8,9C,63,6E), V(DB,3B,BB,7B), \
V(CD,26,78,09), V(6E,59,18,F4), V(EC,9A,B7,01), V(83,4F,9A,A8), \
V(E6,95,6E,65), V(AA,FF,E6,7E), V(21,BC,CF,08), V(EF,15,E8,E6), \
V(BA,E7,9B,D9), V(4A,6F,36,CE), V(EA,9F,09,D4), V(29,B0,7C,D6), \
V(31,A4,B2,AF), V(2A,3F,23,31), V(C6,A5,94,30), V(35,A2,66,C0), \
V(74,4E,BC,37), V(FC,82,CA,A6), V(E0,90,D0,B0), V(33,A7,D8,15), \
V(F1,04,98,4A), V(41,EC,DA,F7), V(7F,CD,50,0E), V(17,91,F6,2F), \
V(76,4D,D6,8D), V(43,EF,B0,4D), V(CC,AA,4D,54), V(E4,96,04,DF), \
V(9E,D1,B5,E3), V(4C,6A,88,1B), V(C1,2C,1F,B8), V(46,65,51,7F), \
V(9D,5E,EA,04), V(01,8C,35,5D), V(FA,87,74,73), V(FB,0B,41,2E), \
V(B3,67,1D,5A), V(92,DB,D2,52), V(E9,10,56,33), V(6D,D6,47,13), \
V(9A,D7,61,8C), V(37,A1,0C,7A), V(59,F8,14,8E), V(EB,13,3C,89), \
V(CE,A9,27,EE), V(B7,61,C9,35), V(E1,1C,E5,ED), V(7A,47,B1,3C), \
V(9C,D2,DF,59), V(55,F2,73,3F), V(18,14,CE,79), V(73,C7,37,BF), \
V(53,F7,CD,EA), V(5F,FD,AA,5B), V(DF,3D,6F,14), V(78,44,DB,86), \
V(CA,AF,F3,81), V(B9,68,C4,3E), V(38,24,34,2C), V(C2,A3,40,5F), \
V(16,1D,C3,72), V(BC,E2,25,0C), V(28,3C,49,8B), V(FF,0D,95,41), \
V(39,A8,01,71), V(08,0C,B3,DE), V(D8,B4,E4,9C), V(64,56,C1,90), \
V(7B,CB,84,61), V(D5,32,B6,70), V(48,6C,5C,74), V(D0,B8,57,42)
/* RT0..RT3: four 256-entry tables, all expanded from the single RT list, in
   the same way FT0..FT3 are built from FT. V is redefined before each
   expansion so that the four bytes a,b,c,d of every entry are pasted into
   one hexadecimal constant in a different order (big-endian: abcd, dabc,
   cdab, bcda; little-endian: the byte-reversal of each). */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##a##b##c##d
#else
#define V(a,b,c,d) 0x##d##c##b##a
#endif
static unsigned int RT0[256] = { RT };
#undef V
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##d##a##b##c
#else
#define V(a,b,c,d) 0x##c##b##a##d
#endif
static unsigned int RT1[256] = { RT };
#undef V
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##c##d##a##b
#else
#define V(a,b,c,d) 0x##b##a##d##c
#endif
static unsigned int RT2[256] = { RT };
#undef V
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##b##c##d##a
#else
#define V(a,b,c,d) 0x##a##d##c##b
#endif
static unsigned int RT3[256] = { RT };
#undef V
/* The list macro is only needed to build the tables above. */
#undef RT
/*
 * KT0Data: braced initializer list of 256 V(a,b,c,d) entries (64 rows of 4),
 * expanded once per KT table with a different definition of V().
 * Entry 1 is (0E,09,0D,0B) and entry 2 is (1C,12,1A,16), so entry x looks
 * like the byte x multiplied by the coefficients 0E/09/0D/0B in GF(2^8),
 * i.e. the AES InvMixColumns column for x -- TODO confirm against the spec.
 */
#define KT0Data \
{ \
V(00,00,00,00) ,V(0E,09,0D,0B) ,V(1C,12,1A,16) ,V(12,1B,17,1D) , \
V(38,24,34,2C) ,V(36,2D,39,27) ,V(24,36,2E,3A) ,V(2A,3F,23,31) , \
V(70,48,68,58) ,V(7E,41,65,53) ,V(6C,5A,72,4E) ,V(62,53,7F,45) , \
V(48,6C,5C,74) ,V(46,65,51,7F) ,V(54,7E,46,62) ,V(5A,77,4B,69) , \
V(E0,90,D0,B0) ,V(EE,99,DD,BB) ,V(FC,82,CA,A6) ,V(F2,8B,C7,AD) , \
V(D8,B4,E4,9C) ,V(D6,BD,E9,97) ,V(C4,A6,FE,8A) ,V(CA,AF,F3,81) , \
V(90,D8,B8,E8) ,V(9E,D1,B5,E3) ,V(8C,CA,A2,FE) ,V(82,C3,AF,F5) , \
V(A8,FC,8C,C4) ,V(A6,F5,81,CF) ,V(B4,EE,96,D2) ,V(BA,E7,9B,D9) , \
V(DB,3B,BB,7B) ,V(D5,32,B6,70) ,V(C7,29,A1,6D) ,V(C9,20,AC,66) , \
V(E3,1F,8F,57) ,V(ED,16,82,5C) ,V(FF,0D,95,41) ,V(F1,04,98,4A) , \
V(AB,73,D3,23) ,V(A5,7A,DE,28) ,V(B7,61,C9,35) ,V(B9,68,C4,3E) , \
V(93,57,E7,0F) ,V(9D,5E,EA,04) ,V(8F,45,FD,19) ,V(81,4C,F0,12) , \
V(3B,AB,6B,CB) ,V(35,A2,66,C0) ,V(27,B9,71,DD) ,V(29,B0,7C,D6) , \
V(03,8F,5F,E7) ,V(0D,86,52,EC) ,V(1F,9D,45,F1) ,V(11,94,48,FA) , \
V(4B,E3,03,93) ,V(45,EA,0E,98) ,V(57,F1,19,85) ,V(59,F8,14,8E) , \
V(73,C7,37,BF) ,V(7D,CE,3A,B4) ,V(6F,D5,2D,A9) ,V(61,DC,20,A2) , \
V(AD,76,6D,F6) ,V(A3,7F,60,FD) ,V(B1,64,77,E0) ,V(BF,6D,7A,EB) , \
V(95,52,59,DA) ,V(9B,5B,54,D1) ,V(89,40,43,CC) ,V(87,49,4E,C7) , \
V(DD,3E,05,AE) ,V(D3,37,08,A5) ,V(C1,2C,1F,B8) ,V(CF,25,12,B3) , \
V(E5,1A,31,82) ,V(EB,13,3C,89) ,V(F9,08,2B,94) ,V(F7,01,26,9F) , \
V(4D,E6,BD,46) ,V(43,EF,B0,4D) ,V(51,F4,A7,50) ,V(5F,FD,AA,5B) , \
V(75,C2,89,6A) ,V(7B,CB,84,61) ,V(69,D0,93,7C) ,V(67,D9,9E,77) , \
V(3D,AE,D5,1E) ,V(33,A7,D8,15) ,V(21,BC,CF,08) ,V(2F,B5,C2,03) , \
V(05,8A,E1,32) ,V(0B,83,EC,39) ,V(19,98,FB,24) ,V(17,91,F6,2F) , \
V(76,4D,D6,8D) ,V(78,44,DB,86) ,V(6A,5F,CC,9B) ,V(64,56,C1,90) , \
V(4E,69,E2,A1) ,V(40,60,EF,AA) ,V(52,7B,F8,B7) ,V(5C,72,F5,BC) , \
V(06,05,BE,D5) ,V(08,0C,B3,DE) ,V(1A,17,A4,C3) ,V(14,1E,A9,C8) , \
V(3E,21,8A,F9) ,V(30,28,87,F2) ,V(22,33,90,EF) ,V(2C,3A,9D,E4) , \
V(96,DD,06,3D) ,V(98,D4,0B,36) ,V(8A,CF,1C,2B) ,V(84,C6,11,20) , \
V(AE,F9,32,11) ,V(A0,F0,3F,1A) ,V(B2,EB,28,07) ,V(BC,E2,25,0C) , \
V(E6,95,6E,65) ,V(E8,9C,63,6E) ,V(FA,87,74,73) ,V(F4,8E,79,78) , \
V(DE,B1,5A,49) ,V(D0,B8,57,42) ,V(C2,A3,40,5F) ,V(CC,AA,4D,54) , \
V(41,EC,DA,F7) ,V(4F,E5,D7,FC) ,V(5D,FE,C0,E1) ,V(53,F7,CD,EA) , \
V(79,C8,EE,DB) ,V(77,C1,E3,D0) ,V(65,DA,F4,CD) ,V(6B,D3,F9,C6) , \
V(31,A4,B2,AF) ,V(3F,AD,BF,A4) ,V(2D,B6,A8,B9) ,V(23,BF,A5,B2) , \
V(09,80,86,83) ,V(07,89,8B,88) ,V(15,92,9C,95) ,V(1B,9B,91,9E) , \
V(A1,7C,0A,47) ,V(AF,75,07,4C) ,V(BD,6E,10,51) ,V(B3,67,1D,5A) , \
V(99,58,3E,6B) ,V(97,51,33,60) ,V(85,4A,24,7D) ,V(8B,43,29,76) , \
V(D1,34,62,1F) ,V(DF,3D,6F,14) ,V(CD,26,78,09) ,V(C3,2F,75,02) , \
V(E9,10,56,33) ,V(E7,19,5B,38) ,V(F5,02,4C,25) ,V(FB,0B,41,2E) , \
V(9A,D7,61,8C) ,V(94,DE,6C,87) ,V(86,C5,7B,9A) ,V(88,CC,76,91) , \
V(A2,F3,55,A0) ,V(AC,FA,58,AB) ,V(BE,E1,4F,B6) ,V(B0,E8,42,BD) , \
V(EA,9F,09,D4) ,V(E4,96,04,DF) ,V(F6,8D,13,C2) ,V(F8,84,1E,C9) , \
V(D2,BB,3D,F8) ,V(DC,B2,30,F3) ,V(CE,A9,27,EE) ,V(C0,A0,2A,E5) , \
V(7A,47,B1,3C) ,V(74,4E,BC,37) ,V(66,55,AB,2A) ,V(68,5C,A6,21) , \
V(42,63,85,10) ,V(4C,6A,88,1B) ,V(5E,71,9F,06) ,V(50,78,92,0D) , \
V(0A,0F,D9,64) ,V(04,06,D4,6F) ,V(16,1D,C3,72) ,V(18,14,CE,79) , \
V(32,2B,ED,48) ,V(3C,22,E0,43) ,V(2E,39,F7,5E) ,V(20,30,FA,55) , \
V(EC,9A,B7,01) ,V(E2,93,BA,0A) ,V(F0,88,AD,17) ,V(FE,81,A0,1C) , \
V(D4,BE,83,2D) ,V(DA,B7,8E,26) ,V(C8,AC,99,3B) ,V(C6,A5,94,30) , \
V(9C,D2,DF,59) ,V(92,DB,D2,52) ,V(80,C0,C5,4F) ,V(8E,C9,C8,44) , \
V(A4,F6,EB,75) ,V(AA,FF,E6,7E) ,V(B8,E4,F1,63) ,V(B6,ED,FC,68) , \
V(0C,0A,67,B1) ,V(02,03,6A,BA) ,V(10,18,7D,A7) ,V(1E,11,70,AC) , \
V(34,2E,53,9D) ,V(3A,27,5E,96) ,V(28,3C,49,8B) ,V(26,35,44,80) , \
V(7C,42,0F,E9) ,V(72,4B,02,E2) ,V(60,50,15,FF) ,V(6E,59,18,F4) , \
V(44,66,3B,C5) ,V(4A,6F,36,CE) ,V(58,74,21,D3) ,V(56,7D,2C,D8) , \
V(37,A1,0C,7A) ,V(39,A8,01,71) ,V(2B,B3,16,6C) ,V(25,BA,1B,67) , \
V(0F,85,38,56) ,V(01,8C,35,5D) ,V(13,97,22,40) ,V(1D,9E,2F,4B) , \
V(47,E9,64,22) ,V(49,E0,69,29) ,V(5B,FB,7E,34) ,V(55,F2,73,3F) , \
V(7F,CD,50,0E) ,V(71,C4,5D,05) ,V(63,DF,4A,18) ,V(6D,D6,47,13) , \
V(D7,31,DC,CA) ,V(D9,38,D1,C1) ,V(CB,23,C6,DC) ,V(C5,2A,CB,D7) , \
V(EF,15,E8,E6) ,V(E1,1C,E5,ED) ,V(F3,07,F2,F0) ,V(FD,0E,FF,FB) , \
V(A7,79,B4,92) ,V(A9,70,B9,99) ,V(BB,6B,AE,84) ,V(B5,62,A3,8F) , \
V(9F,5D,80,BE) ,V(91,54,8D,B5) ,V(83,4F,9A,A8) ,V(8D,46,97,A3) \
}
/*
 * KT0..KT3: four 256-element tables initialised from KT0Data.
 * V() is redefined before each table so the four bytes of every entry are
 * pasted in a different order; each table holds the previous table's words
 * rotated by one byte (right on big-endian hosts, left on little-endian).
 */
/* KT0: base byte order. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##a##b##c##d
#else
#define V(a,b,c,d) 0x##d##c##b##a
#endif
static unsigned int KT0[256] = KT0Data;
#undef V
/* KT1: KT0 words rotated by 8 bits. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##d##a##b##c
#else
#define V(a,b,c,d) 0x##c##b##a##d
#endif
static unsigned int KT1[256] = KT0Data;
#undef V
/* KT2: KT0 words rotated by 16 bits. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##c##d##a##b
#else
#define V(a,b,c,d) 0x##b##a##d##c
#endif
static unsigned int KT2[256] = KT0Data;
#undef V
/* KT3: KT0 words rotated by 24 bits. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define V(a,b,c,d) 0x##b##c##d##a
#else
#define V(a,b,c,d) 0x##a##d##c##b
#endif
static unsigned int KT3[256] = KT0Data;
/* The entry list and the byte-pasting helper are no longer needed. */
#undef V
#undef KT0Data
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/*
 * AES_ROUND1NODK_DEC, big-endian variant, single-table version.
 * Computes X0..X3 from Y0..Y3 and the four words TAB[I..I+3] using only the
 * RT0 and KT0 tables. Every output word is the XOR of four terms; each term
 * is an RT0 lookup on one byte of a Y word XORed with a KT0 lookup on the
 * byte at the same position of the matching TAB word. The terms taken from
 * the 2nd, 3rd and 4th byte (counting from the most significant) are rotated
 * right by 8, 16 and 24 bits.
 * TAB is presumably the round-key array -- TODO confirm against the callers.
 * Expands to a braced block that declares ta0..ta3; X0..X3 must be
 * assignable.
 */
#define AES_ROUND1NODK_DEC(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
unsigned int ta0 = TAB[I]; \
unsigned int ta1 = TAB[I+1]; \
unsigned int ta2 = TAB[I+2]; \
unsigned int ta3 = TAB[I+3]; \
X0 = RT0[(Y0 >> 24) &0xFF ] ^ KT0[(ta0 >> 24) & 0xFF]; \
X0 ^= rotr(RT0[(Y3 >> 16) &0xFF ] ^ KT0[(ta0 >> 16) & 0xFF],8); \
X0 ^= rotr(RT0[(Y2 >> 8) &0xFF ] ^ KT0[(ta0 >> 8 ) & 0xFF],16); \
X0 ^= rotr(RT0[(Y1 ) &0xFF ] ^ KT0[(ta0 ) & 0xFF],24); \
\
X1 = RT0[( Y1 >> 24 ) &0xFF ] ^ KT0[(ta1 >> 24) & 0xFF]; \
X1 ^= rotr(RT0[( Y0 >> 16 ) &0xFF ] ^ KT0[(ta1 >> 16) & 0xFF],8); \
X1 ^= rotr(RT0[( Y3 >> 8 ) &0xFF ] ^ KT0[(ta1 >> 8) & 0xFF],16); \
X1 ^= rotr(RT0[( Y2 ) &0xFF ] ^ KT0[(ta1 ) & 0xFF],24); \
\
X2 = RT0[( Y2 >> 24 ) &0xFF ] ^ KT0[(ta2 >> 24) & 0xFF]; \
X2 ^= rotr(RT0[( Y1 >> 16 ) &0xFF ] ^ KT0[(ta2 >> 16) & 0xFF],8); \
X2 ^= rotr(RT0[( Y0 >> 8 ) &0xFF ] ^ KT0[(ta2 >> 8) & 0xFF],16); \
X2 ^= rotr(RT0[( Y3 ) &0xFF ] ^ KT0[(ta2 ) & 0xFF],24); \
\
X3 = RT0[( Y3 >> 24 ) &0xFF ] ^ KT0[(ta3 >> 24) & 0xFF]; \
X3 ^= rotr(RT0[( Y2 >> 16 ) &0xFF ] ^ KT0[(ta3 >> 16) & 0xFF],8); \
X3 ^= rotr(RT0[( Y1 >> 8 ) &0xFF ] ^ KT0[(ta3 >> 8) & 0xFF],16); \
X3 ^= rotr(RT0[( Y0 ) &0xFF ] ^ KT0[(ta3 ) & 0xFF],24); \
}
/*
 * AES_ROUND2NODK_DEC, big-endian variant, two-table version.
 * Same inputs and outputs as the one-table form, but uses RT0/KT0 and
 * RT1/KT1: the terms for the two most significant bytes are taken directly
 * from table 0 and table 1, and the terms for the two least significant
 * bytes reuse those tables with a 16-bit right rotation.
 * Expands to a braced block that declares ta0..ta3.
 */
#define AES_ROUND2NODK_DEC(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
unsigned int ta0 = TAB[I]; \
unsigned int ta1 = TAB[I+1]; \
unsigned int ta2 = TAB[I+2]; \
unsigned int ta3 = TAB[I+3]; \
X0 = RT0[(Y0 >> 24) &0xFF ] ^ KT0[(ta0 >> 24) & 0xFF]; \
X0 ^= RT1[(Y3 >> 16) &0xFF ] ^ KT1[(ta0 >> 16) & 0xFF]; \
X0 ^= rotr(RT0[(Y2 >> 8) &0xFF ] ^ KT0[(ta0 >> 8 ) & 0xFF],16); \
X0 ^= rotr(RT1[(Y1 ) &0xFF ] ^ KT1[(ta0 ) & 0xFF],16); \
\
X1 = RT0[( Y1 >> 24 ) &0xFF ] ^ KT0[(ta1 >> 24) & 0xFF]; \
X1 ^= RT1[( Y0 >> 16 ) &0xFF ] ^ KT1[(ta1 >> 16) & 0xFF]; \
X1 ^= rotr(RT0[( Y3 >> 8 ) &0xFF ] ^ KT0[(ta1 >> 8) & 0xFF],16); \
X1 ^= rotr(RT1[( Y2 ) &0xFF ] ^ KT1[(ta1 ) & 0xFF],16); \
\
X2 = RT0[( Y2 >> 24 ) &0xFF ] ^ KT0[(ta2 >> 24) & 0xFF]; \
X2 ^= RT1[( Y1 >> 16 ) &0xFF ] ^ KT1[(ta2 >> 16) & 0xFF]; \
X2 ^= rotr(RT0[( Y0 >> 8 ) &0xFF ] ^ KT0[(ta2 >> 8) & 0xFF],16); \
X2 ^= rotr(RT1[( Y3 ) &0xFF ] ^ KT1[(ta2 ) & 0xFF],16); \
\
X3 = RT0[( Y3 >> 24 ) &0xFF ] ^ KT0[(ta3 >> 24) & 0xFF]; \
X3 ^= RT1[( Y2 >> 16 ) &0xFF ] ^ KT1[(ta3 >> 16) & 0xFF]; \
X3 ^= rotr(RT0[( Y1 >> 8 ) &0xFF ] ^ KT0[(ta3 >> 8) & 0xFF],16); \
X3 ^= rotr(RT1[( Y0 ) &0xFF ] ^ KT1[(ta3 ) & 0xFF],16); \
}
/*
 * AES_ROUND4NODK_DEC, big-endian variant, four-table version.
 * Same inputs and outputs as the one- and two-table forms, but every byte
 * position has its own table pair (RT0/KT0 for the most significant byte
 * down to RT3/KT3 for the least significant), so no rotation is needed.
 * Expands to a braced block that declares ta0..ta3.
 */
#define AES_ROUND4NODK_DEC(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
unsigned int ta0 = TAB[I]; \
unsigned int ta1 = TAB[I+1]; \
unsigned int ta2 = TAB[I+2]; \
unsigned int ta3 = TAB[I+3]; \
X0 = RT0[(Y0 >> 24) &0xFF ] ^ KT0[(ta0 >> 24) & 0xFF]; \
X0 ^= RT1[(Y3 >> 16) &0xFF ] ^ KT1[(ta0 >> 16) & 0xFF]; \
X0 ^= RT2[(Y2 >> 8) &0xFF ] ^ KT2[(ta0 >> 8 ) & 0xFF]; \
X0 ^= RT3[(Y1 ) &0xFF ] ^ KT3[(ta0 ) & 0xFF]; \
\
X1 = RT0[( Y1 >> 24 ) &0xFF ] ^ KT0[(ta1 >> 24) & 0xFF]; \
X1 ^= RT1[( Y0 >> 16 ) &0xFF ] ^ KT1[(ta1 >> 16) & 0xFF]; \
X1 ^= RT2[( Y3 >> 8 ) &0xFF ] ^ KT2[(ta1 >> 8) & 0xFF]; \
X1 ^= RT3[( Y2 ) &0xFF ] ^ KT3[(ta1 ) & 0xFF]; \
\
X2 = RT0[( Y2 >> 24 ) &0xFF ] ^ KT0[(ta2 >> 24) & 0xFF]; \
X2 ^= RT1[( Y1 >> 16 ) &0xFF ] ^ KT1[(ta2 >> 16) & 0xFF]; \
X2 ^= RT2[( Y0 >> 8 ) &0xFF ] ^ KT2[(ta2 >> 8) & 0xFF]; \
X2 ^= RT3[( Y3 ) &0xFF ] ^ KT3[(ta2 ) & 0xFF]; \
\
X3 = RT0[( Y3 >> 24 ) &0xFF ] ^ KT0[(ta3 >> 24) & 0xFF]; \
X3 ^= RT1[( Y2 >> 16 ) &0xFF ] ^ KT1[(ta3 >> 16) & 0xFF]; \
X3 ^= RT2[( Y1 >> 8 ) &0xFF ] ^ KT2[(ta3 >> 8) & 0xFF]; \
X3 ^= RT3[( Y0 ) &0xFF ] ^ KT3[(ta3 ) & 0xFF]; \
}
#else
/*
 * AES_ROUND1NODK_DEC, little-endian variant, single-table version.
 * Computes X0..X3 from Y0..Y3 and the four words TAB[I..I+3] using only the
 * RT0 and KT0 tables. Every output word is the XOR of four terms; each term
 * is an RT0 lookup on one byte of a Y word XORed with a KT0 lookup on the
 * byte at the same position of the matching TAB word. The terms taken from
 * the 2nd, 3rd and 4th byte (counting from the least significant) are
 * rotated left by 8, 16 and 24 bits.
 * TAB is presumably the round-key array -- TODO confirm against the callers.
 * Expands to a braced block that declares ta0..ta3; X0..X3 must be
 * assignable.
 */
#define AES_ROUND1NODK_DEC(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
unsigned int ta0 = TAB[I]; \
unsigned int ta1 = TAB[I+1]; \
unsigned int ta2 = TAB[I+2]; \
unsigned int ta3 = TAB[I+3]; \
X0 = RT0[(Y0 ) &0xFF ] ^ KT0[(ta0 ) & 0xFF]; \
X0 ^= rotl(RT0[(Y3 >> 8) &0xFF ] ^ KT0[(ta0 >> 8) & 0xFF],8); \
X0 ^= rotl(RT0[(Y2 >> 16) &0xFF ] ^ KT0[(ta0 >> 16) & 0xFF],16); \
X0 ^= rotl(RT0[(Y1 >> 24) &0xFF ] ^ KT0[(ta0 >> 24) & 0xFF],24); \
\
X1 = RT0[( Y1 ) &0xFF ] ^ KT0[(ta1 ) & 0xFF]; \
X1 ^= rotl(RT0[( Y0 >> 8) &0xFF ] ^ KT0[(ta1 >> 8) & 0xFF],8); \
X1 ^= rotl(RT0[( Y3 >> 16) &0xFF ] ^ KT0[(ta1 >> 16) & 0xFF],16); \
X1 ^= rotl(RT0[( Y2 >> 24) &0xFF ] ^ KT0[(ta1 >> 24) & 0xFF],24); \
\
X2 = RT0[( Y2 ) &0xFF ] ^ KT0[(ta2 ) & 0xFF]; \
X2 ^= rotl(RT0[( Y1 >> 8) &0xFF ] ^ KT0[(ta2 >> 8) & 0xFF],8); \
X2 ^= rotl(RT0[( Y0 >> 16) &0xFF ] ^ KT0[(ta2 >> 16) & 0xFF],16); \
X2 ^= rotl(RT0[( Y3 >> 24) &0xFF ] ^ KT0[(ta2 >> 24) & 0xFF],24); \
\
X3 = RT0[( Y3 ) &0xFF ] ^ KT0[(ta3 ) & 0xFF]; \
X3 ^= rotl(RT0[( Y2 >> 8) &0xFF ] ^ KT0[(ta3 >> 8) & 0xFF],8); \
X3 ^= rotl(RT0[( Y1 >> 16) &0xFF ] ^ KT0[(ta3 >> 16) & 0xFF],16); \
X3 ^= rotl(RT0[( Y0 >> 24) &0xFF ] ^ KT0[(ta3 >> 24) & 0xFF],24); \
}
/*
 * AES_ROUND2NODK_DEC, little-endian variant, two-table version.
 * Same inputs and outputs as the one-table form, but uses RT0/KT0 and
 * RT1/KT1: the terms for the two least significant bytes are taken directly
 * from table 0 and table 1, and the terms for the two most significant
 * bytes reuse those tables with a 16-bit left rotation.
 * Expands to a braced block that declares ta0..ta3.
 */
#define AES_ROUND2NODK_DEC(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
unsigned int ta0 = TAB[I]; \
unsigned int ta1 = TAB[I+1]; \
unsigned int ta2 = TAB[I+2]; \
unsigned int ta3 = TAB[I+3]; \
X0 = RT0[(Y0 ) &0xFF ] ^ KT0[(ta0 ) & 0xFF]; \
X0 ^= RT1[(Y3 >> 8) &0xFF ] ^ KT1[(ta0 >> 8) & 0xFF]; \
X0 ^= rotl(RT0[(Y2 >> 16) &0xFF ] ^ KT0[(ta0 >> 16) & 0xFF],16); \
X0 ^= rotl(RT1[(Y1 >> 24) &0xFF ] ^ KT1[(ta0 >> 24) & 0xFF],16); \
\
X1 = RT0[( Y1 ) &0xFF ] ^ KT0[(ta1 ) & 0xFF]; \
X1 ^= RT1[( Y0 >> 8) &0xFF ] ^ KT1[(ta1 >> 8) & 0xFF]; \
X1 ^= rotl(RT0[( Y3 >> 16) &0xFF ] ^ KT0[(ta1 >> 16) & 0xFF],16); \
X1 ^= rotl(RT1[( Y2 >> 24) &0xFF ] ^ KT1[(ta1 >> 24) & 0xFF],16); \
\
X2 = RT0[( Y2 ) &0xFF ] ^ KT0[(ta2 ) & 0xFF]; \
X2 ^= RT1[( Y1 >> 8) &0xFF ] ^ KT1[(ta2 >> 8) & 0xFF]; \
X2 ^= rotl(RT0[( Y0 >> 16) &0xFF ] ^ KT0[(ta2 >> 16) & 0xFF],16); \
X2 ^= rotl(RT1[( Y3 >> 24) &0xFF ] ^ KT1[(ta2 >> 24) & 0xFF],16); \
\
X3 = RT0[( Y3 ) &0xFF ] ^ KT0[(ta3 ) & 0xFF]; \
X3 ^= RT1[( Y2 >> 8) &0xFF ] ^ KT1[(ta3 >> 8) & 0xFF]; \
X3 ^= rotl(RT0[( Y1 >> 16) &0xFF ] ^ KT0[(ta3 >> 16) & 0xFF],16); \
X3 ^= rotl(RT1[( Y0 >> 24) &0xFF ] ^ KT1[(ta3 >> 24) & 0xFF],16); \
}
/*
 * AES_ROUND4NODK_DEC, little-endian variant, four-table version.
 * Same inputs and outputs as the one- and two-table forms, but every byte
 * position has its own table pair (RT0/KT0 for the least significant byte
 * up to RT3/KT3 for the most significant), so no rotation is needed.
 * Expands to a braced block that declares ta0..ta3.
 */
#define AES_ROUND4NODK_DEC(TAB,I,X0,X1,X2,X3,Y0,Y1,Y2,Y3) \
{ \
unsigned int ta0 = TAB[I]; \
unsigned int ta1 = TAB[I+1]; \
unsigned int ta2 = TAB[I+2]; \
unsigned int ta3 = TAB[I+3]; \
X0 = RT0[(Y0 ) &0xFF ] ^ KT0[(ta0 ) & 0xFF]; \
X0 ^= RT1[(Y3 >> 8) &0xFF ] ^ KT1[(ta0 >> 8) & 0xFF]; \
X0 ^= RT2[(Y2 >> 16) &0xFF ] ^ KT2[(ta0 >> 16) & 0xFF]; \
X0 ^= RT3[(Y1 >> 24) &0xFF ] ^ KT3[(ta0 >> 24) & 0xFF]; \
\
X1 = RT0[( Y1 ) &0xFF ] ^ KT0[(ta1 ) & 0xFF]; \
X1 ^= RT1[( Y0 >> 8) &0xFF ] ^ KT1[(ta1 >> 8) & 0xFF]; \
X1 ^= RT2[( Y3 >> 16) &0xFF ] ^ KT2[(ta1 >> 16) & 0xFF]; \
X1 ^= RT3[( Y2 >> 24) &0xFF ] ^ KT3[(ta1 >> 24) & 0xFF]; \
\
X2 = RT0[( Y2 ) &0xFF ] ^ KT0[(ta2 ) & 0xFF]; \
X2 ^= RT1[( Y1 >> 8) &0xFF ] ^ KT1[(ta2 >> 8) & 0xFF]; \
X2 ^= RT2[( Y0 >> 16) &0xFF ] ^ KT2[(ta2 >> 16) & 0xFF]; \
X2 ^= RT3[( Y3 >> 24) &0xFF ] ^ KT3[(ta2 >> 24) & 0xFF]; \
\
X3 = RT0[( Y3 ) &0xFF ] ^ KT0[(ta3 ) & 0xFF]; \
X3 ^= RT1[( Y2 >> 8) &0xFF ] ^ KT1[(ta3 >> 8) & 0xFF]; \
X3 ^= RT2[( Y1 >> 16) &0xFF ] ^ KT2[(ta3 >> 16) & 0xFF]; \
X3 ^= RT3[( Y0 >> 24) &0xFF ] ^ KT3[(ta3 >> 24) & 0xFF]; \
}
#endif
#ifndef RV32B
/*
 * Portable 32-bit rotations, used when RV32B is not defined.
 * The shift counts are reduced modulo 32 so that n == 0 (or any multiple of
 * 32) never produces a shift by the full word width, which is undefined
 * behaviour in C. Results for 1 <= n <= 31 are unchanged.
 */
/* Rotate x right by n bits. */
static inline unsigned int rotr(const unsigned int x, const unsigned int n) {
  return (x >> (n & 31)) | (x << ((32 - n) & 31));
}
/* Rotate x left by n bits. */
static inline unsigned int rotl(const unsigned int x, const unsigned int n) {
  return (x << (n & 31)) | (x >> ((32 - n) & 31));
}
#else
/* With RV32B defined, map the rotations onto the rvintrin.h intrinsics. */
#include "rvintrin.h"
#define rotr(a,b) _rv32_ror(a,b)
#define rotl(a,b) _rv32_rol(a,b)
#endif
/*
 * Expand a 128-bit AES key into the encryption round-key schedule.
 *   key      : the four key words, copied unchanged to aes_edrk[0..3]
 *   aes_edrk : receives 44 words in total (indices 0..43)
 * Each of the ten expansion steps rotates the last word by one byte
 * (direction depends on host byte order), runs it through the
 * f_FSb_32__1 / f_FSb_32__2 helpers, mixes in a round constant and then
 * chains the XOR through the remaining three words.
 */
static inline void aes128_setkey_encrypt(const unsigned int key[], unsigned int *aes_edrk) {
  /* Round constants, stored in the byte lane that matches the host order. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  static const unsigned int round_consts[10] = {
    0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
    0x20000000, 0x40000000, 0x80000000, 0x1B000000, 0x36000000
  };
#else
  static const unsigned int round_consts[10] = {
    0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010,
    0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036
  };
#endif
  unsigned int words[4];
  unsigned int *dst = aes_edrk;
  unsigned int step, lane;
  unsigned int rotated, substituted;

  /* The first four schedule words are the key itself. */
  for (lane = 0; lane < 4; ++lane) {
    words[lane] = key[lane];
    *dst++ = words[lane];
  }

  for (step = 0; step < 10; ++step) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    rotated = rotl(words[3], 8);
#else
    rotated = rotr(words[3], 8);
#endif
    substituted = f_FSb_32__1(rotated) ^ f_FSb_32__2(rotated);
    words[0] ^= round_consts[step] ^ substituted;
    *dst++ = words[0];
    /* Each remaining word absorbs the freshly updated previous one. */
    for (lane = 1; lane < 4; ++lane) {
      words[lane] ^= words[lane - 1];
      *dst++ = words[lane];
    }
  }
}
/*
 * Expand a 256-bit AES key into the encryption round-key schedule.
 *   key      : the eight key words, copied unchanged to aes_edrk[0..7]
 *   aes_edrk : receives 60 words in total (indices 0..59)
 * Seven passes are made. Every pass updates the low four words using a
 * byte-rotated, substituted copy of the last word plus a round constant;
 * all passes except the last also update the high four words using a
 * substituted (not rotated) copy of word 3.
 */
static inline void aes256_setkey_encrypt(const unsigned int key[], unsigned int *aes_edrk) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  unsigned int round_const = 0x01000000;
#else
  unsigned int round_const = 0x00000001;
#endif
  unsigned int words[8];
  unsigned int *dst = aes_edrk;
  unsigned int pass, lane;
  unsigned int rotated, substituted;

  /* The first eight schedule words are the key itself. */
  for (lane = 0; lane < 8; ++lane) {
    words[lane] = key[lane];
    *dst++ = words[lane];
  }

  for (pass = 0; pass < 7; ++pass) {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    rotated = rotl(words[7], 8);
#else
    rotated = rotr(words[7], 8);
#endif
    substituted = f_FSb_32__1(rotated) ^ f_FSb_32__2(rotated);
    words[0] ^= round_const ^ substituted;
    round_const <<= 1;
    *dst++ = words[0];
    for (lane = 1; lane < 4; ++lane) {
      words[lane] ^= words[lane - 1];
      *dst++ = words[lane];
    }

    /* The final pass only produces the low four words. */
    if (pass == 6) break;

    substituted = f_FSb_32__1(words[3]) ^ f_FSb_32__2(words[3]);
    words[4] ^= substituted;
    *dst++ = words[4];
    for (lane = 5; lane < 8; ++lane) {
      words[lane] ^= words[lane - 1];
      *dst++ = words[lane];
    }
  }
}

View file

@ -0,0 +1,323 @@
/*
* try-anything.c version 20190729
* D. J. Bernstein
* Some portions adapted from TweetNaCl by Bernstein, Janssen, Lange, Schwabe.
* Public domain.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/resource.h>
#include "kernelrandombytes.h"
#include "cpucycles.h"
#include "crypto_uint8.h"
#include "crypto_uint32.h"
#include "crypto_uint64.h"
#include "try.h"
/* Short aliases for the crypto_uintN types pulled in by the headers above. */
typedef crypto_uint8 u8;
typedef crypto_uint32 u32;
typedef crypto_uint64 u64;
/* FOR(i,n): loop header running i from 0 up to n-1; i must already be declared. */
#define FOR(i,n) for (i = 0;i < n;++i)
/* Rotate x left by c bits within 32 bits (callers in this file use c in 1..31). */
static u32 L32(u32 x,int c)
{
  const u32 shifted_up = x << c;
  const u32 wrapped = (x & 0xffffffff) >> (32 - c);
  return shifted_up | wrapped;
}
/* Load four bytes from x as a little-endian 32-bit word (x[0] is the low byte). */
static u32 ld32(const u8 *x)
{
  u32 word = 0;
  int idx;
  for (idx = 3; idx >= 0; --idx)
    word = (word << 8) | x[idx];
  return word;
}
/* Store u into x[0..3] in little-endian order (x[0] receives the low byte). */
static void st32(u8 *x,u32 u)
{
  x[0] = u;
  x[1] = u >> 8;
  x[2] = u >> 16;
  x[3] = u >> 24;
}
/* 16-byte constant placed on the diagonal of the state (plus terminating NUL). */
static const u8 sigma[17] = "expand 32-byte k";
/*
 * 20-round core function: builds a 16-word state from sigma, the 32-byte
 * key k and the 16-byte input block in, mixes it, adds the initial state
 * back and writes the 64-byte result to out.
 * All of k and in is read before out is written, so out may alias k.
 */
static void core(u8 *out,const u8 *in,const u8 *k)
{
  u32 next[16],x[16],initial[16],quad[4];
  int round,row,pos;

  /* State layout: sigma at 0,5,10,15; key at 1..4 and 11..14; input at 6..9. */
  for (row = 0; row < 4; ++row) {
    x[5*row] = ld32(sigma + 4*row);
    x[1 + row] = ld32(k + 4*row);
    x[6 + row] = ld32(in + 4*row);
    x[11 + row] = ld32(k + 16 + 4*row);
  }
  for (pos = 0; pos < 16; ++pos) initial[pos] = x[pos];

  for (round = 0; round < 20; ++round) {
    for (row = 0; row < 4; ++row) {
      /* Gather four words, starting on the diagonal and stepping by 4. */
      for (pos = 0; pos < 4; ++pos) quad[pos] = x[(5*row + 4*pos) & 15];
      quad[1] ^= L32(quad[0] + quad[3], 7);
      quad[2] ^= L32(quad[1] + quad[0], 9);
      quad[3] ^= L32(quad[2] + quad[1],13);
      quad[0] ^= L32(quad[3] + quad[2],18);
      /* Scatter into row `row` of the next state; this transposes the
         matrix so the following round works on the other direction. */
      for (pos = 0; pos < 4; ++pos) next[4*row + ((row + pos) & 3)] = quad[pos];
    }
    for (pos = 0; pos < 16; ++pos) x[pos] = next[pos];
  }

  /* Feed-forward: add the initial state and serialise little-endian. */
  for (pos = 0; pos < 16; ++pos) st32(out + 4 * pos, x[pos] + initial[pos]);
}
/*
 * Write b bytes of keystream to c, generated by core() from the 8-byte
 * nonce n and the 32-byte key k. The 16-byte core input is the nonce
 * followed by an 8-byte little-endian block counter starting at zero.
 */
static void salsa20(u8 *c,u64 b,const u8 *n,const u8 *k)
{
  u8 input[16],keystream[64];
  u32 carry,idx;

  if (b == 0) return;

  for (idx = 0; idx < 8; ++idx) input[idx] = n[idx];
  for (idx = 8; idx < 16; ++idx) input[idx] = 0;

  /* Whole 64-byte blocks. */
  for (; b >= 64; b -= 64, c += 64) {
    core(keystream,input,k);
    for (idx = 0; idx < 64; ++idx) c[idx] = keystream[idx];
    /* Add one to the counter held in input[8..15], propagating the carry. */
    carry = 1;
    for (idx = 8; idx < 16; ++idx) {
      carry += (u32) input[idx];
      input[idx] = carry;
      carry >>= 8;
    }
  }

  /* Trailing partial block, if any. */
  if (b) {
    core(keystream,input,k);
    for (idx = 0; idx < b; ++idx) c[idx] = keystream[idx];
  }
}
/* Add one to the 8-byte little-endian counter n, carrying into higher bytes. */
static void increment(u8 *n)
{
  int idx;
  for (idx = 0; idx < 8; ++idx) {
    ++n[idx];
    /* A non-zero byte means no carry out of this position: done. */
    if (n[idx]) return;
  }
}
/*
 * Fill x[0..xlen-1] with deterministic test data: salsa20 keystream under a
 * fixed key and a nonce that is advanced by one after every call, so
 * successive calls produce different data.
 */
static void testvector(unsigned char *x,unsigned long long xlen)
{
  static const unsigned char key[33] = "generate inputs for test vectors";
  static unsigned char nonce[8];
  salsa20(x, xlen, nonce, key);
  increment(nonce);
}
/*
 * Return a deterministic 64-bit value: eight bytes from testvector(),
 * assembled little-endian (byte 0 is the least significant).
 */
unsigned long long myrandom(void)
{
  unsigned char bytes[8];
  unsigned long long value = 0;
  int idx;
  testvector(bytes, 8);
  for (idx = 7; idx >= 0; --idx)
    value = (value << 8) | bytes[idx];
  return value;
}
/*
 * Fill x[0..xlen-1] with guard bytes: salsa20 keystream under a fixed key
 * and a nonce that is advanced by one after every call.
 */
static void canary(unsigned char *x,unsigned long long xlen)
{
  static const unsigned char key[33] = "generate pad to catch overwrites";
  static unsigned char nonce[8];
  salsa20(x, xlen, nonce, key);
  increment(nonce);
}
/*
 * Write fresh 16-byte guard regions immediately before and after
 * x[0..xlen-1] and mirror both guards around x2, leaving the payload of
 * either buffer untouched.
 */
void double_canary(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  unsigned char *front = x - 16;
  unsigned char *back = x + xlen;
  canary(front, 16);
  canary(back, 16);
  memcpy(x2 - 16, front, 16);
  memcpy(x2 + xlen, back, 16);
}
/*
 * Prepare an input buffer: fill x[0..xlen-1] with test data, surround it
 * with 16-byte guard regions, then copy guards and payload (xlen + 32
 * bytes starting at x - 16) to the reference copy around x2.
 */
void input_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  unsigned char *front = x - 16;
  testvector(x, xlen);
  canary(front, 16);
  canary(x + xlen, 16);
  memcpy(x2 - 16, front, xlen + 32);
}
/*
 * Verify that an input buffer and its guards still match the reference
 * copy: compares xlen + 32 bytes starting 16 bytes before each pointer.
 * On mismatch, reports fun on stderr and exits with status 111.
 */
void input_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
{
  const int unchanged = (memcmp(x2 - 16, x - 16, xlen + 32) == 0);
  if (unchanged) return;
  fprintf(stderr,"%s overwrites input\n",fun);
  exit(111);
}
/*
 * Prepare an output buffer: overwrite the payload and both 16-byte guard
 * regions of x (xlen + 32 bytes from x - 16) with guard bytes, then copy
 * the same span to the reference buffer around x2.
 */
void output_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  const unsigned long long span = xlen + 32;
  unsigned char *front = x - 16;
  canary(front, span);
  memcpy(x2 - 16, front, span);
}
/*
 * Verify that the 16-byte guard regions before and after an output buffer
 * still match the reference copy; the payload itself is not compared.
 * On mismatch, reports fun on stderr and exits with status 111.
 */
void output_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
{
  const int front_differs = (memcmp(x2 - 16, x - 16, 16) != 0);
  if (front_differs) {
    fprintf(stderr,"%s writes before output\n",fun);
    exit(111);
  }
  if (memcmp(x2 + xlen, x + xlen, 16) != 0) {
    fprintf(stderr,"%s writes after output\n",fun);
    exit(111);
  }
}
/* Running checksum: output of the last core() call, also used as its key. */
static unsigned char checksum_state[64];
/* Hex rendering of the checksum (filled in by main); 64 digits plus NUL. */
static char checksum_hex[65];
/*
 * Absorb x[0..xlen-1] into checksum_state. Full 16-byte blocks are fed
 * through core() with the state acting as both key and output; the
 * remaining 0..15 bytes are padded with a single 1 byte then zeros, and
 * the state's first byte is flipped before the final core() call.
 */
void checksum(const unsigned char *x,unsigned long long xlen)
{
  u8 tail[16];
  int idx;

  for (; xlen >= 16; x += 16, xlen -= 16)
    core(checksum_state,x,checksum_state);

  /* xlen is now 0..15: copy the leftover bytes and zero the rest. */
  for (idx = 0; idx < 16; ++idx)
    tail[idx] = (idx < xlen) ? x[idx] : 0;
  tail[xlen] = 1;

  checksum_state[0] ^= 1;
  core(checksum_state,tail,checksum_state);
}
/*
 * Print s to stdout as a single whitespace-free token followed by a space:
 * space, tab, CR and LF are replaced by '_', and an empty string is
 * printed as '-'.
 */
static void printword(const char *s)
{
  const char *cursor;
  if (*s == '\0') putchar('-');
  for (cursor = s; *cursor != '\0'; ++cursor) {
    switch (*cursor) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
        putchar('_');
        break;
      default:
        putchar(*cursor);
        break;
    }
  }
  putchar(' ');
}
/* Print x to stdout in decimal, followed by a single space. */
static void printnum(long long x)
{
printf("%lld ",x);
}
/* Print the message why on stderr and terminate the process with status 111. */
void fail(const char *why)
{
fprintf(stderr,"%s\n",why);
exit(111);
}
/*
 * Allocate len zeroed bytes starting on a 64-byte boundary.
 * The underlying allocation is len + 256 bytes, filled with random()
 * values; the returned pointer is placed 64..127 bytes into it, so the
 * payload has non-zero slack on both sides. Calls fail() when out of
 * memory. The block is never freed (the original pointer is discarded).
 */
unsigned char *alignedcalloc(unsigned long long len)
{
  const unsigned long long total = len + 256;
  unsigned char *raw = (unsigned char *) calloc(1,total);
  unsigned char *aligned;
  unsigned long long pos;

  if (raw == NULL) fail("out of memory");

  /* will never deallocate so shifting is ok */
  for (pos = 0; pos < total; ++pos) raw[pos] = random();

  /* Skip 64 bytes, then round up to the next multiple of 64. */
  aligned = raw + 64;
  aligned += 63 & (-(unsigned long) aligned);

  for (pos = 0; pos < len; ++pos) aligned[pos] = 0;
  return aligned;
}
/* Number of doit() timings kept; main() takes TIMINGS + 1 timestamps. */
#define TIMINGS 63
/* Timestamps from cpucycles(), later replaced in place by their differences. */
static long long cycles[TIMINGS + 1];
/*
 * Set the soft and hard limits for open files, processes and core-dump
 * size to zero, for whichever of those resources the platform defines.
 * The setrlimit() return values are deliberately not checked.
 */
void limits()
{
#ifdef RLIM_INFINITY
  struct rlimit zero;
  zero.rlim_cur = 0;
  zero.rlim_max = 0;
#ifdef RLIMIT_NOFILE
  setrlimit(RLIMIT_NOFILE, &zero);
#endif
#ifdef RLIMIT_NPROC
  setrlimit(RLIMIT_NPROC, &zero);
#endif
#ifdef RLIMIT_CORE
  setrlimit(RLIMIT_CORE, &zero);
#endif
#endif
}
/* Receives one byte from kernelrandombytes() at startup; not read again in this file's visible code. */
static unsigned char randombyte[1];
/*
 * Benchmark driver. Runs test() once (timed), then takes TIMINGS + 1
 * timestamps, each followed by a doit() call, and prints one line with:
 * the checksum in hex, a median doit() cycle count, the cycle count of
 * test(), the reported cycles per second and the implementation name.
 */
int main()
{
long long i;
long long j;
long long abovej;
long long belowj;
long long checksumcycles;
long long cyclespersecond;
/* Two throw-away reads of the cycle counter before asking for its frequency. */
cycles[0] = cpucycles();
cycles[1] = cpucycles();
cyclespersecond = cpucycles_persecond();
kernelrandombytes(randombyte,1);
/* preallocate() runs before limits(), allocate() after it. */
preallocate();
limits();
allocate();
srandom(getpid());
/* Time the whole correctness test; it also builds the checksum. */
cycles[0] = cpucycles();
test();
cycles[1] = cpucycles();
checksumcycles = cycles[1] - cycles[0];
predoit();
/* One pass over the timestamp array without doit(); values are overwritten below. */
for (i = 0;i <= TIMINGS;++i) {
cycles[i] = cpucycles();
}
/* Timestamp taken before each doit() call. */
for (i = 0;i <= TIMINGS;++i) {
cycles[i] = cpucycles();
doit();
}
/* Convert consecutive timestamps into TIMINGS durations. */
for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
/* Pick a median: an entry with fewer than half of the timings strictly
   below it and fewer than half strictly above it. */
for (j = 0;j < TIMINGS;++j) {
belowj = 0;
for (i = 0;i < TIMINGS;++i) if (cycles[i] < cycles[j]) ++belowj;
abovej = 0;
for (i = 0;i < TIMINGS;++i) if (cycles[i] > cycles[j]) ++abovej;
if (belowj * 2 < TIMINGS && abovej * 2 < TIMINGS) break;
}
/* Render the first 32 bytes of the checksum state as 64 lowercase hex digits. */
for (i = 0;i < 32;++i) {
checksum_hex[2 * i] = "0123456789abcdef"[15 & (checksum_state[i] >> 4)];
checksum_hex[2 * i + 1] = "0123456789abcdef"[15 & checksum_state[i]];
}
/* i is 32 here, so this terminates the string at index 64. */
checksum_hex[2 * i] = 0;
printword(checksum_hex);
printnum(cycles[j]);
printnum(checksumcycles);
printnum(cyclespersecond);
printword(primitiveimplementation);
printf("\n");
return 0;
}

View file

@ -0,0 +1,135 @@
/*
* crypto_stream/try.c version 20140423
* D. J. Bernstein
* Public domain.
* Auto-generated by trygen.py; do not edit.
*/
#include "crypto_stream.h"
#include "try.h"
/* Implementation name printed by the benchmark driver. */
const char *primitiveimplementation = crypto_stream_IMPLEMENTATION;
/* Message length used by doit() for the timed crypto_stream_xor call. */
#define TUNE_BYTES 1536
/* Largest message length exercised by test(); smaller when SMALL is defined. */
#ifdef SMALL
#define MAXTEST_BYTES 128
#else
#define MAXTEST_BYTES 4096
#endif
/* Number of iterations of the test() loop; smaller when SMALL is defined. */
#ifdef SMALL
#define LOOPS 512
#else
#define LOOPS 4096
#endif
/* Working buffers (set up by allocate()): key, nonce, message, ciphertext, stream. */
static unsigned char *k;
static unsigned char *n;
static unsigned char *m;
static unsigned char *c;
static unsigned char *s;
/* Second copy of each buffer, used by test() as reference / second run. */
static unsigned char *k2;
static unsigned char *n2;
static unsigned char *m2;
static unsigned char *c2;
static unsigned char *s2;
/* Fixed key and nonce lengths come from the primitive's header. */
#define klen crypto_stream_KEYBYTES
#define nlen crypto_stream_NONCEBYTES
/* Per-iteration lengths chosen by test(). */
unsigned long long mlen;
unsigned long long clen;
unsigned long long slen;
/* Hook called by the driver before limits(); nothing to do for this primitive. */
void preallocate(void)
{
}
/*
 * Allocate the ten working buffers. Every buffer gets the same size: the
 * largest of TUNE_BYTES, MAXTEST_BYTES, the key length and the nonce
 * length. Buffers are allocated in the order k, n, m, c, s, k2, n2, m2,
 * c2, s2.
 */
void allocate(void)
{
  const unsigned long long needs[] = {
    TUNE_BYTES, MAXTEST_BYTES, crypto_stream_KEYBYTES, crypto_stream_NONCEBYTES
  };
  unsigned char **const buffers[] = {
    &k, &n, &m, &c, &s, &k2, &n2, &m2, &c2, &s2
  };
  unsigned long long alloclen = 0;
  unsigned int idx;

  for (idx = 0; idx < sizeof needs / sizeof needs[0]; ++idx)
    if (alloclen < needs[idx]) alloclen = needs[idx];

  for (idx = 0; idx < sizeof buffers / sizeof buffers[0]; ++idx)
    *buffers[idx] = alignedcalloc(alloclen);
}
/* Hook called by the driver once before the timed doit() loop; nothing to do here. */
void predoit(void)
{
}
/* The timed operation: one crypto_stream_xor over TUNE_BYTES bytes of m into c; the return value is ignored. */
void doit(void)
{
crypto_stream_xor(c,m,TUNE_BYTES,n,k);
}
/*
 * Correctness test for crypto_stream and crypto_stream_xor, repeated LOOPS
 * times with a message length drawn from myrandom(). Each iteration checks
 * return codes, that inputs and the guard regions around outputs are left
 * intact, that results are deterministic, that xor output equals
 * stream XOR message, and that the output may overlap the message, nonce
 * or key buffer. Outputs are folded into the global checksum. Any failure
 * ends the program through fail() or the *_compare helpers.
 */
void test(void)
{
unsigned long long j;
unsigned long long loop;
for (loop = 0;loop < LOOPS;++loop) {
/* Length in 0..MAXTEST_BYTES, shared by message, ciphertext and stream. */
mlen = myrandom() % (MAXTEST_BYTES + 1);
clen = mlen;
slen = mlen;
/* --- crypto_stream: first run on the primary buffers --- */
output_prepare(s2,s,slen);
input_prepare(n2,n,nlen);
input_prepare(k2,k,klen);
if (crypto_stream(s,slen,n,k) != 0) fail("crypto_stream returns nonzero");
checksum(s,slen);
output_compare(s2,s,slen,"crypto_stream");
input_compare(n2,n,nlen,"crypto_stream");
input_compare(k2,k,klen,"crypto_stream");
/* Second run on the copies with fresh guards: must give the same stream. */
double_canary(s2,s,slen);
double_canary(n2,n,nlen);
double_canary(k2,k,klen);
if (crypto_stream(s2,slen,n2,k2) != 0) fail("crypto_stream returns nonzero");
if (memcmp(s2,s,slen) != 0) fail("crypto_stream is nondeterministic");
/* --- crypto_stream_xor: first run on the primary buffers --- */
output_prepare(c2,c,clen);
input_prepare(m2,m,mlen);
memcpy(n2,n,nlen);
double_canary(n2,n,nlen);
memcpy(k2,k,klen);
double_canary(k2,k,klen);
if (crypto_stream_xor(c,m,mlen,n,k) != 0) fail("crypto_stream_xor returns nonzero");
/* Ciphertext must equal keystream XOR message, byte by byte. */
for (j = 0;j < mlen;++j)
if ((s[j] ^ m[j]) != c[j]) fail("crypto_stream_xor does not match crypto_stream");
checksum(c,clen);
output_compare(c2,c,clen,"crypto_stream_xor");
input_compare(m2,m,mlen,"crypto_stream_xor");
input_compare(n2,n,nlen,"crypto_stream_xor");
input_compare(k2,k,klen,"crypto_stream_xor");
/* Second run on the copies with fresh guards: must give the same ciphertext. */
double_canary(c2,c,clen);
double_canary(m2,m,mlen);
double_canary(n2,n,nlen);
double_canary(k2,k,klen);
if (crypto_stream_xor(c2,m2,mlen,n2,k2) != 0) fail("crypto_stream_xor returns nonzero");
if (memcmp(c2,c,clen) != 0) fail("crypto_stream_xor is nondeterministic");
/* Overlap tests: the output buffer is the message, then the nonce, then the key. */
double_canary(c2,c,clen);
double_canary(m2,m,mlen);
double_canary(n2,n,nlen);
double_canary(k2,k,klen);
if (crypto_stream_xor(m2,m2,mlen,n,k) != 0) fail("crypto_stream_xor with m=c overlap returns nonzero");
if (memcmp(m2,c,clen) != 0) fail("crypto_stream_xor does not handle m=c overlap");
/* Restore each copy after it has been overwritten with ciphertext. */
memcpy(m2,m,mlen);
if (crypto_stream_xor(n2,m,mlen,n2,k) != 0) fail("crypto_stream_xor with n=c overlap returns nonzero");
if (memcmp(n2,c,clen) != 0) fail("crypto_stream_xor does not handle n=c overlap");
memcpy(n2,n,nlen);
if (crypto_stream_xor(k2,m,mlen,n,k2) != 0) fail("crypto_stream_xor with k=c overlap returns nonzero");
if (memcmp(k2,c,clen) != 0) fail("crypto_stream_xor does not handle k=c overlap");
memcpy(k2,k,klen);
}
}

View file

@ -0,0 +1,21 @@
#include <stdlib.h>
#include <string.h>
/* provided by try.c: */
/* Implementation name printed at the end of the benchmark line. */
extern const char *primitiveimplementation;
/* Hooks called by the driver, in this order: preallocate, allocate, test, predoit, doit. */
extern void preallocate(void);
extern void allocate(void);
extern void test(void);
extern void predoit(void);
extern void doit(void);
/* provided by try-anything.c: */
/* Print a message on stderr and exit with status 111. */
extern void fail(const char *);
/* Return a zeroed buffer of the requested length (never freed). */
extern unsigned char *alignedcalloc(unsigned long long);
/* Fold a byte string into the global checksum. */
extern void checksum(const unsigned char *,unsigned long long);
/* Guard-region helpers: arguments are (copy, buffer, length[, function name]). */
extern void double_canary(unsigned char *,unsigned char *,unsigned long long);
extern void input_prepare(unsigned char *,unsigned char *,unsigned long long);
extern void output_prepare(unsigned char *,unsigned char *,unsigned long long);
extern void input_compare(const unsigned char *,const unsigned char *,unsigned long long,const char *);
extern void output_compare(const unsigned char *,const unsigned char *,unsigned long long,const char *);
/* Deterministic 64-bit pseudo-random value. */
extern unsigned long long myrandom(void);

View file

@ -0,0 +1,49 @@
# Build the stand-alone ChaCha20 benchmark for RV32.
# Objects are compiled with NEWOPT (rv32imab); the final link and the
# support objects use OPT (rv32ima).
SRCs=api.c try-anything.c
OBJs=$(SRCs:.c=.o)
SCLIBS=cpucycles.o kernelrandombytes.o

# Toolchain locations: adjust to the local installation.
COMPDIR=/home/dolbeau/LITEX/riscv64-unknown-elf-gcc-8.3.0-2019.08.0-x86_64-linux-ubuntu14
ALTCOMPDIR=/opt/riscv64b

CC=$(COMPDIR)/bin/riscv64-unknown-elf-gcc
ALTCC=$(ALTCOMPDIR)/bin/riscv64-unknown-elf-gcc
# The C compiler actually used is the alternate one; this overrides the line above.
CC=$(ALTCC)
CXX=$(COMPDIR)/bin/riscv64-unknown-elf-g++
STRIP=$(COMPDIR)/bin/riscv64-unknown-elf-strip

NEWOPT=-march=rv32imab -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM
OPT=-march=rv32ima -mabi=ilp32 -I. -O3 #-fno-vectorize #-DUSE_EPI_CUSTOM

# These targets do not produce a file of the same name.
.PHONY: all clean strip

all: chacha20 chacha20_small

clean:
	rm -f $(OBJs) try.o try_small.o chacha.o chacha20 chacha20_small chacha20_small_var chacha.s chacha_var.s

%.o: %.c
	$(CC) $(NEWOPT) $< -c -o $@

try.o: try.c
	$(CC) $(NEWOPT) $< -c -o $@

# Same source as try.o, built with the reduced SMALL test sizes.
try_small.o: try.c
	$(CC) $(NEWOPT) $< -c -o $@ -DSMALL

# chacha.c goes through an explicit assembly step so chacha.s can be inspected.
chacha.s: chacha.c
	$(CC) $(NEWOPT) $< -S -o $@

chacha.o: chacha.s
	$(CC) $(NEWOPT) $< -c -o $@

chacha20: $(OBJs) chacha.o try.o $(SCLIBS)
	$(CXX) $(OPT) $^ -o $@

chacha20_small: $(OBJs) chacha.o try_small.o $(SCLIBS)
	$(CXX) $(OPT) $^ -o $@

strip:
	$(STRIP) chacha20 chacha20_small

kernelrandombytes.o: random.cpp
	$(CXX) $(OPT) $< -c -o $@

cpucycles.o: riscv.c
	$(CC) $< -march=rv32ima -mabi=ilp32 -I. -O1 -c -o $@

View file

@ -0,0 +1,2 @@
#include "namespace.h"
#include "estream-convert-api.h"

View file

@ -0,0 +1,2 @@
/* Sizes, in bytes, of the key and nonce for this primitive. */
#define CRYPTO_KEYBYTES 32
#define CRYPTO_NONCEBYTES 8

View file

@ -0,0 +1,128 @@
#include "namespace.h"
/*
chacha.c version $Date: 2020/03/13 13:02:57 $
D. J. Bernstein
Romain Dolbeau
Public domain.
*/
#include "ecrypt-sync.h"
#include "api.h"
//#include <arm_sve.h> // fixme: is there a specific include ?
#include <stdio.h>
/* Number of rounds; the round loop consumes two per iteration. */
#define ROUNDS 20
/* ROTATE(v,c): rotate v left by c bits. The generic ROTL32 form is disabled
   (#if 0) in favour of the rvintrin.h intrinsic. */
#if 0
#define ROTATE(v,c) (ROTL32(v,c))
#else
#include <rvintrin.h>
#define ROTATE(v,c) _rv32_rol(v,c)
#endif
/* Word operations: XOR, addition passed through U32V, and increment by one. */
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))
/* One quarter round on words a,b,c,d of a local array that must be named x.
   Expands to several statements ending in ';' -- use it as a statement on
   its own, never as the unbraced body of an if/for. */
#define QUARTERROUND(a,b,c,d) \
x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
/*
 * Produce one 64-byte keystream block from the 16-word state input:
 * copy the state, apply ROUNDS rounds (two per loop iteration), add the
 * original state back and serialise the words little-endian into output.
 */
static void salsa20_wordtobyte(u8 output[64],const u32 input[16])
{
  u32 x[16];   /* must be called x: QUARTERROUND refers to it by name */
  int word;
  int done;

  for (word = 0; word < 16; ++word) x[word] = input[word];

  for (done = 0; done < ROUNDS; done += 2) {
    /* first round of the pair: index groups (i, i+4, i+8, i+12) */
    QUARTERROUND( 0, 4, 8,12)
    QUARTERROUND( 1, 5, 9,13)
    QUARTERROUND( 2, 6,10,14)
    QUARTERROUND( 3, 7,11,15)
    /* second round of the pair: diagonal index groups */
    QUARTERROUND( 0, 5,10,15)
    QUARTERROUND( 1, 6,11,12)
    QUARTERROUND( 2, 7, 8,13)
    QUARTERROUND( 3, 4, 9,14)
  }

  for (word = 0; word < 16; ++word) x[word] = PLUS(x[word],input[word]);
  for (word = 0; word < 16; ++word) U32TO8_LITTLE(output + 4 * word,x[word]);
}
/* Global initialisation hook of the ECRYPT API; this implementation needs none. */
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_init(void)
{
return;
}
/* State constants for 256-bit and 128-bit keys (16 bytes, no terminating NUL). */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
/*
 * Load the key and constants into the context state.
 *   x      : context; input[0..3] receive the constants, input[4..11] the key
 *   k      : key bytes (32 bytes when kbits == 256, otherwise 16)
 *   kbits  : 256 selects sigma and the second key half; any other value
 *            selects tau and reuses the first 16 key bytes
 *   ivbits : unused
 */
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keysetup(crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
{
  const int full_key = (kbits == 256); /* recommended */
  const char *constants = full_key ? sigma : tau;
  const u8 *upper = full_key ? k + 16 : k;
  int word;

  for (word = 0; word < 4; ++word)
    x->input[4 + word] = U8TO32_LITTLE(k + 4 * word);
  for (word = 0; word < 4; ++word)
    x->input[8 + word] = U8TO32_LITTLE(upper + 4 * word);
  for (word = 0; word < 4; ++word)
    x->input[word] = U8TO32_LITTLE(constants + 4 * word);
}
/*
 * Start a new stream: reset the 64-bit block counter (input[12..13]) to
 * zero and load the 8-byte iv into input[14..15] as little-endian words.
 */
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ivsetup(crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx *x,const u8 *iv)
{
  int word;
  x->input[12] = 0;
  x->input[13] = 0;
  for (word = 0; word < 2; ++word)
    x->input[14 + word] = U8TO32_LITTLE(iv + 4 * word);
}
/*
 * XOR bytes bytes of m with keystream and write the result to c_.
 * One 64-byte keystream block is generated per iteration and the block
 * counter in the context (input[12], carrying into input[13]) is advanced
 * after every block, including a final partial one.
 */
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes(crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx *x_,const u8 *m,u8 *c_,u32 bytes)
{
  u8 keystream[64];
  u32 *state = (u32*)&x_->input;
  u8 *dst = c_;
  u32 chunk;
  u32 pos;

  if (bytes == 0) return;

  do {
    salsa20_wordtobyte(keystream,state);
    state[12] = PLUSONE(state[12]);
    if (state[12] == 0) {
      state[13] = PLUSONE(state[13]);
      /* stopping at 2^70 bytes per nonce is user's responsibility */
    }
    chunk = (bytes <= 64) ? bytes : 64;
    for (pos = 0; pos < chunk; ++pos) dst[pos] = m[pos] ^ keystream[pos];
    bytes -= chunk;
    dst += chunk;
    m += chunk;
  } while (bytes > 0);
}
/* Decryption is the same XOR with the keystream, so this forwards to encrypt_bytes with c as input and m as output. */
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_decrypt_bytes(crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx *x,const u8 *c,u8 *m,u32 bytes)
{
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes(x,c,m,bytes);
}
/*
 * Write bytes bytes of raw keystream to stream: the buffer is zeroed and
 * then encrypted in place, which leaves the keystream itself.
 */
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keystream_bytes(crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx *x,u8 *stream,u32 bytes)
{
  u8 *cursor = stream;
  u32 remaining = bytes;
  while (remaining != 0) {
    *cursor++ = 0;
    --remaining;
  }
  crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes(x,stream,stream,bytes);
}

View file

@ -0,0 +1,28 @@
/*
cpucycles riscv.h version 20190803
D. J. Bernstein
Romain Dolbeau
Public domain.
*/
#ifndef CPUCYCLES_riscv_h
#define CPUCYCLES_riscv_h
#ifdef __cplusplus
extern "C" {
#endif
/* Returns the current value of the RISC-V cycle counter. */
extern long long cpucycles_riscv(void);
/*
 * Returns an estimate of the cycle counter frequency (cycles per second),
 * measured against gettimeofday() on first use; 0 if no stable estimate
 * could be obtained.
 */
extern long long cpucycles_riscv_persecond(void);
#ifdef __cplusplus
}
#endif
/*
 * Publish this implementation under the generic cpucycles names, unless
 * another implementation has already been selected.
 */
#ifndef cpucycles_implementation
#define cpucycles_implementation "riscv"
#define cpucycles cpucycles_riscv
#define cpucycles_persecond cpucycles_riscv_persecond
#endif
#endif

View file

@ -0,0 +1,18 @@
/*
 * Maps the generic crypto_stream_* names onto the ChaCha20 primitive
 * declared in "crypto_stream_chacha20.h".
 */
#ifndef crypto_stream_H
#define crypto_stream_H
#include "crypto_stream_chacha20.h"
/* Functions. */
#define crypto_stream crypto_stream_chacha20
#define crypto_stream_xor crypto_stream_chacha20_xor
#define crypto_stream_beforenm crypto_stream_chacha20_beforenm
#define crypto_stream_afternm crypto_stream_chacha20_afternm
#define crypto_stream_xor_afternm crypto_stream_chacha20_xor_afternm
/* Size constants, in bytes. */
#define crypto_stream_KEYBYTES crypto_stream_chacha20_KEYBYTES
#define crypto_stream_NONCEBYTES crypto_stream_chacha20_NONCEBYTES
#define crypto_stream_BEFORENMBYTES crypto_stream_chacha20_BEFORENMBYTES
/* Identification strings. */
#define crypto_stream_PRIMITIVE "chacha20"
#define crypto_stream_IMPLEMENTATION crypto_stream_chacha20_IMPLEMENTATION
#define crypto_stream_VERSION crypto_stream_chacha20_VERSION
#endif

View file

@ -0,0 +1,33 @@
#ifndef crypto_stream_chacha20_H
#define crypto_stream_chacha20_H
#define crypto_stream_chacha20_dolbeau_rv32_KEYBYTES 32
#define crypto_stream_chacha20_dolbeau_rv32_NONCEBYTES 8
#ifdef __cplusplus
extern "C" {
#endif
extern int crypto_stream_chacha20_dolbeau_rv32(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
extern int crypto_stream_chacha20_dolbeau_rv32_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
extern int crypto_stream_chacha20_dolbeau_rv32_beforenm(unsigned char *,const unsigned char *);
extern int crypto_stream_chacha20_dolbeau_rv32_afternm(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
extern int crypto_stream_chacha20_dolbeau_rv32_xor_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
#ifdef __cplusplus
}
#endif
#define crypto_stream_chacha20 crypto_stream_chacha20_dolbeau_rv32
#define crypto_stream_chacha20_xor crypto_stream_chacha20_dolbeau_rv32_xor
#define crypto_stream_chacha20_beforenm crypto_stream_chacha20_dolbeau_rv32_beforenm
#define crypto_stream_chacha20_afternm crypto_stream_chacha20_dolbeau_rv32_afternm
#define crypto_stream_chacha20_xor_afternm crypto_stream_chacha20_dolbeau_rv32_xor_afternm
#define crypto_stream_chacha20_KEYBYTES crypto_stream_chacha20_dolbeau_rv32_KEYBYTES
#define crypto_stream_chacha20_NONCEBYTES crypto_stream_chacha20_dolbeau_rv32_NONCEBYTES
#define crypto_stream_chacha20_BEFORENMBYTES crypto_stream_chacha20_dolbeau_rv32_BEFORENMBYTES
#define crypto_stream_chacha20_IMPLEMENTATION "crypto_stream/chacha20/dolbeau/rv32"
#ifndef crypto_stream_chacha20_dolbeau_rv32_VERSION
#define crypto_stream_chacha20_dolbeau_rv32_VERSION "-"
#endif
#define crypto_stream_chacha20_VERSION crypto_stream_chacha20_dolbeau_rv32_VERSION
#endif

View file

@ -0,0 +1,6 @@
#ifndef crypto_uint32_h
#define crypto_uint32_h
/* Unsigned 32-bit integer type. NOTE(review): assumes `unsigned int` is exactly 32 bits wide on the target - confirm for new platforms. */
typedef unsigned int crypto_uint32;
#endif

View file

@ -0,0 +1,6 @@
#ifndef crypto_uint64_h
#define crypto_uint64_h
/* Unsigned 64-bit integer type. NOTE(review): assumes `unsigned long long` is exactly 64 bits wide on the target - confirm for new platforms. */
typedef unsigned long long crypto_uint64;
#endif

View file

@ -0,0 +1,6 @@
#ifndef crypto_uint8_h
#define crypto_uint8_h
/* Unsigned 8-bit integer type. NOTE(review): assumes `unsigned char` is exactly 8 bits wide on the target - confirm for new platforms. */
typedef unsigned char crypto_uint8;
#endif

View file

@ -0,0 +1,322 @@
/* ecrypt-config.h */
/* *** Normally, it should not be necessary to edit this file. *** */
#ifndef ECRYPT_CONFIG
#define ECRYPT_CONFIG
/* ------------------------------------------------------------------------- */
/* Guess the endianness of the target architecture. */
/*
* The LITTLE endian machines:
*/
#if defined(__ultrix) /* Older MIPS */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(__alpha) /* Alpha */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(i386) /* x86 (gcc) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(__i386) /* x86 (gcc) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(__x86_64) /* x86_64 (gcc) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(_M_IX86) /* x86 (MSC, Borland) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(_MSC_VER) /* x86 (surely MSC) */
#define ECRYPT_LITTLE_ENDIAN
#elif defined(__INTEL_COMPILER) /* x86 (surely Intel compiler icl.exe) */
#define ECRYPT_LITTLE_ENDIAN
/*
* The BIG endian machines:
*/
#elif defined(__sparc) /* Newer Sparc's */
#define ECRYPT_BIG_ENDIAN
#elif defined(__powerpc64__) /* PPC64 */
#if defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN == 1
#define ECRYPT_LITTLE_ENDIAN
#else
#define ECRYPT_BIG_ENDIAN
#endif
#elif defined(__powerpc__) /* PowerPC */
#define ECRYPT_BIG_ENDIAN
#elif defined(__ppc__) /* PowerPC */
#define ECRYPT_BIG_ENDIAN
#elif defined(__hppa) /* HP-PA */
#define ECRYPT_BIG_ENDIAN
/*
* Finally machines with UNKNOWN endianness:
*/
#elif defined (_AIX) /* RS6000 */
#define ECRYPT_UNKNOWN
#elif defined(__aux) /* 68K */
#define ECRYPT_UNKNOWN
#elif defined(__dgux) /* 88K (but P6 in latest boxes) */
#define ECRYPT_UNKNOWN
#elif defined(__sgi) /* Newer MIPS */
#define ECRYPT_UNKNOWN
#else /* Any other processor */
#define ECRYPT_UNKNOWN
#endif
/* ------------------------------------------------------------------------- */
/*
* Find minimal-width types to store 8-bit, 16-bit, 32-bit, and 64-bit
* integers.
*
* Note: to enable 64-bit types on 32-bit compilers, it might be
* necessary to switch from ISO C90 mode to ISO C99 mode (e.g., gcc
* -std=c99), or to allow compiler-specific extensions.
*/
#include <limits.h>
/* --- check char --- */
#if (UCHAR_MAX / 0xFU > 0xFU)
#ifndef I8T
#define I8T char
#define U8C(v) (v##U)
#if (UCHAR_MAX == 0xFFU)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (UCHAR_MAX / 0xFFU > 0xFFU)
#ifndef I16T
#define I16T char
#define U16C(v) (v##U)
#endif
#if (UCHAR_MAX / 0xFFFFU > 0xFFFFU)
#ifndef I32T
#define I32T char
#define U32C(v) (v##U)
#endif
#if (UCHAR_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
#ifndef I64T
#define I64T char
#define U64C(v) (v##U)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check short --- */
#if (USHRT_MAX / 0xFU > 0xFU)
#ifndef I8T
#define I8T short
#define U8C(v) (v##U)
#if (USHRT_MAX == 0xFFU)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (USHRT_MAX / 0xFFU > 0xFFU)
#ifndef I16T
#define I16T short
#define U16C(v) (v##U)
#endif
#if (USHRT_MAX / 0xFFFFU > 0xFFFFU)
#ifndef I32T
#define I32T short
#define U32C(v) (v##U)
#endif
#if (USHRT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
#ifndef I64T
#define I64T short
#define U64C(v) (v##U)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check int --- */
/*
 * Each nested test asks whether `unsigned int` can hold at least 8, 16,
 * 32 and 64 bits respectively, and selects `int` for every width that
 * has not already been assigned to a smaller type.
 */
#if (UINT_MAX / 0xFU > 0xFU)
#ifndef I8T
#define I8T int
#define U8C(v) (v##U)
/* The type under test here is int, so the exact-byte check must use UINT_MAX. */
#if (UINT_MAX == 0xFFU)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (UINT_MAX / 0xFFU > 0xFFU)
#ifndef I16T
#define I16T int
#define U16C(v) (v##U)
#endif
#if (UINT_MAX / 0xFFFFU > 0xFFFFU)
#ifndef I32T
#define I32T int
#define U32C(v) (v##U)
#endif
#if (UINT_MAX / 0xFFFFFFFFU > 0xFFFFFFFFU)
#ifndef I64T
#define I64T int
#define U64C(v) (v##U)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check long --- */
#if (ULONG_MAX / 0xFUL > 0xFUL)
#ifndef I8T
#define I8T long
#define U8C(v) (v##UL)
#if (ULONG_MAX == 0xFFUL)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (ULONG_MAX / 0xFFUL > 0xFFUL)
#ifndef I16T
#define I16T long
#define U16C(v) (v##UL)
#endif
#if (ULONG_MAX / 0xFFFFUL > 0xFFFFUL)
#ifndef I32T
#define I32T long
#define U32C(v) (v##UL)
#endif
#if (ULONG_MAX / 0xFFFFFFFFUL > 0xFFFFFFFFUL)
#ifndef I64T
#define I64T long
#define U64C(v) (v##UL)
#define ECRYPT_NATIVE64
#endif
#endif
#endif
#endif
#endif
/* --- check long long --- */
#ifdef ULLONG_MAX
#if (ULLONG_MAX / 0xFULL > 0xFULL)
#ifndef I8T
#define I8T long long
#define U8C(v) (v##ULL)
#if (ULLONG_MAX == 0xFFULL)
#define ECRYPT_I8T_IS_BYTE
#endif
#endif
#if (ULLONG_MAX / 0xFFULL > 0xFFULL)
#ifndef I16T
#define I16T long long
#define U16C(v) (v##ULL)
#endif
#if (ULLONG_MAX / 0xFFFFULL > 0xFFFFULL)
#ifndef I32T
#define I32T long long
#define U32C(v) (v##ULL)
#endif
#if (ULLONG_MAX / 0xFFFFFFFFULL > 0xFFFFFFFFULL)
#ifndef I64T
#define I64T long long
#define U64C(v) (v##ULL)
#endif
#endif
#endif
#endif
#endif
#endif
/* --- check __int64 --- */
#if !defined(__STDC__) && defined(_UI64_MAX)
#ifndef I64T
#define I64T __int64
#define U64C(v) (v##ui64)
#endif
#endif
/* --- if platform doesn't announce anything, use most common choices --- */
#ifndef I8T
#define I8T char
#define U8C(v) (v##U)
#endif
#ifndef I16T
#define I16T short
#define U16C(v) (v##U)
#endif
#ifndef I32T
#define I32T int
#define U32C(v) (v##U)
#endif
#ifndef I64T
#define I64T long long
#define U64C(v) (v##ULL)
#endif
/* ------------------------------------------------------------------------- */
/* find the largest type on this platform (used for alignment) */
#if defined(__SSE__) || (defined(_MSC_VER) && (_MSC_VER >= 1300))
#include <xmmintrin.h>
#define MAXT __m128
#elif defined(__MMX__)
#include <mmintrin.h>
#define MAXT __m64
#elif defined(__ALTIVEC__)
#define MAXT __vector int
#else
#define MAXT long
#endif
/* ------------------------------------------------------------------------- */
#endif

View file

@ -0,0 +1,60 @@
/* ecrypt-machine.h */
/*
* This file is included by 'ecrypt-portable.h'. It allows overriding
* the default macros for specific platforms. Please carefully check
* the machine code generated by your compiler (with optimisations
* turned on) before deciding to edit this file.
*/
/* ------------------------------------------------------------------------- */
#if (defined(ECRYPT_DEFAULT_ROT) && !defined(ECRYPT_MACHINE_ROT))
#define ECRYPT_MACHINE_ROT
#if (defined(WIN32) && defined(_MSC_VER))
#undef ROTL32
#undef ROTR32
#undef ROTL64
#undef ROTR64
#include <stdlib.h>
#pragma intrinsic(_lrotl) /* compile rotations "inline" */
#pragma intrinsic(_lrotr)
#define ROTL32(v, n) _lrotl(v, n)
#define ROTR32(v, n) _lrotr(v, n)
#define ROTL64(v, n) _rotl64(v, n)
#define ROTR64(v, n) _rotr64(v, n)
#endif
#ifdef __riscv
#warning "Hardwiring support for B"
#include <rvintrin.h>
#undef ROTL32
#define ROTL32(v,c) _rv32_rol(v,c)
#undef ROTR32
#define ROTR32(v,c) _rv32_ror(v,c)
#endif
#endif
/* ------------------------------------------------------------------------- */
/*
 * Byte-swap overrides.  Takes effect the first time this file is included
 * after 'ecrypt-portable.h' has defined ECRYPT_DEFAULT_SWAP;
 * ECRYPT_MACHINE_SWAP keeps later inclusions from repeating it.
 */
#if (defined(ECRYPT_DEFAULT_SWAP) && !defined(ECRYPT_MACHINE_SWAP))
#define ECRYPT_MACHINE_SWAP
/* On any RISC-V target SWAP32 is replaced by the <rvintrin.h> byte-reverse intrinsic. */
#ifdef __riscv
#warning "Hardwiring support for B"
#include <rvintrin.h>
#undef SWAP32
#define SWAP32(v) _rv32_rev8(v) // grev with imm=24
#endif
#endif
/* ------------------------------------------------------------------------- */

View file

@ -0,0 +1,310 @@
/* ecrypt-portable.h */
/*
* WARNING: the conversions defined below are implemented as macros,
* and should be used carefully. They should NOT be used with
* parameters which perform some action. E.g., the following two lines
* are not equivalent:
*
* 1) ++x; y = ROTL32(x, n);
* 2) y = ROTL32(++x, n);
*/
/*
* *** Please do not edit this file. ***
*
* The default macros can be overridden for specific architectures by
* editing 'ecrypt-machine.h'.
*/
#ifndef ECRYPT_PORTABLE
#define ECRYPT_PORTABLE
#include "ecrypt-config.h"
/* ------------------------------------------------------------------------- */
/*
* The following types are defined (if available):
*
* u8: unsigned integer type, at least 8 bits
* u16: unsigned integer type, at least 16 bits
* u32: unsigned integer type, at least 32 bits
* u64: unsigned integer type, at least 64 bits
*
* s8, s16, s32, s64 -> signed counterparts of u8, u16, u32, u64
*
* The selection of minimum-width integer types is taken care of by
* 'ecrypt-config.h'. Note: to enable 64-bit types on 32-bit
* compilers, it might be necessary to switch from ISO C90 mode to ISO
* C99 mode (e.g., gcc -std=c99).
*/
#ifdef I8T
typedef signed I8T s8;
typedef unsigned I8T u8;
#endif
#ifdef I16T
typedef signed I16T s16;
typedef unsigned I16T u16;
#endif
#ifdef I32T
typedef signed I32T s32;
typedef unsigned I32T u32;
#endif
#ifdef I64T
typedef signed I64T s64;
typedef unsigned I64T u64;
#endif
/*
* The following macros are used to obtain exact-width results.
*/
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U16V(v) ((u16)(v) & U16C(0xFFFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
#define U64V(v) ((u64)(v) & U64C(0xFFFFFFFFFFFFFFFF))
/* ------------------------------------------------------------------------- */
/*
* The following macros return words with their bits rotated over n
* positions to the left/right.
*/
#define ECRYPT_DEFAULT_ROT
#define ROTL8(v, n) \
(U8V((v) << (n)) | ((v) >> (8 - (n))))
#define ROTL16(v, n) \
(U16V((v) << (n)) | ((v) >> (16 - (n))))
#define ROTL32(v, n) \
(U32V((v) << (n)) | ((v) >> (32 - (n))))
#define ROTL64(v, n) \
(U64V((v) << (n)) | ((v) >> (64 - (n))))
#define ROTR8(v, n) ROTL8(v, 8 - (n))
#define ROTR16(v, n) ROTL16(v, 16 - (n))
#define ROTR32(v, n) ROTL32(v, 32 - (n))
#define ROTR64(v, n) ROTL64(v, 64 - (n))
#include "ecrypt-machine.h"
/* ------------------------------------------------------------------------- */
/*
* The following macros return a word with bytes in reverse order.
*/
#define ECRYPT_DEFAULT_SWAP
#define SWAP16(v) \
ROTL16(v, 8)
#define SWAP32(v) \
((ROTL32(v, 8) & U32C(0x00FF00FF)) | \
(ROTL32(v, 24) & U32C(0xFF00FF00)))
/*
 * SWAP64(v): returns the 64-bit word v with its eight bytes in reverse
 * order.  With ECRYPT_NATIVE64 it is built from four masked 64-bit
 * rotations; otherwise from two SWAP32 calls on the 32-bit halves.
 * As with the other macros in this file, v may be evaluated several times.
 */
#ifdef ECRYPT_NATIVE64
#define SWAP64(v) \
((ROTL64(v, 8) & U64C(0x000000FF000000FF)) | \
(ROTL64(v, 24) & U64C(0x0000FF000000FF00)) | \
(ROTL64(v, 40) & U64C(0x00FF000000FF0000)) | \
(ROTL64(v, 56) & U64C(0xFF000000FF000000)))
#else
/*
 * The argument is parenthesised before shifting so that an expression
 * argument such as `a | b` is shifted as a whole rather than only its
 * last operand.
 */
#define SWAP64(v) \
(((u64)SWAP32(U32V(v)) << 32) | (u64)SWAP32(U32V((v) >> 32)))
#endif
#include "ecrypt-machine.h"
#define ECRYPT_DEFAULT_WTOW
#ifdef ECRYPT_LITTLE_ENDIAN
#define U16TO16_LITTLE(v) (v)
#define U32TO32_LITTLE(v) (v)
#define U64TO64_LITTLE(v) (v)
#define U16TO16_BIG(v) SWAP16(v)
#define U32TO32_BIG(v) SWAP32(v)
#define U64TO64_BIG(v) SWAP64(v)
#endif
#ifdef ECRYPT_BIG_ENDIAN
#define U16TO16_LITTLE(v) SWAP16(v)
#define U32TO32_LITTLE(v) SWAP32(v)
#define U64TO64_LITTLE(v) SWAP64(v)
#define U16TO16_BIG(v) (v)
#define U32TO32_BIG(v) (v)
#define U64TO64_BIG(v) (v)
#endif
#include "ecrypt-machine.h"
/*
* The following macros load words from an array of bytes with
* different types of endianness, and vice versa.
*/
#define ECRYPT_DEFAULT_BTOW
#if (!defined(ECRYPT_UNKNOWN) && defined(ECRYPT_I8T_IS_BYTE))
#define U8TO16_LITTLE(p) U16TO16_LITTLE(((u16*)(p))[0])
#define U8TO32_LITTLE(p) U32TO32_LITTLE(((u32*)(p))[0])
#define U8TO64_LITTLE(p) U64TO64_LITTLE(((u64*)(p))[0])
#define U8TO16_BIG(p) U16TO16_BIG(((u16*)(p))[0])
#define U8TO32_BIG(p) U32TO32_BIG(((u32*)(p))[0])
#define U8TO64_BIG(p) U64TO64_BIG(((u64*)(p))[0])
#define U16TO8_LITTLE(p, v) (((u16*)(p))[0] = U16TO16_LITTLE(v))
#define U32TO8_LITTLE(p, v) (((u32*)(p))[0] = U32TO32_LITTLE(v))
#define U64TO8_LITTLE(p, v) (((u64*)(p))[0] = U64TO64_LITTLE(v))
#define U16TO8_BIG(p, v) (((u16*)(p))[0] = U16TO16_BIG(v))
#define U32TO8_BIG(p, v) (((u32*)(p))[0] = U32TO32_BIG(v))
#define U64TO8_BIG(p, v) (((u64*)(p))[0] = U64TO64_BIG(v))
#else
#define U8TO16_LITTLE(p) \
(((u16)((p)[0]) ) | \
((u16)((p)[1]) << 8))
#define U8TO32_LITTLE(p) \
(((u32)((p)[0]) ) | \
((u32)((p)[1]) << 8) | \
((u32)((p)[2]) << 16) | \
((u32)((p)[3]) << 24))
#ifdef ECRYPT_NATIVE64
#define U8TO64_LITTLE(p) \
(((u64)((p)[0]) ) | \
((u64)((p)[1]) << 8) | \
((u64)((p)[2]) << 16) | \
((u64)((p)[3]) << 24) | \
((u64)((p)[4]) << 32) | \
((u64)((p)[5]) << 40) | \
((u64)((p)[6]) << 48) | \
((u64)((p)[7]) << 56))
#else
#define U8TO64_LITTLE(p) \
((u64)U8TO32_LITTLE(p) | ((u64)U8TO32_LITTLE((p) + 4) << 32))
#endif
#define U8TO16_BIG(p) \
(((u16)((p)[0]) << 8) | \
((u16)((p)[1]) ))
#define U8TO32_BIG(p) \
(((u32)((p)[0]) << 24) | \
((u32)((p)[1]) << 16) | \
((u32)((p)[2]) << 8) | \
((u32)((p)[3]) ))
#ifdef ECRYPT_NATIVE64
#define U8TO64_BIG(p) \
(((u64)((p)[0]) << 56) | \
((u64)((p)[1]) << 48) | \
((u64)((p)[2]) << 40) | \
((u64)((p)[3]) << 32) | \
((u64)((p)[4]) << 24) | \
((u64)((p)[5]) << 16) | \
((u64)((p)[6]) << 8) | \
((u64)((p)[7]) ))
#else
#define U8TO64_BIG(p) \
(((u64)U8TO32_BIG(p) << 32) | (u64)U8TO32_BIG((p) + 4))
#endif
#define U16TO8_LITTLE(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
} while (0)
#define U32TO8_LITTLE(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
(p)[2] = U8V((v) >> 16); \
(p)[3] = U8V((v) >> 24); \
} while (0)
#ifdef ECRYPT_NATIVE64
#define U64TO8_LITTLE(p, v) \
do { \
(p)[0] = U8V((v) ); \
(p)[1] = U8V((v) >> 8); \
(p)[2] = U8V((v) >> 16); \
(p)[3] = U8V((v) >> 24); \
(p)[4] = U8V((v) >> 32); \
(p)[5] = U8V((v) >> 40); \
(p)[6] = U8V((v) >> 48); \
(p)[7] = U8V((v) >> 56); \
} while (0)
#else
#define U64TO8_LITTLE(p, v) \
do { \
U32TO8_LITTLE((p), U32V((v) )); \
U32TO8_LITTLE((p) + 4, U32V((v) >> 32)); \
} while (0)
#endif
/*
 * U16TO8_BIG(p, v): stores the low 16 bits of v at p[0..1] in big-endian
 * order (most significant byte first), the inverse of U8TO16_BIG.
 */
#define U16TO8_BIG(p, v) \
do { \
(p)[0] = U8V((v) >> 8); \
(p)[1] = U8V((v) ); \
} while (0)
#define U32TO8_BIG(p, v) \
do { \
(p)[0] = U8V((v) >> 24); \
(p)[1] = U8V((v) >> 16); \
(p)[2] = U8V((v) >> 8); \
(p)[3] = U8V((v) ); \
} while (0)
#ifdef ECRYPT_NATIVE64
#define U64TO8_BIG(p, v) \
do { \
(p)[0] = U8V((v) >> 56); \
(p)[1] = U8V((v) >> 48); \
(p)[2] = U8V((v) >> 40); \
(p)[3] = U8V((v) >> 32); \
(p)[4] = U8V((v) >> 24); \
(p)[5] = U8V((v) >> 16); \
(p)[6] = U8V((v) >> 8); \
(p)[7] = U8V((v) ); \
} while (0)
#else
#define U64TO8_BIG(p, v) \
do { \
U32TO8_BIG((p), U32V((v) >> 32)); \
U32TO8_BIG((p) + 4, U32V((v) )); \
} while (0)
#endif
#endif
#include "ecrypt-machine.h"
/* ------------------------------------------------------------------------- */
#define AT_LEAST_ONE(n) (((n) < 1) ? 1 : (n))
#define ALIGN(t, v, n) \
union { t b[n]; MAXT l[AT_LEAST_ONE(n * sizeof(t) / sizeof(MAXT))]; } v
/* ------------------------------------------------------------------------- */
#endif

View file

@ -0,0 +1,281 @@
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_VARIANT 1
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_API
/* ecrypt-sync.h */
/*
* Header file for synchronous stream ciphers without authentication
* mechanism.
*
* *** Please only edit parts marked with "[edit]". ***
*/
#ifndef crypto_stream_chacha20_dolbeau_rv32_ECRYPT_SYNC
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_SYNC
#include "ecrypt-portable.h"
/* ------------------------------------------------------------------------- */
/* Cipher parameters */
/*
* The name of your cipher.
*/
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_NAME "ChaCha20"
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_PROFILE "_____"
/*
* Specify which key and IV sizes are supported by your cipher. A user
* should be able to enumerate the supported sizes by running the
* following code:
*
* for (i = 0; crypto_stream_chacha20_dolbeau_rv32_ECRYPT_KEYSIZE(i) <= crypto_stream_chacha20_dolbeau_rv32_ECRYPT_MAXKEYSIZE; ++i)
* {
* keysize = crypto_stream_chacha20_dolbeau_rv32_ECRYPT_KEYSIZE(i);
*
* ...
* }
*
* All sizes are in bits.
*/
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_MAXKEYSIZE 256 /* [edit] */
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_KEYSIZE(i) (128 + (i)*128) /* [edit] */
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_MAXIVSIZE 64 /* [edit] */
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_IVSIZE(i) (64 + (i)*64) /* [edit] */
/* ------------------------------------------------------------------------- */
/* Data structures */
/*
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx is the structure containing the representation of the
* internal state of your cipher.
*/
typedef struct
{
u32 input[16]; /* could be compressed */
/*
* [edit]
*
* Put here all state variables needed during the encryption process.
*/
} crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx;
/* ------------------------------------------------------------------------- */
/* Mandatory functions */
/*
* Key and message independent initialization. This function will be
* called once when the program starts (e.g., to build expanded S-box
* tables). It takes no arguments; the explicit (void) makes this a real
* prototype, so a call that passes arguments is rejected by the compiler.
*/
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_init(void);
/*
* Key setup. It is the user's responsibility to select the values of
* keysize and ivsize from the set of supported values specified
* above.
*/
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keysetup(
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx* ctx,
const u8* key,
u32 keysize, /* Key size in bits. */
u32 ivsize); /* IV size in bits. */
/*
* IV setup. After having called crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keysetup(), the user is
* allowed to call crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ivsetup() different times in order to
* encrypt/decrypt different messages with the same key but different
* IV's.
*/
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ivsetup(
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx* ctx,
const u8* iv);
/*
* Encryption/decryption of arbitrary length messages.
*
* For efficiency reasons, the API provides two types of
* encrypt/decrypt functions. The crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes() function
* (declared here) encrypts byte strings of arbitrary length, while
* the crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_blocks() function (defined later) only accepts
* lengths which are multiples of crypto_stream_chacha20_dolbeau_rv32_ECRYPT_BLOCKLENGTH.
*
* The user is allowed to make multiple calls to
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_blocks() to incrementally encrypt a long message,
* but he is NOT allowed to make additional encryption calls once he
* has called crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes() (unless he starts a new message
* of course). For example, this sequence of calls is acceptable:
*
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keysetup();
*
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ivsetup();
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_blocks();
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_blocks();
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes();
*
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ivsetup();
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_blocks();
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_blocks();
*
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ivsetup();
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes();
*
* The following sequence is not:
*
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keysetup();
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ivsetup();
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_blocks();
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes();
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_blocks();
*/
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes(
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx* ctx,
const u8* plaintext,
u8* ciphertext,
u32 msglen); /* Message length in bytes. */
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_decrypt_bytes(
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx* ctx,
const u8* ciphertext,
u8* plaintext,
u32 msglen); /* Message length in bytes. */
/* ------------------------------------------------------------------------- */
/* Optional features */
/*
* For testing purposes it can sometimes be useful to have a function
* which immediately generates keystream without having to provide it
* with a zero plaintext. If your cipher cannot provide this function
* (e.g., because it is not strictly a synchronous cipher), please
* reset the crypto_stream_chacha20_dolbeau_rv32_ECRYPT_GENERATES_KEYSTREAM flag.
*/
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_GENERATES_KEYSTREAM
#ifdef crypto_stream_chacha20_dolbeau_rv32_ECRYPT_GENERATES_KEYSTREAM
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keystream_bytes(
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx* ctx,
u8* keystream,
u32 length); /* Length of keystream in bytes. */
#endif
/* ------------------------------------------------------------------------- */
/* Optional optimizations */
/*
* By default, the functions in this section are implemented using
* calls to functions declared above. However, you might want to
* implement them differently for performance reasons.
*/
/*
* All-in-one encryption/decryption of (short) packets.
*
* The default definitions of these functions can be found in
* "ecrypt-sync.c". If you want to implement them differently, please
* undef the crypto_stream_chacha20_dolbeau_rv32_ECRYPT_USES_DEFAULT_ALL_IN_ONE flag.
*/
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_USES_DEFAULT_ALL_IN_ONE /* [edit] */
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_packet(
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx* ctx,
const u8* iv,
const u8* plaintext,
u8* ciphertext,
u32 msglen);
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_decrypt_packet(
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx* ctx,
const u8* iv,
const u8* ciphertext,
u8* plaintext,
u32 msglen);
/*
* Encryption/decryption of blocks.
*
* By default, these functions are defined as macros. If you want to
* provide a different implementation, please undef the
* crypto_stream_chacha20_dolbeau_rv32_ECRYPT_USES_DEFAULT_BLOCK_MACROS flag and implement the functions
* declared below.
*/
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_BLOCKLENGTH 64 /* [edit] */
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_USES_DEFAULT_BLOCK_MACROS /* [edit] */
#ifdef crypto_stream_chacha20_dolbeau_rv32_ECRYPT_USES_DEFAULT_BLOCK_MACROS
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_blocks(ctx, plaintext, ciphertext, blocks) \
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes(ctx, plaintext, ciphertext, \
(blocks) * crypto_stream_chacha20_dolbeau_rv32_ECRYPT_BLOCKLENGTH)
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_decrypt_blocks(ctx, ciphertext, plaintext, blocks) \
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_decrypt_bytes(ctx, ciphertext, plaintext, \
(blocks) * crypto_stream_chacha20_dolbeau_rv32_ECRYPT_BLOCKLENGTH)
#ifdef crypto_stream_chacha20_dolbeau_rv32_ECRYPT_GENERATES_KEYSTREAM
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keystream_blocks(ctx, keystream, blocks) \
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keystream_bytes(ctx, keystream, \
(blocks) * crypto_stream_chacha20_dolbeau_rv32_ECRYPT_BLOCKLENGTH)
#endif
#else
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_blocks(
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx* ctx,
const u8* plaintext,
u8* ciphertext,
u32 blocks); /* Message length in blocks. */
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_decrypt_blocks(
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx* ctx,
const u8* ciphertext,
u8* plaintext,
u32 blocks); /* Message length in blocks. */
#ifdef crypto_stream_chacha20_dolbeau_rv32_ECRYPT_GENERATES_KEYSTREAM
/*
 * Block-oriented keystream generation.  `keystream` is the output buffer,
 * so it is not const-qualified; this matches the keystream_bytes
 * declaration and the default macro version of this function.
 */
void crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keystream_blocks(
crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx* ctx,
u8* keystream,
u32 blocks); /* Keystream length in blocks. */
#endif
#endif
/*
* If your cipher can be implemented in different ways, you can use
* the crypto_stream_chacha20_dolbeau_rv32_ECRYPT_VARIANT parameter to allow the user to choose between
* them at compile time (e.g., gcc -Dcrypto_stream_chacha20_dolbeau_rv32_ECRYPT_VARIANT=3 ...). Please
* only use this possibility if you really think it could make a
* significant difference and keep the number of variants
* (crypto_stream_chacha20_dolbeau_rv32_ECRYPT_MAXVARIANT) as small as possible (definitely not more than
* 10). Note also that all variants should have exactly the same
* external interface (i.e., the same crypto_stream_chacha20_dolbeau_rv32_ECRYPT_BLOCKLENGTH, etc.).
*/
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_MAXVARIANT 1 /* [edit] */
#ifndef crypto_stream_chacha20_dolbeau_rv32_ECRYPT_VARIANT
#define crypto_stream_chacha20_dolbeau_rv32_ECRYPT_VARIANT 1
#endif
#if (crypto_stream_chacha20_dolbeau_rv32_ECRYPT_VARIANT > crypto_stream_chacha20_dolbeau_rv32_ECRYPT_MAXVARIANT)
#error this variant does not exist
#endif
/* ------------------------------------------------------------------------- */
#endif

View file

@ -0,0 +1,118 @@
/*
* Copied from the eSTREAM api/ecrypt-sync.h,
* and then edited to provide the crypto_stream/crypto_stream_xor interface.
*/
#include "crypto_stream.h"
#include "ecrypt-sync.h"
#ifdef ECRYPT_USES_DEFAULT_ALL_IN_ONE
/*
* * Default implementation of all-in-one encryption/decryption of
* * (short) packets.
* */
#ifdef ECRYPT_HAS_SINGLE_PACKET_FUNCTION
/*
 * Processes one whole packet: installs the IV, then transforms msglen
 * bytes from input to output.  When no single process_bytes function is
 * available, action == 0 selects encryption and any other value selects
 * decryption.
 */
void ECRYPT_process_packet(
int action,
ECRYPT_ctx* ctx,
const u8* iv,
const u8* input,
u8* output,
u32 msglen)
{
  ECRYPT_ivsetup(ctx, iv);
#ifndef ECRYPT_HAS_SINGLE_BYTE_FUNCTION
  if (action != 0) {
    ECRYPT_decrypt_bytes(ctx, input, output, msglen);
  } else {
    ECRYPT_encrypt_bytes(ctx, input, output, msglen);
  }
#else
  ECRYPT_process_bytes(action, ctx, input, output, msglen);
#endif
}
#else
/*
 * Encrypts one whole packet: installs the IV, then encrypts msglen bytes
 * of plaintext into ciphertext.
 */
void ECRYPT_encrypt_packet(
ECRYPT_ctx* ctx,
const u8* iv,
const u8* plaintext,
u8* ciphertext,
u32 msglen)
{
ECRYPT_ivsetup(ctx, iv);
ECRYPT_encrypt_bytes(ctx, plaintext, ciphertext, msglen);
}
/*
 * Decrypts one whole packet: installs the IV, then decrypts msglen bytes
 * of ciphertext into plaintext.
 */
void ECRYPT_decrypt_packet(
ECRYPT_ctx* ctx,
const u8* iv,
const u8* ciphertext,
u8* plaintext,
u32 msglen)
{
ECRYPT_ivsetup(ctx, iv);
ECRYPT_decrypt_bytes(ctx, ciphertext, plaintext, msglen);
}
#endif
#endif
/* Non-zero once ECRYPT_init() has been called from this file. */
static int flaginitialized = 0;
/*
 * Writes clen bytes of keystream for nonce n and key k to c.
 * Always returns 0.
 *
 * The output is produced in pieces of at most 65536 bytes.  When the
 * cipher has no dedicated keystream function, the buffer is cleared and
 * encrypted in place instead.
 */
int crypto_stream(
unsigned char *c,unsigned long long clen,
const unsigned char *n,
const unsigned char *k
)
{
  ECRYPT_ctx ctx;
#ifndef ECRYPT_GENERATES_KEYSTREAM
  unsigned long long i;
#endif

  if (flaginitialized == 0) {
    ECRYPT_init();
    flaginitialized = 1;
  }
  ECRYPT_keysetup(&ctx,k,crypto_stream_KEYBYTES * 8,crypto_stream_NONCEBYTES * 8);
  ECRYPT_ivsetup(&ctx,n);

#ifdef ECRYPT_GENERATES_KEYSTREAM
  for (; clen > 65536; clen -= 65536) {
    ECRYPT_keystream_bytes(&ctx,c,65536);
    c += 65536;
  }
  ECRYPT_keystream_bytes(&ctx,c,clen);
#else
  for (i = 0;i < clen;++i) c[i] = 0;
  for (; clen > 65536; clen -= 65536) {
    ECRYPT_encrypt_bytes(&ctx,c,c,65536);
    c += 65536;
  }
  ECRYPT_encrypt_bytes(&ctx,c,c,clen);
#endif
  return 0;
}
/*
 * Encrypts mlen bytes of m into c under nonce n and key k, working in
 * pieces of at most 65536 bytes.  Always returns 0.
 */
int crypto_stream_xor(
unsigned char *c,
const unsigned char *m,unsigned long long mlen,
const unsigned char *n,
const unsigned char *k
)
{
  ECRYPT_ctx ctx;

  if (flaginitialized == 0) {
    ECRYPT_init();
    flaginitialized = 1;
  }
  ECRYPT_keysetup(&ctx,k,crypto_stream_KEYBYTES * 8,crypto_stream_NONCEBYTES * 8);
  ECRYPT_ivsetup(&ctx,n);

  for (; mlen > 65536; mlen -= 65536) {
    ECRYPT_encrypt_bytes(&ctx,m,c,65536);
    m += 65536;
    c += 65536;
  }
  ECRYPT_encrypt_bytes(&ctx,m,c,mlen);
  return 0;
}

View file

@ -0,0 +1 @@
Romain Dolbeau (based on code by Daniel J. Bernstein)

View file

@ -0,0 +1,14 @@
#ifndef kernelrandombytes_h
#define kernelrandombytes_h
#ifdef __cplusplus
extern "C" {
#endif
/* Fills the buffer given by the first argument with as many bytes as the second argument specifies. */
extern void kernelrandombytes(unsigned char *,unsigned long long);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,11 @@
/*
 * Short ECRYPT_* names for the crypto_stream_chacha20_dolbeau_rv32_
 * prefixed type and functions.
 * NOTE(review): the process_bytes and process_packet targets are not
 * declared in the ecrypt-sync.h shipped alongside this file - confirm
 * they exist before relying on those two aliases.
 */
#define ECRYPT_ctx crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ctx
#define ECRYPT_init crypto_stream_chacha20_dolbeau_rv32_ECRYPT_init
#define ECRYPT_keysetup crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keysetup
#define ECRYPT_ivsetup crypto_stream_chacha20_dolbeau_rv32_ECRYPT_ivsetup
#define ECRYPT_keystream_bytes crypto_stream_chacha20_dolbeau_rv32_ECRYPT_keystream_bytes
#define ECRYPT_process_bytes crypto_stream_chacha20_dolbeau_rv32_ECRYPT_process_bytes
#define ECRYPT_decrypt_bytes crypto_stream_chacha20_dolbeau_rv32_ECRYPT_decrypt_bytes
#define ECRYPT_encrypt_bytes crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_bytes
#define ECRYPT_process_packet crypto_stream_chacha20_dolbeau_rv32_ECRYPT_process_packet
#define ECRYPT_decrypt_packet crypto_stream_chacha20_dolbeau_rv32_ECRYPT_decrypt_packet
#define ECRYPT_encrypt_packet crypto_stream_chacha20_dolbeau_rv32_ECRYPT_encrypt_packet

View file

@ -0,0 +1,19 @@
#include <random>
#include <functional>
// Default-seeded engine: the byte sequence is the same on every run.
std::default_random_engine generator;
// uniform_int_distribution is only specified for short, int, long,
// long long and their unsigned counterparts; instantiating it with
// unsigned char is undefined behaviour.  Draw an unsigned int in
// [0, 255] and narrow it to a byte at the point of use instead.
std::uniform_int_distribution<unsigned int> distribution(0,255);
auto rbyte = std::bind ( distribution, generator );
extern "C" {
// Fills x[0 .. xlen-1] with pseudo-random bytes; writes nothing when
// xlen is 0.
void kernelrandombytes(unsigned char *x,unsigned long long xlen)
{
  while (xlen > 0) {
    *x = static_cast<unsigned char>(rbyte());
    x++;
    xlen--;
  }
}
}

View file

@ -0,0 +1,83 @@
/*
cpucycles/riscv.c version 20190803
D. J. Bernstein
Romain Dolbeau
Public domain.
*/
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>
/*
 * Read the RISC-V cycle counter.
 *
 * RV64: a single rdcycle yields the value.
 * RV32: the counter is read as two 32-bit halves (rdcycleh / rdcycle).  The
 * high half is read before and after the low half and the sequence is retried
 * until both high reads agree, so the two halves combined below belong to the
 * same high-word value.
 *
 * Compilation fails with #error when __riscv_xlen is missing or is neither
 * 32 nor 64.
 */
long long cpucycles_riscv(void)
{
long long result;
#if defined(__riscv_xlen)
#if __riscv_xlen == 64
asm volatile("rdcycle %0" : "=r" (result));
#elif __riscv_xlen == 32
unsigned int l, h, h2;
/* Use a numeric local label ("1:" / "1b") rather than a named one: a named
   label is emitted once per copy of this asm statement, so inlining or
   cloning the function would define the same symbol twice and fail to
   assemble. */
asm volatile( "1:\n"
"rdcycleh %0\n"
"rdcycle %1\n"
"rdcycleh %2\n"
"bne %0, %2, 1b\n"
: "=r" (h), "=r" (l), "=r" (h2));
result = (((unsigned long long)h)<<32) | ((unsigned long long)l);
#else
#error "unknown __riscv_xlen"
#endif
#else // __riscv_xlen
#error "__riscv_xlen required for RISC-V support"
#endif // __riscv_xlen
return result;
}
/* Wall-clock time from gettimeofday(), expressed as a single microsecond count. */
static long long microseconds(void)
{
  struct timeval now;
  long long total;

  gettimeofday(&now, (struct timezone *) 0);
  total = now.tv_sec * (long long) 1000000;
  total += now.tv_usec;
  return total;
}
/*
 * One estimate of cycle-counter ticks per second.
 *
 * Samples the cycle counter and the wall clock, then keeps resampling until
 * at least 10000 microseconds AND at least 1000 cycles have elapsed.  The
 * result is elapsed cycles divided by elapsed seconds; 0 is returned if the
 * final cycle sample is not greater than the first.
 */
static double guessfreq(void)
{
  long long cyc_begin, cyc_end;
  long long us_begin, us_end;

  cyc_begin = cpucycles_riscv();
  us_begin = microseconds();

  for (;;) {
    cyc_end = cpucycles_riscv();
    us_end = microseconds();
    if (!(us_end - us_begin < 10000 || cyc_end - cyc_begin < 1000)) break;
  }

  if (cyc_end <= cyc_begin) return 0;

  cyc_end -= cyc_begin;
  us_end -= us_begin;
  return ((double) cyc_end) / (0.000001 * (double) us_end);
}
/* Cached frequency estimate in cycles per second; 0 means "not determined". */
static long long cpufrequency = 0;

/*
 * Try up to 100 times to obtain two consecutive guessfreq() estimates that
 * agree to within 1% of each other; on the first such pair store their mean
 * in cpufrequency.  If no pair ever agrees, cpufrequency is left unchanged.
 */
static void init(void)
{
  int attempt = 0;

  while (attempt < 100) {
    double first = guessfreq();
    double second = guessfreq();
    ++attempt;
    if (!(first > 1.01 * second) && !(second > 1.01 * first)) {
      cpufrequency = 0.5 * (first + second);
      return;
    }
  }
}
/*
 * Return the cached cycles-per-second estimate, running the measurement
 * first whenever the cached value is still 0.
 */
long long cpucycles_riscv_persecond(void)
{
  if (cpufrequency == 0) {
    init();
  }
  return cpufrequency;
}

View file

@ -0,0 +1,323 @@
/*
* try-anything.c version 20190729
* D. J. Bernstein
* Some portions adapted from TweetNaCl by Bernstein, Janssen, Lange, Schwabe.
* Public domain.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/resource.h>
#include "kernelrandombytes.h"
#include "cpucycles.h"
#include "crypto_uint8.h"
#include "crypto_uint32.h"
#include "crypto_uint64.h"
#include "try.h"
/* Short aliases for the integer types supplied by the crypto_uint*.h headers
   (presumably unsigned 8/32/64-bit; the headers are not visible here). */
typedef crypto_uint8 u8;
typedef crypto_uint32 u32;
typedef crypto_uint64 u64;
/* Loop header running i from 0 up to n-1.  n is expanded without parentheses
   and is re-evaluated on every iteration; i must already be declared. */
#define FOR(i,n) for (i = 0;i < n;++i)
/*
 * Rotate the low 32 bits of x left by c positions.
 * c is expected to be in 1..31 (c == 0 would shift right by 32).
 */
static u32 L32(u32 x,int c)
{
  u32 upper = x << c;
  u32 lower = (x & 0xffffffff) >> (32 - c);
  return upper | lower;
}
/* Assemble a 32-bit word from four bytes, x[0] being the least significant. */
static u32 ld32(const u8 *x)
{
  u32 word = 0;
  int idx;

  for (idx = 3; idx >= 0; --idx) {
    word = (word << 8) | x[idx];
  }
  return word;
}
/* Store the four low bytes of u into x[0..3], least significant byte first. */
static void st32(u8 *x,u32 u)
{
  x[0] = u;
  x[1] = u >> 8;
  x[2] = u >> 16;
  x[3] = u >> 24;
}
/* 16 constant bytes (plus the string's terminating NUL) loaded into state
   words 0, 5, 10 and 15 by core(). */
static const u8 sigma[17] = "expand 32-byte k";
/*
 * Block function used by salsa20() and checksum() below (the file header
 * notes that portions are adapted from TweetNaCl).
 *
 * out: 64 bytes written.
 * in:  16 bytes read.
 * k:   32 bytes read.
 *
 * All of in and k is loaded into local words before anything is stored to
 * out, so out may alias in or k (checksum() passes the same buffer as out
 * and k).
 */
static void core(u8 *out,const u8 *in,const u8 *k)
{
u32 w[16],x[16],y[16],t[4];
int i,j,m;
/* Build the 16-word state: x[0,5,10,15] = sigma, x[1..4] = k[0..15],
   x[6..9] = in[0..15], x[11..14] = k[16..31]. */
FOR(i,4) {
x[5*i] = ld32(sigma+4*i);
x[1+i] = ld32(k+4*i);
x[6+i] = ld32(in+4*i);
x[11+i] = ld32(k+16+4*i);
}
/* Keep the initial state for the final word-wise addition. */
FOR(i,16) y[i] = x[i];
/* 20 iterations of the mixing step. */
FOR(i,20) {
FOR(j,4) {
/* Gather four state words into t[]. */
FOR(m,4) t[m] = x[(5*j+4*m)%16];
/* Add, rotate (by 7, 9, 13, 18) and xor. */
t[1] ^= L32(t[0]+t[3], 7);
t[2] ^= L32(t[1]+t[0], 9);
t[3] ^= L32(t[2]+t[1],13);
t[0] ^= L32(t[3]+t[2],18);
/* Scatter the results into w[]; the write positions differ from the
   read positions above. */
FOR(m,4) w[4*j+(j+m)%4] = t[m];
}
FOR(m,16) x[m] = w[m];
}
/* Output word i is x[i] + y[i], stored low byte first by st32(). */
FOR(i,16) st32(out + 4 * i,x[i] + y[i]);
}
/*
 * Write b bytes of core() output to c, for the 8-byte nonce n and 32-byte key k.
 *
 * The 16-byte core input is the nonce followed by an 8-byte little-endian
 * block counter that starts at 0 and is incremented after every full 64-byte
 * block.  A trailing partial block copies only the first b remaining bytes.
 * Nothing happens when b is 0.
 */
static void salsa20(u8 *c,u64 b,const u8 *n,const u8 *k)
{
  u8 input[16], block[64];
  u32 carry, i;

  if (b == 0) return;

  for (i = 0; i < 8; ++i) input[i] = n[i];
  for (i = 8; i < 16; ++i) input[i] = 0;

  for (; b >= 64; b -= 64, c += 64) {
    core(block, input, k);
    for (i = 0; i < 64; ++i) c[i] = block[i];

    /* Add 1 to the counter held in input[8..15], propagating the carry. */
    carry = 1;
    for (i = 8; i < 16; ++i) {
      carry += (u32) input[i];
      input[i] = carry;
      carry >>= 8;
    }
  }

  if (b != 0) {
    core(block, input, k);
    for (i = 0; i < b; ++i) c[i] = block[i];
  }
}
/*
 * Add 1 to the 8-byte little-endian counter n[0..7]: bump each byte in turn
 * and stop at the first one that does not wrap around to 0.
 */
static void increment(u8 *n)
{
  int pos;

  for (pos = 0; pos < 8; ++pos) {
    if (++n[pos]) break;
  }
}
/*
 * Fill x[0..xlen-1] with the next chunk of the deterministic test-input
 * stream: salsa20() output under a fixed key and a private nonce that is
 * advanced by one after every call.
 */
static void testvector(unsigned char *x,unsigned long long xlen)
{
  static const unsigned char key[33] = "generate inputs for test vectors";
  static unsigned char nonce[8];

  salsa20(x, xlen, nonce, key);
  increment(nonce);
}
/*
 * Return the next 8 bytes of the testvector() stream as one integer,
 * byte 0 being the least significant.
 */
unsigned long long myrandom(void)
{
  unsigned char bytes[8];
  unsigned long long value = 0;
  int idx;

  testvector(bytes, 8);
  for (idx = 7; idx >= 0; --idx) {
    value = (value << 8) | bytes[idx];
  }
  return value;
}
/*
 * Fill x[0..xlen-1] with the next chunk of the guard-byte stream: salsa20()
 * output under a fixed key and a private nonce that is advanced by one after
 * every call (independent of the testvector() stream).
 */
static void canary(unsigned char *x,unsigned long long xlen)
{
  static const unsigned char key[33] = "generate pad to catch overwrites";
  static unsigned char nonce[8];

  salsa20(x, xlen, nonce, key);
  increment(nonce);
}
/*
 * Write fresh 16-byte guard regions immediately before and after x[0..xlen-1]
 * and copy the same guard bytes around x2, leaving the xlen payload bytes of
 * both buffers untouched.
 */
void double_canary(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  unsigned char *front = x - 16;
  unsigned char *back = x + xlen;

  /* Front guard first, then back guard: the canary stream is stateful. */
  canary(front, 16);
  canary(back, 16);

  memcpy(x2 - 16, front, 16);
  memcpy(x2 + xlen, back, 16);
}
/*
 * Prepare an input buffer and its reference copy: fill x[0..xlen-1] from the
 * test-vector stream, put 16 guard bytes before and after it, then copy the
 * whole guarded region (xlen + 32 bytes) to the same offsets around x2.
 */
void input_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  unsigned char *region = x - 16;

  testvector(x, xlen);
  canary(region, 16);
  canary(x + xlen, 16);
  memcpy(x2 - 16, region, xlen + 32);
}
/*
 * Check that an input buffer still matches its reference copy, guards
 * included (xlen + 32 bytes starting 16 bytes before each pointer).
 * On mismatch, report on stderr using the name fun and exit with status 111.
 */
void input_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
{
  if (memcmp(x2 - 16, x - 16, xlen + 32) == 0) return;

  fprintf(stderr,"%s overwrites input\n",fun);
  exit(111);
}
/*
 * Prepare an output buffer and its reference copy: fill the whole guarded
 * region around x (16 bytes before, xlen payload bytes, 16 bytes after) with
 * guard-stream bytes and copy it to the same offsets around x2.
 */
void output_prepare(unsigned char *x2,unsigned char *x,unsigned long long xlen)
{
  unsigned long long total = xlen + 32;
  unsigned char *region = x - 16;

  canary(region, total);
  memcpy(x2 - 16, region, total);
}
/*
 * Check that the 16 guard bytes before and after an output buffer still match
 * the reference copy; the xlen payload bytes themselves are not compared.
 * On mismatch, report on stderr using the name fun and exit with status 111.
 */
void output_compare(const unsigned char *x2,const unsigned char *x,unsigned long long xlen,const char *fun)
{
  int front_damaged = memcmp(x2 - 16, x - 16, 16) != 0;

  if (front_damaged) {
    fprintf(stderr,"%s writes before output\n",fun);
    exit(111);
  }

  if (memcmp(x2 + xlen, x + xlen, 16) != 0) {
    fprintf(stderr,"%s writes after output\n",fun);
    exit(111);
  }
}
/* Running checksum state updated by checksum(); main() prints its first 32 bytes. */
static unsigned char checksum_state[64];
/* Buffer for the hex rendering of the checksum (64 digits plus NUL). */
static char checksum_hex[65];

/*
 * Absorb x[0..xlen-1] into checksum_state.
 *
 * Each full 16-byte chunk is fed through core() with the current state acting
 * as both key and output.  The remaining 0..15 bytes are copied into a zeroed
 * 16-byte block, a 1 byte is placed right after them, bit 0 of
 * checksum_state[0] is flipped, and that block is absorbed the same way.
 */
void checksum(const unsigned char *x,unsigned long long xlen)
{
  u8 block[16];
  int i;

  for (; xlen >= 16; x += 16, xlen -= 16) {
    core(checksum_state, x, checksum_state);
  }

  /* xlen is now below 16: copy the tail, zero the rest. */
  for (i = 0; i < 16; ++i) {
    block[i] = ((unsigned long long) i < xlen) ? x[i] : 0;
  }
  block[xlen] = 1;

  checksum_state[0] ^= 1;
  core(checksum_state, block, checksum_state);
}
/*
 * Print s to stdout as a single space-terminated word: an empty string is
 * printed as '-', and every space, tab, CR or LF inside s becomes '_'.
 */
static void printword(const char *s)
{
  if (*s == '\0') putchar('-');

  for (; *s != '\0'; ++s) {
    switch (*s) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
        putchar('_');
        break;
      default:
        putchar(*s);
        break;
    }
  }

  putchar(' ');
}
/* Print x to stdout in decimal, followed by one space. */
static void printnum(long long x)
{
  fprintf(stdout, "%lld ", x);
}
/*
 * Abort the test run: print the message why on its own line to stderr and
 * terminate the process with exit status 111.  Never returns.
 */
void fail(const char *why)
{
  FILE *err = stderr;

  fprintf(err, "%s\n", why);
  exit(111);
}
/*
 * Allocate a zeroed len-byte buffer whose address is a multiple of 64, with
 * random() filler in the slack before and after it.
 *
 * len + 256 bytes are obtained and filled with random() values; the returned
 * pointer is moved 64 bytes in and then rounded up to the next 64-byte
 * boundary, and only the len bytes starting there are cleared.  Calls
 * fail("out of memory") if the allocation fails.  The returned pointer cannot
 * be passed to free().
 */
unsigned char *alignedcalloc(unsigned long long len)
{
  unsigned long long total = len + 256;
  unsigned char *x = (unsigned char *) calloc(1, total);
  long long i;

  if (!x) fail("out of memory");

  /* will never deallocate so shifting is ok */
  for (i = 0; i < total; ++i) {
    x[i] = random();
  }

  x += 64;
  x += 63 & (-(unsigned long) x);

  for (i = 0; i < len; ++i) {
    x[i] = 0;
  }
  return x;
}
/* Number of timing differences kept by main(); one extra slot is needed
   because TIMINGS + 1 timestamps produce TIMINGS differences. */
#define TIMINGS 63
static long long cycles[TIMINGS + 1];
/*
 * Drop resource limits to zero (soft and hard) for open files, processes and
 * core dumps, for each of those limits that the platform defines.  Return
 * values of setrlimit() are deliberately ignored.  Compiles to an empty
 * function when RLIM_INFINITY is not defined.
 */
void limits()
{
#ifdef RLIM_INFINITY
  struct rlimit zero;

  zero.rlim_max = 0;
  zero.rlim_cur = zero.rlim_max;

#ifdef RLIMIT_NOFILE
  setrlimit(RLIMIT_NOFILE, &zero);
#endif
#ifdef RLIMIT_NPROC
  setrlimit(RLIMIT_NPROC, &zero);
#endif
#ifdef RLIMIT_CORE
  setrlimit(RLIMIT_CORE, &zero);
#endif
#endif
}
/* Destination for the single byte requested from kernelrandombytes() in
   main(); it is not read anywhere in this file. */
static unsigned char randombyte[1];
/*
 * Test/benchmark driver.
 *
 * Runs the hooks supplied by try.c (preallocate, allocate, test, predoit,
 * doit), times them with cpucycles(), and prints one line to stdout:
 *   <checksum hex> <median doit cycles> <test() cycles> <cycles/second> <implementation>
 * Returns 0; failures inside the hooks terminate the process through fail().
 */
int main()
{
long long i;
long long j;
long long abovej;
long long belowj;
long long checksumcycles;
long long cyclespersecond;
/* Two initial counter reads whose values are overwritten below. */
cycles[0] = cpucycles();
cycles[1] = cpucycles();
cyclespersecond = cpucycles_persecond();
kernelrandombytes(randombyte,1);
preallocate();
limits();
allocate();
/* Note: srandom() runs after allocate(), so the random() filler written by
   alignedcalloc() is produced before this seeding. */
srandom(getpid());
/* Time one complete run of the correctness test. */
cycles[0] = cpucycles();
test();
cycles[1] = cpucycles();
checksumcycles = cycles[1] - cycles[0];
predoit();
/* A pass of back-to-back counter reads with no work in between; the values
   are overwritten by the next loop. */
for (i = 0;i <= TIMINGS;++i) {
cycles[i] = cpucycles();
}
/* TIMINGS + 1 timestamps, each taken just before a doit() call. */
for (i = 0;i <= TIMINGS;++i) {
cycles[i] = cpucycles();
doit();
}
/* Convert timestamps into TIMINGS per-call durations. */
for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
/* Median selection: stop at the first j for which fewer than half of the
   durations are strictly smaller and fewer than half are strictly larger. */
for (j = 0;j < TIMINGS;++j) {
belowj = 0;
for (i = 0;i < TIMINGS;++i) if (cycles[i] < cycles[j]) ++belowj;
abovej = 0;
for (i = 0;i < TIMINGS;++i) if (cycles[i] > cycles[j]) ++abovej;
if (belowj * 2 < TIMINGS && abovej * 2 < TIMINGS) break;
}
/* Hex-encode the first 32 bytes of the checksum state (lowercase digits). */
for (i = 0;i < 32;++i) {
checksum_hex[2 * i] = "0123456789abcdef"[15 & (checksum_state[i] >> 4)];
checksum_hex[2 * i + 1] = "0123456789abcdef"[15 & checksum_state[i]];
}
/* i is 32 here, so this terminates the 64-character string. */
checksum_hex[2 * i] = 0;
printword(checksum_hex);
printnum(cycles[j]);
printnum(checksumcycles);
printnum(cyclespersecond);
printword(primitiveimplementation);
printf("\n");
return 0;
}

View file

@ -0,0 +1,135 @@
/*
* crypto_stream/try.c version 20140423
* D. J. Bernstein
* Public domain.
* Auto-generated by trygen.py; do not edit.
*/
#include "crypto_stream.h"
#include "try.h"
/* Implementation name printed by the driver's main(). */
const char *primitiveimplementation = crypto_stream_IMPLEMENTATION;
/* Message length used by doit(), i.e. by the timed call. */
#define TUNE_BYTES 1536
/* Largest message length drawn in test(); lengths are in 0..MAXTEST_BYTES. */
#ifdef SMALL
#define MAXTEST_BYTES 128
#else
#define MAXTEST_BYTES 4096
#endif
/* Number of iterations of the test() loop. */
#ifdef SMALL
#define LOOPS 512
#else
#define LOOPS 4096
#endif
/* Working buffers set up by allocate(): key, nonce, message, ciphertext and
   stream output. */
static unsigned char *k;
static unsigned char *n;
static unsigned char *m;
static unsigned char *c;
static unsigned char *s;
/* Second copy of each buffer, used by test() as reference copy / alternate
   destination. */
static unsigned char *k2;
static unsigned char *n2;
static unsigned char *m2;
static unsigned char *c2;
static unsigned char *s2;
/* Key and nonce lengths are compile-time constants of the primitive. */
#define klen crypto_stream_KEYBYTES
#define nlen crypto_stream_NONCEBYTES
/* Lengths chosen per test() iteration; clen and slen are set equal to mlen. */
unsigned long long mlen;
unsigned long long clen;
unsigned long long slen;
/* Hook declared in try.h; intentionally empty for this primitive. */
void preallocate(void)
{
}
/*
 * Allocate the ten working buffers (k, n, m, c, s and their "2" twins).
 * Every buffer gets the same size: the largest of TUNE_BYTES, MAXTEST_BYTES,
 * crypto_stream_KEYBYTES and crypto_stream_NONCEBYTES.
 */
void allocate(void)
{
  /* Allocation order is kept as k, n, m, c, s, k2, n2, m2, c2, s2. */
  unsigned char **slots[10] = { &k, &n, &m, &c, &s, &k2, &n2, &m2, &c2, &s2 };
  unsigned long long alloclen = TUNE_BYTES;
  int idx;

  if (alloclen < MAXTEST_BYTES) alloclen = MAXTEST_BYTES;
  if (alloclen < crypto_stream_KEYBYTES) alloclen = crypto_stream_KEYBYTES;
  if (alloclen < crypto_stream_NONCEBYTES) alloclen = crypto_stream_NONCEBYTES;

  for (idx = 0; idx < 10; ++idx) {
    *slots[idx] = alignedcalloc(alloclen);
  }
}
/* Hook declared in try.h; intentionally empty for this primitive. */
void predoit(void)
{
}
/* The operation being benchmarked: one crypto_stream_xor() call over
   TUNE_BYTES bytes of m into c, with nonce n and key k. */
void doit(void)
{
crypto_stream_xor(c,m,TUNE_BYTES,n,k);
}
/*
 * Correctness test for crypto_stream() and crypto_stream_xor().
 *
 * Runs LOOPS iterations with a message length drawn from myrandom().  Each
 * iteration checks return values, guard bytes around outputs, that inputs are
 * left unmodified, determinism, agreement between the two functions, and
 * in-place / overlapping-buffer use.  Outputs are folded into the global
 * checksum.  Any violation ends the process through fail() or the
 * *_compare() helpers.
 *
 * Statement order matters: myrandom(), input_prepare(), output_prepare() and
 * double_canary() all consume stateful generator streams.
 */
void test(void)
{
unsigned long long j;
unsigned long long loop;
for (loop = 0;loop < LOOPS;++loop) {
/* Message length in 0..MAXTEST_BYTES for this iteration. */
mlen = myrandom() % (MAXTEST_BYTES + 1);
clen = mlen;
slen = mlen;
/* --- crypto_stream: fresh inputs, guarded output. --- */
output_prepare(s2,s,slen);
input_prepare(n2,n,nlen);
input_prepare(k2,k,klen);
if (crypto_stream(s,slen,n,k) != 0) fail("crypto_stream returns nonzero");
checksum(s,slen);
output_compare(s2,s,slen,"crypto_stream");
input_compare(n2,n,nlen,"crypto_stream");
input_compare(k2,k,klen,"crypto_stream");
/* Re-run on the twin buffers with new guard bytes: result must be identical. */
double_canary(s2,s,slen);
double_canary(n2,n,nlen);
double_canary(k2,k,klen);
if (crypto_stream(s2,slen,n2,k2) != 0) fail("crypto_stream returns nonzero");
if (memcmp(s2,s,slen) != 0) fail("crypto_stream is nondeterministic");
/* --- crypto_stream_xor: fresh message, same nonce and key. --- */
output_prepare(c2,c,clen);
input_prepare(m2,m,mlen);
memcpy(n2,n,nlen);
double_canary(n2,n,nlen);
memcpy(k2,k,klen);
double_canary(k2,k,klen);
if (crypto_stream_xor(c,m,mlen,n,k) != 0) fail("crypto_stream_xor returns nonzero");
/* The xor output must equal the stream output xored with the message. */
for (j = 0;j < mlen;++j)
if ((s[j] ^ m[j]) != c[j]) fail("crypto_stream_xor does not match crypto_stream");
checksum(c,clen);
output_compare(c2,c,clen,"crypto_stream_xor");
input_compare(m2,m,mlen,"crypto_stream_xor");
input_compare(n2,n,nlen,"crypto_stream_xor");
input_compare(k2,k,klen,"crypto_stream_xor");
/* Determinism check on the twin buffers. */
double_canary(c2,c,clen);
double_canary(m2,m,mlen);
double_canary(n2,n,nlen);
double_canary(k2,k,klen);
if (crypto_stream_xor(c2,m2,mlen,n2,k2) != 0) fail("crypto_stream_xor returns nonzero");
if (memcmp(c2,c,clen) != 0) fail("crypto_stream_xor is nondeterministic");
/* Overlap checks: the output buffer is the same as the message, nonce or
   key buffer in turn; each twin is restored afterwards. */
double_canary(c2,c,clen);
double_canary(m2,m,mlen);
double_canary(n2,n,nlen);
double_canary(k2,k,klen);
if (crypto_stream_xor(m2,m2,mlen,n,k) != 0) fail("crypto_stream_xor with m=c overlap returns nonzero");
if (memcmp(m2,c,clen) != 0) fail("crypto_stream_xor does not handle m=c overlap");
memcpy(m2,m,mlen);
if (crypto_stream_xor(n2,m,mlen,n2,k) != 0) fail("crypto_stream_xor with n=c overlap returns nonzero");
if (memcmp(n2,c,clen) != 0) fail("crypto_stream_xor does not handle n=c overlap");
memcpy(n2,n,nlen);
if (crypto_stream_xor(k2,m,mlen,n,k2) != 0) fail("crypto_stream_xor with k=c overlap returns nonzero");
if (memcmp(k2,c,clen) != 0) fail("crypto_stream_xor does not handle k=c overlap");
memcpy(k2,k,klen);
}
}

View file

@ -0,0 +1,21 @@
#include <stdlib.h>
#include <string.h>
/* provided by try.c: */
/* Implementation name string printed by the driver. */
extern const char *primitiveimplementation;
/* Hooks called by the driver's main() in this order:
   preallocate, allocate, test, predoit, then doit repeatedly. */
extern void preallocate(void);
extern void allocate(void);
extern void test(void);
extern void predoit(void);
extern void doit(void);
/* provided by try-anything.c: */
/* Print the message to stderr and exit with status 111. */
extern void fail(const char *);
/* Return a zeroed buffer of the given length, aligned to 64 bytes. */
extern unsigned char *alignedcalloc(unsigned long long);
/* Fold the given bytes into the global checksum. */
extern void checksum(const unsigned char *,unsigned long long);
/* For the helpers below the arguments are (reference copy, buffer, length);
   each buffer is expected to have 16 guard bytes before and after it. */
extern void double_canary(unsigned char *,unsigned char *,unsigned long long);
extern void input_prepare(unsigned char *,unsigned char *,unsigned long long);
extern void output_prepare(unsigned char *,unsigned char *,unsigned long long);
/* The last argument is the function name used in the error message. */
extern void input_compare(const unsigned char *,const unsigned char *,unsigned long long,const char *);
extern void output_compare(const unsigned char *,const unsigned char *,unsigned long long,const char *);
/* Next 64-bit value from the deterministic test-vector stream. */
extern unsigned long long myrandom(void);