vector benchmark

This commit is contained in:
Euna Kim 2019-11-24 09:05:07 -05:00
parent 7f95534d27
commit ec068d2428
22 changed files with 284578 additions and 2 deletions

View file

@ -0,0 +1,33 @@
# Build rules for the vector SAXPY benchmark (vx_vec_saxpy).
# Produces $(VX_MAIN).elf, a full disassembly ($(VX_MAIN).dump) and an
# Intel-hex image ($(VX_MAIN).hex).

# Root of the runtime sources (startup code, intrinsics, I/O, newlib stubs).
LIB_PATH = ../../../runtime
# Cross toolchain binaries (absolute, user-specific paths).
COMP = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-gcc
# Freestanding static link against the linker script under $(LIB_PATH)/mains,
# targeting rv32imv / ilp32. NOTE: -nostartfiles is listed twice.
CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nostartfiles -nodefaultlibs -Wl,-Bstatic,-T,$(LIB_PATH)/mains/vortex_link.ld -march=rv32imv -mabi=ilp32
DMP = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objdump
CPY = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
# VX_STR = ../../startup/vx_start.s
# Runtime source files compiled and linked into every image.
NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.s
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_TEST = $(LIB_PATH)/tests/tests.c
VX_FIO = $(LIB_PATH)/fileio/fileio.s
# Hand-written vector kernel for this benchmark (integer version of SAXPY).
VX_VEC = vx_vec_saxpy.s #float --> int
LIBS = /nethome/ekim79/riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a /nethome/ekim79/riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
# Base name of the driver source ($(VX_MAIN).c) and of all outputs.
VX_MAIN = vx_vec_saxpy
# Default goal: build the ELF, then derive the hex image and the disassembly.
all: HEX DUMP ELF
# Disassemble every section of the ELF into $(VX_MAIN).dump.
DUMP: ELF
$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump
# Convert the ELF to Intel-hex format.
HEX: ELF
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
# Compile and link startup code, kernel, runtime and driver in one step.
# The target lists no prerequisites and ELF is not a file name, so this
# recipe runs on every invocation.
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf

View file

@ -0,0 +1,69 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec_saxpy.h"
//---------------------------------------------------------------
/* # void saxpy(size_t n, const float a, const float *x, float *y)
# ==> convert to int!!
# void saxpy(size_t n, const int a, const int *x, int *y)
# { size_t i;
# for (i=0; i<n; i++) y[i] = a * x[i] + y[i]; } */
//---------------------------------------------------------------
/*
 * Test driver for the vx_vec_saxpy kernel.
 *
 * Fills a[] with pseudo-random integers in [0, 100], zeroes b[], runs the
 * kernel (b[i] = factor * a[i] + b[i]) and compares every element of b[]
 * against a scalar recomputation. c[] holds the value b[] had before the
 * kernel ran (all zeros).
 *
 * Returns 0 when every element matches, 1 on a mismatch or when a buffer
 * could not be allocated.
 */
int main()
{
    vx_tmc(1);

    int n = 4; //#define NUM_DATA 65536

    int *a = (int*)malloc(sizeof(int) * n);
    int *b = (int*)malloc(sizeof(int) * n);
    int *c = (int*)malloc(sizeof(int) * n); //verification

    /* Never hand NULL pointers to the kernel; free(NULL) is a no-op. */
    if (!a || !b || !c)
    {
        printf("\n<saxpy> FAILED: out of memory\n");
        free(a); free(b); free(c);
        return 1;
    }

    // float factor = ((float)rand()/(float)(RAND_MAX)) * 100.0;
    int factor = ((float)rand()/(RAND_MAX)) * 100.0;

    for (int i = 0; i < n; ++i) {
        a[i] = ((float)rand()/(RAND_MAX)) * 100.0;
        b[i] = 0;
        c[i] = 0;
    }
    //; c[i] = 2;}

#if 1
    printf("saxpy\nfactor: %d\na[%d]: ", factor, n);
    for(int i = 0; i < n; ++i) printf("%d ", a[i]);
    // printf("\nb[%d]: ", n);
    // for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
#endif

    vx_vec_saxpy(n, factor, a, b);

#if 1
    printf("\nsaxpy\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", a[i]);
    printf("\n\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", b[i]);
#endif

    /* Scalar reference: c[i] is the pre-kernel value of b[i]. */
    for(int i = 0; i < n; ++i)
    {
        if(b[i] != ((a[i] * factor) + c[i]))
        {
            printf("\n<saxpy> FAILED at <index: %d>! \n", i);
            free(a); free(b); free(c);   /* do not leak on the failure path */
            return 1;
        }
    }

    printf("\nPASSED.......................... <saxpy> \n");

    free(a); free(b); free(c);

    vx_tmc(0);
    return 0;
}

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -0,0 +1,12 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Integer SAXPY implemented in assembly (vx_vec_saxpy.s):
 *   for i in [0, n): b[i] = scalar * a[i] + b[i]
 * a is only read; b is read and overwritten in place.
 */
void vx_vec_saxpy(int n, int scalar, int* a, int* b);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,26 @@
.type vx_vec_saxpy, @function
.global vx_vec_saxpy
#-----------------------------------------------------------------------
# void vx_vec_saxpy(int n, int factor, int *a, int *b)
#   for (i = 0; i < n; i++) b[i] = factor * a[i] + b[i];
#
# Target: RV32 + V extension (0.7.x mnemonics), ilp32 calling convention.
# In:     a0 = n       element count (consumed, 0 on return)
#         a1 = factor  scalar multiplier
#         a2 = a       source vector       (pointer is advanced)
#         a3 = b       in/out vector       (pointer is advanced)
# Clobb:  a4, v0-v15, vl/vtype
#
# The loop is strip-mined: vsetvli is re-issued on every pass so that a4
# always holds the element count of the *current* strip, including the
# final partial one. Because LMUL=8 is selected, vector operands must be
# register groups whose number is a multiple of 8 (v0, v8, ...).
#-----------------------------------------------------------------------
vx_vec_saxpy:
.Lsaxpy_loop:
    vsetvli a4, a0, e32, m8         # a4 = elements handled this pass
    vlw.v   v0, (a2)                # v0.. = a[i ...]
    sub     a0, a0, a4              # elements still to do after this pass
    slli    a4, a4, 2               # a4 = bytes consumed this pass
    add     a2, a2, a4              # a += vl
    vlw.v   v8, (a3)                # v8.. = b[i ...]
    vmul.vx v0, v0, a1              # v0 = factor * a[i]
    vadd.vv v8, v0, v8              # v8 = factor * a[i] + b[i]
# vmacc.vx v8, a1, v0               # fused alternative, left disabled as in the original
    vsw.v   v8, (a3)                # b[i ...] = v8
    add     a3, a3, a4              # b += vl
    bnez    a0, .Lsaxpy_loop        # more elements left?
    ret

View file

@ -0,0 +1,33 @@
# Build rules for the vector SGEMM (no-transpose) benchmark (vx_vec_sgemm_nn).
# Produces $(VX_MAIN).elf, a full disassembly ($(VX_MAIN).dump) and an
# Intel-hex image ($(VX_MAIN).hex).

# Root of the runtime sources (startup code, intrinsics, I/O, newlib stubs).
LIB_PATH = ../../../runtime
# Cross toolchain binaries (absolute, user-specific paths).
COMP = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-gcc
# Freestanding static link against the linker script under $(LIB_PATH)/mains,
# targeting rv32imv / ilp32. NOTE: -nostartfiles is listed twice.
CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nostartfiles -nodefaultlibs -Wl,-Bstatic,-T,$(LIB_PATH)/mains/vortex_link.ld -march=rv32imv -mabi=ilp32
DMP = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objdump
CPY = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
# VX_STR = ../../startup/vx_start.s
# Runtime source files compiled and linked into every image.
NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.s
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_TEST = $(LIB_PATH)/tests/tests.c
VX_FIO = $(LIB_PATH)/fileio/fileio.s
# Hand-written vector kernel for this benchmark (integer version of SGEMM).
VX_VEC = vx_vec_sgemm_nn.s #float --> int
LIBS = /nethome/ekim79/riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a /nethome/ekim79/riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
# Base name of the driver source ($(VX_MAIN).c) and of all outputs.
VX_MAIN = vx_vec_sgemm_nn
# Default goal: build the ELF, then derive the hex image and the disassembly.
all: HEX DUMP ELF
# Disassemble every section of the ELF into $(VX_MAIN).dump.
DUMP: ELF
$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump
# Convert the ELF to Intel-hex format.
HEX: ELF
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
# Compile and link startup code, kernel, runtime and driver in one step.
# The target lists no prerequisites and ELF is not a file name, so this
# recipe runs on every invocation.
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf

View file

@ -0,0 +1,95 @@
#include <stdio.h>
#include <stdlib.h>
#include "../../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec_sgemm_nn.h"
//---------------------------------------------------------------
/* # void sgemm_nn(size_t n, size_t m, size_t k,
# int *a, // m * k matrix
# int *b, // k * n matrix
# int *c) // m * n matrix
# c += a*b (alpha=1, no transpose on input matrices)
# matrices stored in C row-major order */
//---------------------------------------------------------------
/*
 * Test driver for the vx_vec_sgemm_nn kernel.
 *
 * Builds an m*k matrix a1 (a1[i] = i), a k*n matrix b1 (all ones) and a
 * zeroed m*n matrix c1, runs the kernel (expected: c1 += a1 * b1, all
 * matrices row-major) and compares c1 element by element against a scalar
 * reference product accumulated in d1.
 *
 * Returns 0 when every element matches, 1 on a mismatch or when a buffer
 * could not be allocated.
 */
int main()
{
    vx_tmc(1);

    int m = 3;
    int k = 3;
    int n = 3;

    int* a1 = (int*)malloc(sizeof(int) * m * k);
    int* b1 = (int*)malloc(sizeof(int) * k * n);
    int* c1 = (int*)malloc(sizeof(int) * m * n);
    int* d1 = (int*)malloc(sizeof(int) * m * n); //verification

    /* Never hand NULL pointers to the kernel; free(NULL) is a no-op. */
    if (!a1 || !b1 || !c1 || !d1)
    {
        printf("\n<sgemm_nn> FAILED: out of memory\n");
        free(a1); free(b1); free(c1); free(d1);
        return 1;
    }

    for (int i = 0; i < (m * k); ++i) a1[i] = i;
    for (int i = 0; i < (k * n); ++i) b1[i] = 1;
    for (int i = 0; i < (m * n); ++i) c1[i] = 0;
    for (int i = 0; i < (m * n); ++i) d1[i] = 0;

#if 1
    printf("sgemm_nn\na[%d]:", m*k);
    for (int i = 0; i < m*k; ++i) {
        if(!(i % k)) printf("\n");
        printf("%d ", a1[i]);
    }
    printf("\n\nb[%d]:", k*n);
    for (int i = 0; i < k*n; ++i) {
        if (!(i % n)) printf("\n");
        printf("%d ", b1[i]);
    }
#endif

    vx_vec_sgemm_nn(n, m, k, a1, b1, c1);
    // vx_vec_sgemm_nn(n, a1, b1, c1);

#if 1
    printf("\n\nc[%d]:\n", m*n);
    for (int i = 0; i < m*n; ++i) {
        if (!(i % n)) printf("\n");
        printf("%d ", c1[i]);
    }
#endif

    /*
     * Scalar reference, d1 = a1 * b1 with row-major indexing:
     *   a1 is m x k -> a1[row*k + inner]
     *   b1 is k x n -> b1[inner*n + col]
     *   d1 is m x n -> d1[row*n + col]
     * The bounds and strides must follow the matrix shapes, otherwise the
     * check is only right when m == k == n.
     */
    for (int row = 0; row < m; row++) {
        for (int inner = 0; inner < k; inner++) {
            for (int col = 0; col < n; col++) {
                d1[row*n+col] += a1[row*k+inner]*b1[inner*n+col];
            }
        }
    }

#if 1
    printf("\n\nc[%d]:\n", m*n);
    for(int i = 0; i < m; ++i) {
        for(int j = 0; j < n; ++j) {
            printf("%d ", c1[i*n+j]);   /* row stride of c1 is n, not m */
        }
        printf("\n");
    }
#endif

    for(int i = 0; i < m*n; ++i)
    {
        if(c1[i] != d1[i])
        {
            printf("\n<sgemm_nn> FAILED at <index: %d>! \n", i);
            free(a1); free(b1); free(c1); free(d1);   /* do not leak on failure */
            return 1;
        }
    }

    printf("\nPASS.......................... <sgemm_nn> \n");

    free(a1); free(b1); free(c1); free(d1);

    vx_tmc(0);
    return 0;
}

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -0,0 +1,13 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc);
/*
 * Integer matrix multiply-accumulate implemented in assembly
 * (vx_vec_sgemm_nn.s). Intended contract, per the comments in the kernel
 * source: c1 += a1 * b1 with a1 of shape m x k, b1 of shape k x n and c1 of
 * shape m x n, all stored row-major, no transposition.
 * Note the argument order: n first, then m, then k.
 */
void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int* b1, int* c1);
//void vx_vec_sgemm_nn(int n, int* a1, int* b1, int* c1);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,61 @@
.type vx_vec_sgemm_nn, @function
.global vx_vec_sgemm_nn
#-----------------------------------------------------------------------
# void vx_vec_sgemm_nn(int n, int m, int k, int *a, int *b, int *c)
#
#   a : m * k matrix, b : k * n matrix, c : m * n matrix (row-major)
#   c += a * b   (alpha = 1, no transpose on the input matrices)
#
#   for (r = 0; r < m; r++)
#     for (j = 0; j < n; j += vl)            // strip of vl columns
#       for (kk = 0; kk < k; kk++)
#         c[r*n + j .. j+vl) += a[r*k + kk] * b[kk*n + j .. j+vl)
#
# Target: RV32 + V extension (0.7.x mnemonics), ilp32 calling convention.
# In:     a0 = n, a1 = m, a2 = k, a3 = a, a4 = b, a5 = c
# Clobb:  a1, a3, a5, a7, t0-t6, v0-v15, vl/vtype
#
# Register roles inside the loops:
#   t0 = columns of the current c row still to process
#   t1 = inner-product terms (kk) still to accumulate
#   t2 = &b[0][j]      first row of the current column strip
#   t3 = vl, later vl*4 / k*4 as a byte offset
#   t4 = &b[kk][j]     walks down the column strip
#   t5 = a[r][kk]      scalar multiplier
#   t6 = n*4           byte stride between consecutive rows of b
#   a7 = &a[r][kk]
#   v0 = accumulator for c[r][j..], v8 = b[kk][j..] * a[r][kk]
# LMUL=8 is selected, so only register groups v0, v8, v16, v24 are used.
#-----------------------------------------------------------------------
vx_vec_sgemm_nn:
    blez    a0, .Lsgemm_done        # n <= 0: c has no columns
    blez    a1, .Lsgemm_done        # m <= 0: c has no rows
    slli    t6, a0, 2               # t6 = byte stride between rows of b
.Lsgemm_row:                        # ---- one row of a / c per pass ----
    mv      t0, a0                  # whole row of c still to do
    mv      t2, a4                  # column strip starts at b[0][0]
.Lsgemm_strip:                      # ---- one strip of vl columns ----
    vsetvli t3, t0, e32, m8         # t3 = columns handled in this strip
    vlw.v   v0, (a5)                # v0 = c[r][j ...]
    mv      t1, a2                  # k terms to accumulate
    mv      a7, a3                  # a7 = &a[r][0]
    mv      t4, t2                  # t4 = &b[0][j]
    blez    t1, .Lsgemm_store       # k <= 0: c strip is written back as is
.Lsgemm_dot:                        # ---- accumulate over kk ----
    lw      t5, 0(a7)               # t5 = a[r][kk]
    vlw.v   v8, (t4)                # v8 = b[kk][j ...]
    vmul.vx v8, v8, t5              # v8 = a[r][kk] * b[kk][j ...]
    vadd.vv v0, v0, v8              # accumulate into the c strip
    addi    a7, a7, 4               # next element of the a row
    add     t4, t4, t6              # next row of b, same columns
    addi    t1, t1, -1
    bnez    t1, .Lsgemm_dot
.Lsgemm_store:
    vsw.v   v0, (a5)                # c[r][j ...] = v0
    sub     t0, t0, t3              # columns left in this row
    slli    t3, t3, 2               # strip width in bytes
    add     a5, a5, t3              # c advances; at row end this is c[r+1][0]
    add     t2, t2, t3              # next column strip of b
    bnez    t0, .Lsgemm_strip
    slli    t3, a2, 2               # k*4 = bytes per row of a
    add     a3, a3, t3              # a3 = &a[r+1][0]
    addi    a1, a1, -1              # one row done
    bnez    a1, .Lsgemm_row
.Lsgemm_done:
    ret

View file

@ -0,0 +1,41 @@
# Build rules for the vector add benchmark (vx_vec_vecadd).
# Produces $(VX_MAIN).elf, a full disassembly ($(VX_MAIN).dump) and an
# Intel-hex image ($(VX_MAIN).hex).

# Root of the runtime sources (startup code, intrinsics, I/O, newlib stubs).
LIB_PATH = ../../../runtime
# Cross toolchain binaries (absolute, user-specific paths).
COMP = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-gcc
# Freestanding static link against the linker script under $(LIB_PATH)/mains,
# targeting rv32imv / ilp32. NOTE: -nostartfiles is listed twice.
CC_FLAGS = -ffreestanding -O0 -Wl,--gc-sections -nostartfiles -nostdlib -nostartfiles -nodefaultlibs -Wl,-Bstatic,-T,$(LIB_PATH)/mains/vortex_link.ld -march=rv32imv -mabi=ilp32
DMP = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objdump
CPY = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
# VX_STR = ../../startup/vx_start.s
# Runtime source files compiled and linked into every image.
NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.s
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_TEST = $(LIB_PATH)/tests/tests.c
VX_FIO = $(LIB_PATH)/fileio/fileio.s
# Only the vector-add kernel is built here; the other kernels are disabled.
VX_VEC1 = vx_vec_vvaddint32.s
#VX_VEC2 = vx_vec_saxpy.s #float --> int
#VX_VEC3 = vx_vec_sgemm.s #float --> int
#VX_VEC4 = vx_vec_vsadd.s
#VX_VEC5 = vx_vec_memcpy.s
LIBS = /nethome/ekim79/riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a /nethome/ekim79/riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
# Base name of the driver source ($(VX_MAIN).c) and of all outputs.
VX_MAIN = vx_vec_vecadd
# Default goal: build the ELF, then derive the hex image and the disassembly.
all: HEX DUMP ELF
# Disassemble every section of the ELF into $(VX_MAIN).dump.
DUMP: ELF
$(DMP) -D $(VX_MAIN).elf > $(VX_MAIN).dump
# Convert the ELF to Intel-hex format.
HEX: ELF
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
# Compile and link startup code, kernel, runtime and driver in one step.
# The target lists no prerequisites and ELF is not a file name, so this
# recipe runs on every invocation.
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC1) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# Disabled link lines for the other kernels (VX_VEC2..VX_VEC5 are commented out above).
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC2) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC3) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC4) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC5) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf

View file

@ -0,0 +1,57 @@
#include <stdio.h>
#include <stdlib.h>
#include "../../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec_vecadd.h"
//---------------------------------------------------------------
/* vvaddint32
* # vector-vector add routine of 32-bit integers
* # void vvaddint32(size_t n, const int*x, const int*y, int*z)
* # { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } } */
//---------------------------------------------------------------
/*
 * Test driver for the vx_vec_vvaddint32 kernel.
 *
 * Fills a[i] = 2*i and b[i] = 2*i + 1, zeroes c[], runs the kernel
 * (c[i] = a[i] + b[i]) and checks every element of c[] against a scalar
 * recomputation.
 *
 * Returns 0 when every element matches, 1 on a mismatch or when a buffer
 * could not be allocated.
 */
int main()
{
    vx_tmc(1);

    int n = 4; //SIZE

    int *a = (int*)malloc(sizeof(int) * n);
    int *b = (int*)malloc(sizeof(int) * n);
    int *c = (int*)malloc(sizeof(int) * n);

    /* Never hand NULL pointers to the kernel; free(NULL) is a no-op. */
    if (!a || !b || !c)
    {
        printf("\n<vddint32> FAILED: out of memory\n");
        free(a); free(b); free(c);
        return 1;
    }

    // Initialize values for array members.
    for (int i = 0; i < n; ++i) {
        a[i] = i * 2 + 0;
        b[i] = i * 2 + 1;
        c[i] = 0;
    }

#if 0
    printf("vvaddint...\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", a[i]);
    printf("\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", b[i]);
    printf("\nc[%d] = a[%d] + b[%d]: ", n, n, n);
    for(int i = 0; i < n; ++i) printf("%d ", c[i]);
#endif

    vx_vec_vvaddint32(n, a, b, c);

    for(int i = 0; i < n; ++i)
    {
        if(c[i] != (a[i]+b[i]))
        {
            printf("\n<vddint32> FAILED at <index: %d>! \n", i);
            free(a); free(b); free(c);   /* do not leak on the failure path */
            return 1;
        }
    }

    printf("\nPASSED.......................... <vddint32> \n");

    free(a); free(b); free(c);

    vx_tmc(0);
    return 0;
}

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -0,0 +1,17 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Vector-vector add of 32-bit integers implemented in assembly
 * (vx_vec_vvaddint32.s):
 *   for i in [0, n): c[i] = a[i] + b[i]
 * a and b are only read; c is overwritten.
 */
void vx_vec_vvaddint32(int n, int* a, int* b, int *c);
// Prototypes of other vector kernels, currently disabled in this header:
//void vx_vec_vsadd(int n, int* a, int scalar);
//void vx_vec_memcpy(int* a, int* b, int n);
//void vx_vec_saxpy(int n, int scalar, int* a, int* b);
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc);
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int* b1, int* c1);
//void vx_vec_sgemm_nn(int n, int* a1, int* b1, int* c1);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,22 @@
.type vx_vec_vvaddint32, @function
.global vx_vec_vvaddint32
#-----------------------------------------------------------------------
# void vx_vec_vvaddint32(int n, const int *x, const int *y, int *z)
#   for (i = 0; i < n; i++) z[i] = x[i] + y[i];
#
# Target: RV32 + V extension (0.7.x mnemonics), ilp32 calling convention.
# In:     a0 = n   element count (consumed, 0 on return)
#         a1 = x   first source   (pointer is advanced)
#         a2 = y   second source  (pointer is advanced)
#         a3 = z   destination    (pointer is advanced)
# Clobb:  t0, v0, v1, v2, vl/vtype
#
# The loop is strip-mined: vsetvli is re-issued on every pass so that t0
# always holds the element count of the *current* strip, including the
# final partial one.
#-----------------------------------------------------------------------
vx_vec_vvaddint32:
.Lvvadd_loop:
    vsetvli t0, a0, e32             # t0 = elements handled this pass
    vlw.v   v0, (a1)                # Get first vector
    sub     a0, a0, t0              # Decrement number done
    slli    t0, t0, 2               # Multiply number done by 4 bytes
    add     a1, a1, t0              # Bump pointer
    vlw.v   v1, (a2)                # Get second vector
    add     a2, a2, t0              # Bump pointer
    vadd.vv v2, v0, v1              # Sum vectors
    vsw.v   v2, (a3)                # Store result
    add     a3, a3, t0              # Bump pointer
    bnez    a0, .Lvvadd_loop        # Loop back while elements remain
    ret                             # Finished

View file

@ -1,6 +1,7 @@
# Start a fresh results log, run make, then launch the Vcache_simX emulator
# on benchmark hex images; emulator stdout is captured in emulator.debug.
echo start > results.txt
# echo ../kernel/vortex_test.hex
make
printf "Fasten your seatbelts ladies and gentelmen!!\n\n\n\n"
# Run the combined vector benchmark image.
cd obj_dir && ./Vcache_simX -E -a rv32i --core ../../rvvector/benchmark_temp/vx_vec_benchmark.hex -s -b 1> emulator.debug
# Alternative images, disabled:
#cd obj_dir && ./Vcache_simX -E -a rv32i --core ../../benchmarks/vector/vecadd/vx_vec_vecadd.hex -s -b 1> emulator.debug
#cd obj_dir && ./Vcache_simX -E -a rv32i --core ../../benchmarks/vector/saxpy/vx_vec_saxpy.hex -s -b 1> emulator.debug
# NOTE(review): the earlier active `cd obj_dir` already changed the working
# directory, so this second `cd obj_dir` looks like it will fail and skip the
# sgemm_nn run (and it would overwrite emulator.debug otherwise) - confirm
# which single image is meant to run, or wrap each run in a subshell.
cd obj_dir && ./Vcache_simX -E -a rv32i --core ../../benchmarks/vector/sgemm_nn/vx_vec_sgemm_nn.hex -s -b 1> emulator.debug