This commit is contained in:
Blaise Tine 2019-11-22 22:35:03 -05:00
commit 288e1863ba
10 changed files with 212 additions and 92690 deletions

View file

@ -35,3 +35,8 @@ HEX: ELF
# ELF: build $(VX_MAIN).elf from $(VX_MAIN).c plus whatever the VX_*, NEWLIB and
# LIBS variables expand to (defined outside this view), with ./include on the
# header search path (-Iinclude).
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# run: invoke the simX binary on vx_vector_main.hex with `-a rv32i`; the shell
# redirection `1>` sends its standard output to emulator.debug.
# NOTE(review): this rule lists no prerequisites, so the .hex file must already
# exist when `make run` is invoked — confirm that is intended.
run:
../../simX/obj_dir/Vcache_simX -E -a rv32i --core vx_vector_main.hex -s -b 1> emulator.debug

View file

@ -7,7 +7,7 @@
extern "C" {
#endif
void vx_vec_test(int *);
void vx_vec_test(int n, int* a, int* b, int* c); //vvaddint32
#ifdef __cplusplus

View file

@ -1,30 +1,23 @@
# vx_vec_test: exported function symbol (marked @function and .global below).
.type vx_vec_test, @function
.global vx_vec_test
vx_vec_test:
# Stores the constant 7 to the word at 0(a0) and returns.
# NOTE(review): as laid out here this `ret` is reached before the vector loop
# further down, and that loop's `bnez` branches back to this label. This looks
# like the pre-change and post-change bodies shown together — confirm which
# body is current before assembling.
li a1, 7
sw a1, 0(a0)
ret
# Disabled (commented-out) strip-mined loop, kept for reference only:
# slli a0, a0, 2
# add a0, a0, a3
# vmv.v.x vv0, a2
# # vsplat4 vv0, a2
# stripmine_loop:
# vlb4 vv1, (a1)
# vcmpez4 vp0, vv1
# !vp0 vlw4 vv1, (a3)
# !vp0 vlw4 vv2, (a4)
# !vp0 vfma4 vv1, vv0, vv1, vv2
# !vp0 vsw4 vv1, (a4)
# addi a1, a1, 4
# addi a3, a3, 16
# addi a4, a4, 16
# bleu a3, a0, stripmine_loop
# handle edge cases
# when (n % 4) != 0 ...
# vector-vector add routine of 32-bit integers
# void vvaddint32(size_t n, const int*x, const int*y, int*z)
# { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
#
# a0 = n, a1 = x, a2 = y, a3 = z
# Non-vector instructions are indented
vsetvli t0, a0, e32 # Set vector length for 32-bit elements; t0 is used below as the count handled this pass
vlw.v v0, (a1) # Load a vector of x elements from (a1)
sub a0, a0, t0 # Remaining count n -= elements handled this pass
slli t0, t0, 2 # Convert element count to bytes (count * 4)
add a1, a1, t0 # Advance x pointer by the bytes handled
vlw.v v1, (a2) # Load a vector of y elements from (a2)
add a2, a2, t0 # Advance y pointer by the bytes handled
vadd.vv v2, v0, v1 # v2 = v0 + v1
vsw.v v2, (a3) # Store the sums to z at (a3)
add a3, a3, t0 # Advance z pointer by the bytes handled
bnez a0, vx_vec_test # Branch back to vx_vec_test while a0 (remaining count) is non-zero
ret # Remaining count reached zero: return

View file

@ -1,32 +1,29 @@
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec.h"
// Test driver for vx_vec_test: allocates three n-element int arrays, fills every
// element with 1, calls vx_vec_test(n, a, b, c), and prints each element of all
// three arrays.
// NOTE(review): this listing also contains a single-pointer variant
// (`int * a = malloc(4); ... vx_vec_test(a);`), so `a` is declared twice in the
// same scope and vx_vec_test is called with both 4 and 1 arguments; vx_tmc(1)
// and vx_tmc(0) are each repeated. It looks like old and new revisions shown
// together — confirm which lines are current before building.
int main()
{
// vx_tmc is declared outside this block; its effect is not visible here.
// NOTE(review): presumably adjusts the active thread configuration — confirm.
vx_tmc(1);
// int * a = malloc(4);
// int * b = malloc(4);
// int * c = malloc(4);
vx_tmc(1);
printf("Hello\n");
// Number of elements in each of the three arrays.
int n = 64;
// Heap-allocate n ints per array; the malloc results are not checked for NULL
// and the buffers are not freed in this function.
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
// Initialise every element of a, b and c to 1.
for(int i = 0; i < n; ++i)
{
a[i] = b[i] = c[i] = 1;
}
// Hand the element count and the three arrays to the routine under test.
vx_vec_test(n, a, b, c);
// Dump all three arrays so the result can be inspected by eye.
for (int i = 0; i < n; ++i)
{
printf("a[%d]=%d, b[%d]=%d, c[%d]=%d\n", i, a[i], i, b[i], i, c[i]);
}
// Single-int variant: store 5, print it, call vx_vec_test with one pointer,
// and print the value again.
int * a = malloc(4);
*a = 5;
printf("Value of a: %d\n", *a);
vx_vec_test(a);
printf("Value of a: %d\n", *a);
// Disabled self-check comparing c[i] against a[i] + b[i] for the first 4 elements:
// for (int i = 0; i < 4; i++)
// {
// if (c[i] != (a[i] + b[i]))
// {
// printf("Fail\n");
// break;
// }
// }
vx_tmc(0);
vx_tmc(0);
}

File diff suppressed because it is too large Load diff

Binary file not shown.

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
################################################################################
# HARPtools by Chad D. Kersey, Summer 2011 #
################################################################################
CXXFLAGS ?= -std=c++11 -fPIC -O3 # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS
CXXFLAGS ?= -std=c++11 -fPIC -O3 -g # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS
LIB_OBJS=simX.cpp args.cpp mem.cpp core.cpp instruction.cpp enc.cpp util.cpp

View file

@ -46,7 +46,7 @@
trace_inst.vd = -1; \
trace_inst.is_lw = false; \
trace_inst.is_sw = false; \
trace_inst.mem_addresses = new unsigned[a.getNThds()]; \
trace_inst.mem_addresses = (unsigned *) malloc(32 * sizeof(unsigned)); \
for (int tid = 0; tid < a.getNThds(); tid++) trace_inst.mem_addresses[tid] = 0xdeadbeef; \
trace_inst.mem_stall_cycles = 0; \
trace_inst.fetch_stall_cycles = 0; \
@ -163,6 +163,8 @@ void Core::step()
{
cout << "\n\n\n------------------------------------------------------\n";
D(3, "Started core::step" << flush);
steps++;
cout << "CYCLE: " << steps << '\n';
@ -179,20 +181,30 @@ void Core::step()
// cout << regii << ": " << renameTable[0][regii] << '\n';
// }
cout << '\n';
cout << '\n' << flush;
cout << "About to call writeback" << endl;
this->writeback();
cout << "About to call load_store" << endl;
this->load_store();
cout << "About to call execute_unit" << endl;
this->execute_unit();
cout << "About to call scheduler" << endl;
this->scheduler();
cout << "About to call decode" << endl;
this->decode();
D(3, "About to call fetch" << flush);
this->fetch();
D(3, "Finished fetch" << flush);
if (release_warp)
{
release_warp = false;
stallWarp[release_warp_num] = false;
}
D(3, "released warp" << flush);
D(3, "Finished core::step" << flush);
}
void Core::getCacheDelays(trace_inst_t * trace_inst)
@ -396,15 +408,19 @@ void Core::fetch()
{
D(3, "Core step stepping warp " << schedule_w << '[' << w[schedule_w].activeThreads << ']');
w[schedule_w].step(&inst_in_fetch);
D(3, "Now " << w[schedule_w].activeThreads << " active threads in " << schedule_w);
D(3, "Now " << w[schedule_w].activeThreads << " active threads in " << schedule_w << flush);
this->getCacheDelays(&inst_in_fetch);
D(3, "Got cache delays" << flush);
if (inst_in_fetch.stall_warp)
{
stallWarp[inst_in_fetch.wid] = true;
}
D(3, "staled warps\n" << flush);
}
D(3, "About to schedule warp\n" << flush);
warpScheduler();
D(3, "Scheduled warp" << flush);
}
}
else
@ -413,21 +429,25 @@ void Core::fetch()
if (inst_in_fetch.fetch_stall_cycles > 0) inst_in_fetch.fetch_stall_cycles--;
}
D(3, "Printing trace" << flush);
printTrace(&inst_in_fetch, "Fetch");
D(3, "printed trace" << flush);
// #ifdef PRINT_ACTIVE_THREADS
D(3, "About to print active threads" << flush << "\n");
for (unsigned j = 0; j < w[schedule_w].tmask.size(); ++j) {
if (w[schedule_w].activeThreads > j && w[schedule_w].tmask[j]) cout << " 1";
else cout << " 0";
if (j != w[schedule_w].tmask.size()-1 || schedule_w != w.size()-1) cout << ',';
}
D(3, "\nPrinted active threads" << flush);
// #endif
#ifdef PRINT_ACTIVE_THREADS
// #ifdef PRINT_ACTIVE_THREADS
cout << endl;
#endif
// #endif
}
void Core::decode()
@ -522,7 +542,7 @@ void Core::load_store()
void Core::execute_unit()
{
// cout << "$$$$$$$$$$$$$$$$$$$ EXE START\n";
cout << "$$$$$$$$$$$$$$$$$$$ EXE START\n" << flush;
bool do_nothing = false;
// EXEC is always not busy
if (inst_in_scheduler.is_lw || inst_in_scheduler.is_sw)
@ -546,6 +566,7 @@ void Core::execute_unit()
// cout << "Rename RS2: " << inst_in_scheduler.rs1 << " is " << renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2] << " wid: " << inst_in_scheduler.wid << '\n';
}
cout << "About to check vs*\n" << flush;
if(inst_in_scheduler.vs1 > 0)
{
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs1];
@ -554,6 +575,7 @@ void Core::execute_unit()
{
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs2];
}
cout << "Finished sources\n" << flush;
if (scheduler_srcs_ready)
{
@ -561,11 +583,15 @@ void Core::execute_unit()
// cout << "rename setting rd: " << inst_in_scheduler.rd << " to not useabel wid: " << inst_in_scheduler.wid << '\n';
renameTable[inst_in_scheduler.wid][inst_in_scheduler.rd] = false;
}
cout << "About to check vector wb: " << inst_in_scheduler.vd << "\n" << flush;
if(inst_in_scheduler.vd != -1) {
vecRenameTable[inst_in_scheduler.vd] = false;
}
cout << "Finished wb checking" << "\n" << flush;
CPY_TRACE(inst_in_exe, inst_in_scheduler);
INIT_TRACE(inst_in_scheduler);
cout << "Finished trace copying and clearning" << "\n" << flush;
}
else
{
@ -583,6 +609,7 @@ void Core::execute_unit()
//printTrace(&inst_in_exe, "execute_unit");
// INIT_TRACE(inst_in_exe);
D(3, "EXECUTE END" << flush);
}
void Core::writeback()

View file

@ -1105,10 +1105,10 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
{
is_vec = true;
D(3, "Addition " << rsrc[0] << " " << rsrc[1] << " Dest:" << rdest);
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> mask = c.vreg[0];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
vector<Reg<char *>> & mask = c.vreg[0];
if (c.vtype.vsew == 8)
{
@ -1166,8 +1166,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
}
}
D(3, "Vector Register state after addition:");
D(3, "Vector Register state after addition:" << flush);
for(int i=0; i < c.vreg.size(); i++)
{
for(int j=0; j< c.vreg[0].size(); j++)
{
if (c.vtype.vsew == 8)
@ -1184,13 +1185,16 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl;
}
}
}
D(3, "After vector register state after addition" << flush);
}
break;
case 24: //vmseq
{
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -1229,9 +1233,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
break;
case 25: //vmsne
{
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -1270,9 +1274,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
break;
case 26: //vmsltu
{
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -1311,9 +1315,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
break;
case 27: //vmslt
{
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(int8_t i = 0; i < c.vl; i++){
int8_t *first_ptr = (int8_t *)vr1[i].val;
@ -1351,9 +1355,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
break;
case 28: //vmsleu
{
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -1391,9 +1395,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
break;
case 29: //vmsle
{
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(int8_t i = 0; i < c.vl; i++){
int8_t *first_ptr = (int8_t *)vr1[i].val;
@ -1431,9 +1435,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
break;
case 30: //vmsgtu
{
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -1471,9 +1475,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
break;
case 31: //vmsgt
{
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(int8_t i = 0; i < c.vl; i++){
int8_t *first_ptr = (int8_t *)vr1[i].val;
@ -1522,9 +1526,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
case 24: //vmandnot
{
D(3, "vmandnot");
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -1584,9 +1588,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
case 25: //vmand
{
D(3, "vmand");
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -1645,9 +1649,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
case 26: //vmor
{
D(3, "vmor");
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -1706,9 +1710,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
case 27: //vmxor
{
D(3, "vmxor");
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
uint8_t *result_ptr;
for(uint8_t i = 0; i < c.vl; i++){
@ -1767,9 +1771,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
case 28: //vmornot
{
D(3, "vmornot");
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -1825,9 +1829,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
case 29: //vmnand
{
D(3, "vmnand");
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -1887,9 +1891,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
case 30: //vmnor
{
D(3, "vmnor");
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
uint8_t *result_ptr;
@ -1951,9 +1955,9 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
D(3, "vmxnor");
uint8_t *result_ptr;
vector<Reg<char *>> vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vr1 = c.vreg[rsrc[0]];
vector<Reg<char *>> & vr2 = c.vreg[rsrc[1]];
vector<Reg<char *>> & vd = c.vreg[rdest];
if(c.vtype.vsew == 8){
for(uint8_t i = 0; i < c.vl; i++){
uint8_t *first_ptr = (uint8_t *)vr1[i].val;
@ -2040,6 +2044,7 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
Word regNum(0);
c.vreg.clear();
for (int j = 0; j < 32; j++)
{
c.vreg.push_back(vector<Reg<char*>>());
@ -2052,6 +2057,11 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
}
}
break;
default:
{
cout << "default???\n" << flush;
}
}
break;
case VL:
@ -2063,54 +2073,73 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
D(3, "src: " << rsrc[0] << " " << reg[rsrc[0]]);
D(3, "dest" << rdest);
D(3, "width" << vlsWidth);
vector<Reg<char *>> vd = c.vreg[rdest];
vector<Reg<char *>> & vd = c.vreg[rdest];
switch(vlsWidth) {
case 6: //load word and unit strided (not checking for unit stride)
for(Word i = 0; i < c.vl; i++) {
memAddr = ((reg[rsrc[0]]) & 0xFFFFFFFC) + (i*c.vtype.vsew/8);
data_read = c.core->mem.read(memAddr, c.supervisorMode);
D(3, "Mem addr: " << std::hex << memAddr << " Data read " << data_read);
int * result_ptr = (int *) vd[i].val;
*result_ptr = data_read;
switch(vlsWidth)
{
case 6: //load word and unit strided (not checking for unit stride)
{
for(Word i = 0; i < c.vl; i++) {
memAddr = ((reg[rsrc[0]]) & 0xFFFFFFFC) + (i*c.vtype.vsew/8);
data_read = c.core->mem.read(memAddr, c.supervisorMode);
D(3, "Mem addr: " << std::hex << memAddr << " Data read " << data_read);
int * result_ptr = (int *) vd[i].val;
*result_ptr = data_read;
trace_inst->is_lw = true;
trace_inst->mem_addresses[i] = memAddr;
}
/*for(Word i = c.vl; i < VLMAX; i++){
int * result_ptr = (int *) vd[i].val;
*result_ptr = 0;
}*/
D(3, "Vector Register state after addition:");
for(int i=0; i < c.vreg.size(); i++)
for(int j=0; j< c.vreg[0].size(); j++)
{
if (c.vtype.vsew == 8)
{
uint8_t * ptr_val = (uint8_t *) c.vreg[i][j].val;
std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl;
} else if (c.vtype.vsew == 16)
{
uint16_t * ptr_val = (uint16_t *) c.vreg[i][j].val;
std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl;
} else if (c.vtype.vsew == 32)
{
uint32_t * ptr_val = (uint32_t *) c.vreg[i][j].val;
std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl;
}
trace_inst->is_lw = true;
trace_inst->mem_addresses[i] = memAddr;
}
break;
/*for(Word i = c.vl; i < VLMAX; i++){
int * result_ptr = (int *) vd[i].val;
*result_ptr = 0;
}*/
D(3, "Vector Register state ----:");
// for(int i=0; i < 32; i++)
// {
// for(int j=0; j< c.vl; j++)
// {
// cout << "starting iter" << endl;
// if (c.vtype.vsew == 8)
// {
// uint8_t * ptr_val = (uint8_t *) c.vreg[i][j].val;
// std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl;
// } else if (c.vtype.vsew == 16)
// {
// uint16_t * ptr_val = (uint16_t *) c.vreg[i][j].val;
// std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl;
// } else if (c.vtype.vsew == 32)
// {
// uint32_t * ptr_val = (uint32_t *) c.vreg[i][j].val;
// std::cout << "reg[" << i << "][" << j << "] = " << *ptr_val << std::endl;
// }
// cout << "Finished iter" << endl;
// }
// }
cout << "Finished loop" << endl;
}
cout << "aaaaaaaaaaaaaaaaaaaaaa" << endl;
break;
default:
{
cout << "Serious default??\n" << flush;
}
break;
}
break;
}
break;
case VS:
is_vec = true;
VLMAX = (c.vtype.vlmul * c.VLEN)/c.vtype.vsew;
for(Word i = 0; i < c.vl; i++) {
for(Word i = 0; i < c.vl; i++)
{
cout << "iter" << endl;
++c.stores;
memAddr = reg[rsrc[0]] + (i*c.vtype.vsew/8);
std::cout << "STORE MEM ADDRESS: " << std::hex << memAddr << "\n";
std::cout << "STORE MEM ADDRESS *** : " << std::hex << memAddr << "\n";
trace_inst->is_sw = true;
@ -2121,25 +2150,35 @@ void Instruction::executeOn(Warp &c, trace_inst_t * trace_inst) {
case 6: //store word and unit strided (not checking for unit stride)
{
uint32_t * ptr_val = (uint32_t *) c.vreg[vs3][i].val;
D(3, "value: " << flush << (*ptr_val) << flush);
c.core->mem.write(memAddr, *ptr_val, c.supervisorMode, 4);
D(3, "store: " << memAddr << " value:" << *ptr_val);
D(3, "store: " << memAddr << " value:" << *ptr_val << flush);
}
break;
default:
cout << "ERROR: UNSUPPORTED S INST\n";
cout << "ERROR: UNSUPPORTED S INST\n" << flush;
exit(1);
}
c.memAccesses.push_back(Warp::MemAccess(true, memAddr));
cout << "Loop finished" << endl;
// c.memAccesses.push_back(Warp::MemAccess(true, memAddr));
}
cout << "After for loop" << endl;
break;
default:
cout << "pc: " << hex << (c.pc-4) << "\n";
cout << "aERROR: Unsupported instruction: " << *this << "\n" << flush;
exit(1);
}
// break;
cout << "outside case" << endl << flush;
}
D(3, "End instruction execute.");
std::cout << "finished instruction" << endl << flush;
D(3, "End instruction execute." << flush);
c.activeThreads = nextActiveThreads;