mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
implementing gpu library
This commit is contained in:
parent
c3c3cb0b45
commit
39003073f9
14 changed files with 9745 additions and 432 deletions
|
@ -256,10 +256,18 @@ int emu_main(int argc, char **argv) {
|
|||
mu.attach(console, 1ll<<(arch.getWordSize()*8 - 1));
|
||||
// mu.attach(console, 0xf0000000);
|
||||
|
||||
std::cout << "ABOUT TO START\n";
|
||||
while (core.running()) { console.poll(); core.step(); }
|
||||
|
||||
if (showStats) core.printStats();
|
||||
|
||||
Addr base_addr = 0x81000000;
|
||||
for (Addr i = 0; i < 16; i++)
|
||||
{
|
||||
Addr new_addr = base_addr + (i *4);
|
||||
std::cout << std::hex << new_addr << " = " << std::dec << old_ram.read(new_addr) << "\n";
|
||||
}
|
||||
|
||||
std::cout << "\n";
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -23,8 +23,8 @@ namespace Harp {
|
|||
encChar = 'w';
|
||||
nRegs = 32;
|
||||
nPRegs = 0;
|
||||
nThds = 1;
|
||||
nWarps = 1;
|
||||
nThds = 8;
|
||||
nWarps = 3;
|
||||
|
||||
extent = EXT_WARPS;
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#ifndef __DEBUG_H
|
||||
#define __DEBUG_H
|
||||
|
||||
// #define USE_DEBUG 9
|
||||
#define USE_DEBUG 9
|
||||
|
||||
#ifdef USE_DEBUG
|
||||
#include <iostream>
|
||||
|
|
|
@ -27,7 +27,8 @@ namespace Harp {
|
|||
JALR_INST = 103,
|
||||
SYS_INST = 115,
|
||||
TRAP = 0x7f,
|
||||
FENCE = 0x0f
|
||||
FENCE = 0x0f,
|
||||
GPGPU = 0x6b
|
||||
};
|
||||
|
||||
enum InstType { N_TYPE, R_TYPE, I_TYPE, S_TYPE, B_TYPE, U_TYPE, J_TYPE};
|
||||
|
@ -42,19 +43,20 @@ namespace Harp {
|
|||
|
||||
static std::map<int, struct InstTableEntry_t> instTable =
|
||||
{
|
||||
{Opcode::NOP, {"nop" , false, false, false, false, InstType::N_TYPE }},
|
||||
{Opcode::R_INST, {"r_type", false, false, false, false, InstType::R_TYPE }},
|
||||
{Opcode::L_INST, {"load" , false, false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::I_INST, {"i_type", false, false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::S_INST, {"store" , false, false, false, false, InstType::S_TYPE }},
|
||||
{Opcode::B_INST, {"branch", true , false, false, false, InstType::B_TYPE }},
|
||||
{Opcode::LUI_INST, {"lui" , false, false, false, false, InstType::U_TYPE }},
|
||||
{Opcode::AUIPC_INST, {"auipc" , false, false, false, false, InstType::U_TYPE }},
|
||||
{Opcode::JAL_INST, {"jal" , true , false, false, false, InstType::J_TYPE }},
|
||||
{Opcode::JALR_INST, {"jalr" , true , false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::SYS_INST, {"SYS" , true , false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::TRAP, {"TRAP" , true , false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::FENCE, {"fence" , true , false, false, false, InstType::I_TYPE }}
|
||||
{Opcode::NOP, {"nop" , false, false, false, false, InstType::N_TYPE }},
|
||||
{Opcode::R_INST, {"r_type", false, false, false, false, InstType::R_TYPE }},
|
||||
{Opcode::L_INST, {"load" , false, false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::I_INST, {"i_type", false, false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::S_INST, {"store" , false, false, false, false, InstType::S_TYPE }},
|
||||
{Opcode::B_INST, {"branch", true , false, false, false, InstType::B_TYPE }},
|
||||
{Opcode::LUI_INST, {"lui" , false, false, false, false, InstType::U_TYPE }},
|
||||
{Opcode::AUIPC_INST, {"auipc" , false, false, false, false, InstType::U_TYPE }},
|
||||
{Opcode::JAL_INST, {"jal" , true , false, false, false, InstType::J_TYPE }},
|
||||
{Opcode::JALR_INST, {"jalr" , true , false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::SYS_INST, {"SYS" , true , false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::TRAP, {"TRAP" , true , false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::FENCE, {"fence" , true , false, false, false, InstType::I_TYPE }},
|
||||
{Opcode::GPGPU, {"gpgpu" , false, false, false, false, InstType::R_TYPE }}
|
||||
};
|
||||
|
||||
static const Size MAX_REG_SOURCES(3);
|
||||
|
|
|
@ -431,6 +431,7 @@ void Instruction::executeOn(Warp &c) {
|
|||
break;
|
||||
case SYS_INST:
|
||||
temp = reg[rsrc[0]];
|
||||
std::cout << "STORING IN CSR: " << std::hex << reg[rsrc[0]] << " csr#: " << (immsrc & 0x00000FFF) << "\n";
|
||||
switch (func3)
|
||||
{
|
||||
case 1:
|
||||
|
@ -438,7 +439,9 @@ void Instruction::executeOn(Warp &c) {
|
|||
{
|
||||
reg[rdest] = c.csr[immsrc & 0x00000FFF];
|
||||
}
|
||||
c.csr[immsrc & 0x00000FFF] = temp;
|
||||
|
||||
std::cout << "FOR SURE WRITING TO CSR#: " << (immsrc & 0x00000FFF) << " value: " << temp << "\n";
|
||||
c.csr[immsrc & 0x00000FFF] = temp;
|
||||
|
||||
break;
|
||||
case 2:
|
||||
|
@ -492,6 +495,7 @@ void Instruction::executeOn(Warp &c) {
|
|||
default:
|
||||
break;
|
||||
}
|
||||
std::cout << "READING FROM CSR: " << reg[rdest] << " csr#: " << (immsrc & 0x00000FFF) << "\n";
|
||||
break;
|
||||
case TRAP:
|
||||
std::cout << "INTERRUPT TRAP\n";
|
||||
|
@ -500,6 +504,49 @@ void Instruction::executeOn(Warp &c) {
|
|||
break;
|
||||
case FENCE:
|
||||
break;
|
||||
case GPGPU:
|
||||
switch(func3)
|
||||
{
|
||||
case 0:
|
||||
// WSPAWN
|
||||
D(0, "Spawning a new warp.");
|
||||
std::cout << "SIZE: " << c.core->w.size() << "\n";
|
||||
for (unsigned i = 0; i < c.core->w.size(); ++i)
|
||||
{
|
||||
std::cout << "WHATTT\n";
|
||||
Warp &newWarp(c.core->w[i]);
|
||||
std::cout << "STARTING\n";
|
||||
if (newWarp.spawned == false) {
|
||||
std::cout << "ABOUT TO START\n";
|
||||
newWarp.pc = reg[rsrc[0]];
|
||||
newWarp.reg[0][rdest] = reg[rsrc[1]];
|
||||
newWarp.csr = c.csr;
|
||||
newWarp.activeThreads = 1;
|
||||
newWarp.supervisorMode = false;
|
||||
newWarp.spawned = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
// CLONE
|
||||
std::cout << "CLONE\n";
|
||||
std::cout << "CLONING THREAD: " << reg[rsrc[0]] << "\n";
|
||||
c.reg[reg[rsrc[0]]] = reg;
|
||||
break;
|
||||
case 6:
|
||||
// JALRS
|
||||
nextActiveThreads = reg[rsrc[1]];
|
||||
reg[rdest] = c.pc;
|
||||
if (!pcSet) nextPc = reg[rsrc[0]];
|
||||
pcSet = true;
|
||||
std::cout << "ACTIVE_THREDS: " << rsrc[1] << " val: " << reg[rsrc[1]] << "\n";
|
||||
std::cout << "nextPC: " << rsrc[0] << " val: " << reg[rsrc[0]] << "\n";
|
||||
break;
|
||||
default:
|
||||
cout << "ERROR: UNSUPPORTED GPGPU INSTRUCTION " << *this << "\n";
|
||||
}
|
||||
break;
|
||||
default:
|
||||
cout << "ERROR: Unsupported instruction: " << *this << "\n";
|
||||
exit(1);
|
||||
|
|
9543
src/results.txt
9543
src/results.txt
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,40 @@
|
|||
// #include <stdint.h>
|
||||
// #include <stdbool.h>
|
||||
// #include <cstdint>
|
||||
|
||||
|
||||
int main(void);
|
||||
|
||||
#include "./lib/lib.h"
|
||||
|
||||
|
||||
/* Input operands for the element-wise add performed by mat(): z[i] = x[i] + y[i]. */
unsigned x[] = {1, 5, 10, 0, 3, 1, 1, 2, 8, 7, 8, 7, 5, 7, 7, 9};
|
||||
unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 2, 0, 3, 2, 3, 2};
|
||||
/* Result vector; starts zeroed and is written by mat(). */
unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
/* Element count of the arrays above (each has 16 initializers). */
unsigned N = 16;
|
||||
|
||||
/*
 * Test entry point: sets up the stack, then launches the mat() kernel through
 * createWarps() with 2 warps of 8 threads requested.
 */
int main()
{
    /*
     * Full prototype of the kernel defined later in this file. An empty
     * parameter list ("void mat();") does not describe the real signature and
     * is rejected by C23 and C++ once the definition takes an argument.
     */
    void mat(unsigned warp_id);

    /*
     * NOTE(review): initiate_stack() replaces sp while main's own frame is
     * live, so anything main saved on the old stack is unreachable afterwards.
     */
    initiate_stack();

    createWarps(2, 8, mat);

    return 0;
}
|
||||
|
||||
|
||||
void mat(unsigned warp_id)
|
||||
{
|
||||
|
||||
unsigned tid = get_tid();
|
||||
unsigned index = (warp_id * 8) + tid;
|
||||
asm __volatile("nop");
|
||||
asm __volatile("nop");
|
||||
asm __volatile("nop");
|
||||
asm __volatile("nop");
|
||||
z[index] = x[index] + y[index];
|
||||
}
|
294
src/riscv_gpgpu/gpgpu_test.dump
Normal file
294
src/riscv_gpgpu/gpgpu_test.dump
Normal file
|
@ -0,0 +1,294 @@
|
|||
|
||||
gpgpu_test.elf: file format elf32-littleriscv
|
||||
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
80000000 <main>:
|
||||
80000000: ff010113 addi sp,sp,-16
|
||||
80000004: 00112623 sw ra,12(sp)
|
||||
80000008: 00812423 sw s0,8(sp)
|
||||
8000000c: 01010413 addi s0,sp,16
|
||||
80000010: 2a8000ef jal ra,800002b8 <initiate_stack>
|
||||
80000014: 800007b7 lui a5,0x80000
|
||||
80000018: 04078613 addi a2,a5,64 # 80000040 <N+0xfeffff80>
|
||||
8000001c: 00800593 li a1,8
|
||||
80000020: 00200513 li a0,2
|
||||
80000024: 22c000ef jal ra,80000250 <createWarps>
|
||||
80000028: 00000793 li a5,0
|
||||
8000002c: 00078513 mv a0,a5
|
||||
80000030: 00c12083 lw ra,12(sp)
|
||||
80000034: 00812403 lw s0,8(sp)
|
||||
80000038: 01010113 addi sp,sp,16
|
||||
8000003c: 00008067 ret
|
||||
|
||||
80000040 <mat>:
|
||||
80000040: fd010113 addi sp,sp,-48
|
||||
80000044: 02112623 sw ra,44(sp)
|
||||
80000048: 02812423 sw s0,40(sp)
|
||||
8000004c: 03010413 addi s0,sp,48
|
||||
80000050: fca42e23 sw a0,-36(s0)
|
||||
80000054: 244000ef jal ra,80000298 <get_tid>
|
||||
80000058: fea42623 sw a0,-20(s0)
|
||||
8000005c: fdc42783 lw a5,-36(s0)
|
||||
80000060: 00379793 slli a5,a5,0x3
|
||||
80000064: fec42703 lw a4,-20(s0)
|
||||
80000068: 00f707b3 add a5,a4,a5
|
||||
8000006c: fef42423 sw a5,-24(s0)
|
||||
80000070: 00000013 nop
|
||||
80000074: 00000013 nop
|
||||
80000078: 00000013 nop
|
||||
8000007c: 00000013 nop
|
||||
80000080: 810007b7 lui a5,0x81000
|
||||
80000084: fe842703 lw a4,-24(s0)
|
||||
80000088: 00271713 slli a4,a4,0x2
|
||||
8000008c: 04078793 addi a5,a5,64 # 81000040 <N+0xffffff80>
|
||||
80000090: 00f707b3 add a5,a4,a5
|
||||
80000094: 0007a703 lw a4,0(a5)
|
||||
80000098: 810007b7 lui a5,0x81000
|
||||
8000009c: fe842683 lw a3,-24(s0)
|
||||
800000a0: 00269693 slli a3,a3,0x2
|
||||
800000a4: 08078793 addi a5,a5,128 # 81000080 <N+0xffffffc0>
|
||||
800000a8: 00f687b3 add a5,a3,a5
|
||||
800000ac: 0007a783 lw a5,0(a5)
|
||||
800000b0: 00f70733 add a4,a4,a5
|
||||
800000b4: 810007b7 lui a5,0x81000
|
||||
800000b8: fe842683 lw a3,-24(s0)
|
||||
800000bc: 00269693 slli a3,a3,0x2
|
||||
800000c0: 00078793 mv a5,a5
|
||||
800000c4: 00f687b3 add a5,a3,a5
|
||||
800000c8: 00e7a023 sw a4,0(a5) # 81000000 <N+0xffffff40>
|
||||
800000cc: 00000013 nop
|
||||
800000d0: 02c12083 lw ra,44(sp)
|
||||
800000d4: 02812403 lw s0,40(sp)
|
||||
800000d8: 03010113 addi sp,sp,48
|
||||
800000dc: 00008067 ret
|
||||
|
||||
800000e0 <set_wid>:
|
||||
800000e0: fe010113 addi sp,sp,-32
|
||||
800000e4: 00812e23 sw s0,28(sp)
|
||||
800000e8: 02010413 addi s0,sp,32
|
||||
800000ec: fea42623 sw a0,-20(s0)
|
||||
800000f0: fec42783 lw a5,-20(s0)
|
||||
800000f4: 00e79073 csrw 0xe,a5
|
||||
800000f8: 00000013 nop
|
||||
800000fc: 01c12403 lw s0,28(sp)
|
||||
80000100: 02010113 addi sp,sp,32
|
||||
80000104: 00008067 ret
|
||||
|
||||
80000108 <set_func>:
|
||||
80000108: fe010113 addi sp,sp,-32
|
||||
8000010c: 00812e23 sw s0,28(sp)
|
||||
80000110: 02010413 addi s0,sp,32
|
||||
80000114: fea42623 sw a0,-20(s0)
|
||||
80000118: fec42783 lw a5,-20(s0)
|
||||
8000011c: 00f79073 csrw 0xf,a5
|
||||
80000120: 00000013 nop
|
||||
80000124: 01c12403 lw s0,28(sp)
|
||||
80000128: 02010113 addi sp,sp,32
|
||||
8000012c: 00008067 ret
|
||||
|
||||
80000130 <get_func>:
|
||||
80000130: fe010113 addi sp,sp,-32
|
||||
80000134: 00812e23 sw s0,28(sp)
|
||||
80000138: 02010413 addi s0,sp,32
|
||||
8000013c: 00f027f3 csrr a5,0xf
|
||||
80000140: fef42623 sw a5,-20(s0)
|
||||
80000144: fec42783 lw a5,-20(s0)
|
||||
80000148: 00078513 mv a0,a5
|
||||
8000014c: 01c12403 lw s0,28(sp)
|
||||
80000150: 02010113 addi sp,sp,32
|
||||
80000154: 00008067 ret
|
||||
|
||||
80000158 <get_wid>:
|
||||
80000158: fe010113 addi sp,sp,-32
|
||||
8000015c: 00812e23 sw s0,28(sp)
|
||||
80000160: 02010413 addi s0,sp,32
|
||||
80000164: 00e027f3 csrr a5,0xe
|
||||
80000168: fef42623 sw a5,-20(s0)
|
||||
8000016c: fec42783 lw a5,-20(s0)
|
||||
80000170: 00078513 mv a0,a5
|
||||
80000174: 01c12403 lw s0,28(sp)
|
||||
80000178: 02010113 addi sp,sp,32
|
||||
8000017c: 00008067 ret
|
||||
|
||||
80000180 <createThreads>:
|
||||
80000180: fd010113 addi sp,sp,-48
|
||||
80000184: 02812623 sw s0,44(sp)
|
||||
80000188: 03a12423 sw s10,40(sp)
|
||||
8000018c: 03b12223 sw s11,36(sp)
|
||||
80000190: 03010413 addi s0,sp,48
|
||||
80000194: fca42e23 sw a0,-36(s0)
|
||||
80000198: fcb42c23 sw a1,-40(s0)
|
||||
8000019c: fcc42a23 sw a2,-44(s0)
|
||||
800001a0: 00010f13 mv t5,sp
|
||||
800001a4: 00100793 li a5,1
|
||||
800001a8: fef42623 sw a5,-20(s0)
|
||||
800001ac: 01c0006f j 800001c8 <createThreads+0x48>
|
||||
800001b0: fec42303 lw t1,-20(s0)
|
||||
800001b4: f0010113 addi sp,sp,-256
|
||||
800001b8: 0003506b 0x3506b
|
||||
800001bc: fec42783 lw a5,-20(s0)
|
||||
800001c0: 00178793 addi a5,a5,1
|
||||
800001c4: fef42623 sw a5,-20(s0)
|
||||
800001c8: fec42703 lw a4,-20(s0)
|
||||
800001cc: fdc42783 lw a5,-36(s0)
|
||||
800001d0: fef760e3 bltu a4,a5,800001b0 <createThreads+0x30>
|
||||
800001d4: 000f0113 mv sp,t5
|
||||
800001d8: 00000313 li t1,0
|
||||
800001dc: fd442f83 lw t6,-44(s0)
|
||||
800001e0: fdc42d83 lw s11,-36(s0)
|
||||
800001e4: fd842503 lw a0,-40(s0)
|
||||
800001e8: 01bfe0eb 0x1bfe0eb
|
||||
800001ec: 00000073 ecall
|
||||
800001f0: 00000013 nop
|
||||
800001f4: 02c12403 lw s0,44(sp)
|
||||
800001f8: 02812d03 lw s10,40(sp)
|
||||
800001fc: 02412d83 lw s11,36(sp)
|
||||
80000200: 03010113 addi sp,sp,48
|
||||
80000204: 00008067 ret
|
||||
|
||||
80000208 <wspawn>:
|
||||
80000208: fe010113 addi sp,sp,-32
|
||||
8000020c: 00812e23 sw s0,28(sp)
|
||||
80000210: 02010413 addi s0,sp,32
|
||||
80000214: fea42623 sw a0,-20(s0)
|
||||
80000218: feb42423 sw a1,-24(s0)
|
||||
8000021c: fec42223 sw a2,-28(s0)
|
||||
80000220: fec42503 lw a0,-20(s0)
|
||||
80000224: fe842583 lw a1,-24(s0)
|
||||
80000228: fe442783 lw a5,-28(s0)
|
||||
8000022c: 00078613 mv a2,a5
|
||||
80000230: 800007b7 lui a5,0x80000
|
||||
80000234: 18078793 addi a5,a5,384 # 80000180 <N+0xff0000c0>
|
||||
80000238: 00078313 mv t1,a5
|
||||
8000023c: 0003006b 0x3006b
|
||||
80000240: 00000013 nop
|
||||
80000244: 01c12403 lw s0,28(sp)
|
||||
80000248: 02010113 addi sp,sp,32
|
||||
8000024c: 00008067 ret
|
||||
|
||||
80000250 <createWarps>:
|
||||
80000250: fe010113 addi sp,sp,-32
|
||||
80000254: 00112e23 sw ra,28(sp)
|
||||
80000258: 00812c23 sw s0,24(sp)
|
||||
8000025c: 02010413 addi s0,sp,32
|
||||
80000260: fea42623 sw a0,-20(s0)
|
||||
80000264: feb42423 sw a1,-24(s0)
|
||||
80000268: fec42223 sw a2,-28(s0)
|
||||
8000026c: fe442783 lw a5,-28(s0)
|
||||
80000270: 00078613 mv a2,a5
|
||||
80000274: 00000593 li a1,0
|
||||
80000278: fe842503 lw a0,-24(s0)
|
||||
8000027c: f05ff0ef jal ra,80000180 <createThreads>
|
||||
80000280: 00000073 ecall
|
||||
80000284: 00000013 nop
|
||||
80000288: 01c12083 lw ra,28(sp)
|
||||
8000028c: 01812403 lw s0,24(sp)
|
||||
80000290: 02010113 addi sp,sp,32
|
||||
80000294: 00008067 ret
|
||||
|
||||
80000298 <get_tid>:
|
||||
80000298: ff010113 addi sp,sp,-16
|
||||
8000029c: 00812623 sw s0,12(sp)
|
||||
800002a0: 01010413 addi s0,sp,16
|
||||
800002a4: 00000013 nop
|
||||
800002a8: 00078513 mv a0,a5
|
||||
800002ac: 00c12403 lw s0,12(sp)
|
||||
800002b0: 01010113 addi sp,sp,16
|
||||
800002b4: 00008067 ret
|
||||
|
||||
800002b8 <initiate_stack>:
|
||||
800002b8: ff010113 addi sp,sp,-16
|
||||
800002bc: 00812623 sw s0,12(sp)
|
||||
800002c0: 01010413 addi s0,sp,16
|
||||
800002c4: 7ffff137 lui sp,0x7ffff
|
||||
800002c8: 00000013 nop
|
||||
800002cc: 00c12403 lw s0,12(sp) # 7ffff00c <main-0xff4>
|
||||
800002d0: 01010113 addi sp,sp,16
|
||||
800002d4: 00008067 ret
|
||||
|
||||
Disassembly of section .bss:
|
||||
|
||||
81000000 <z>:
|
||||
...
|
||||
|
||||
Disassembly of section .data:
|
||||
|
||||
81000040 <x>:
|
||||
81000040: 0001 nop
|
||||
81000042: 0000 unimp
|
||||
81000044: 0005 c.nop 1
|
||||
81000046: 0000 unimp
|
||||
81000048: 000a c.slli zero,0x2
|
||||
8100004a: 0000 unimp
|
||||
8100004c: 0000 unimp
|
||||
8100004e: 0000 unimp
|
||||
81000050: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
|
||||
81000054: 0001 nop
|
||||
81000056: 0000 unimp
|
||||
81000058: 0001 nop
|
||||
8100005a: 0000 unimp
|
||||
8100005c: 0002 c.slli64 zero
|
||||
8100005e: 0000 unimp
|
||||
81000060: 0008 0x8
|
||||
81000062: 0000 unimp
|
||||
81000064: 00000007 0x7
|
||||
81000068: 0008 0x8
|
||||
8100006a: 0000 unimp
|
||||
8100006c: 00000007 0x7
|
||||
81000070: 0005 c.nop 1
|
||||
81000072: 0000 unimp
|
||||
81000074: 00000007 0x7
|
||||
81000078: 00000007 0x7
|
||||
8100007c: 0009 c.nop 2
|
||||
...
|
||||
|
||||
81000080 <y>:
|
||||
81000080: 0000 unimp
|
||||
81000082: 0000 unimp
|
||||
81000084: 0002 c.slli64 zero
|
||||
81000086: 0000 unimp
|
||||
81000088: 0002 c.slli64 zero
|
||||
8100008a: 0000 unimp
|
||||
8100008c: 0000 unimp
|
||||
8100008e: 0000 unimp
|
||||
81000090: 0005 c.nop 1
|
||||
81000092: 0000 unimp
|
||||
81000094: 0000 unimp
|
||||
81000096: 0000 unimp
|
||||
81000098: 0001 nop
|
||||
8100009a: 0000 unimp
|
||||
8100009c: 0001 nop
|
||||
8100009e: 0000 unimp
|
||||
810000a0: 0004 0x4
|
||||
810000a2: 0000 unimp
|
||||
810000a4: 0002 c.slli64 zero
|
||||
810000a6: 0000 unimp
|
||||
810000a8: 0002 c.slli64 zero
|
||||
810000aa: 0000 unimp
|
||||
810000ac: 0000 unimp
|
||||
810000ae: 0000 unimp
|
||||
810000b0: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
|
||||
810000b4: 0002 c.slli64 zero
|
||||
810000b6: 0000 unimp
|
||||
810000b8: 00000003 lb zero,0(zero) # 0 <main-0x80000000>
|
||||
810000bc: 0002 c.slli64 zero
|
||||
...
|
||||
|
||||
Disassembly of section .sdata:
|
||||
|
||||
810000c0 <N>:
|
||||
810000c0: 0010 0x10
|
||||
...
|
||||
|
||||
Disassembly of section .comment:
|
||||
|
||||
82000000 <.comment>:
|
||||
82000000: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
|
||||
82000004: 2820 fld fs0,80(s0)
|
||||
82000006: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
|
||||
8200000a: 3820 fld fs0,112(s0)
|
||||
8200000c: 322e fld ft4,232(sp)
|
||||
8200000e: 302e fld ft0,232(sp)
|
||||
...
|
BIN
src/riscv_gpgpu/gpgpu_test.elf
Executable file
BIN
src/riscv_gpgpu/gpgpu_test.elf
Executable file
Binary file not shown.
59
src/riscv_gpgpu/gpgpu_test.hex
Normal file
59
src/riscv_gpgpu/gpgpu_test.hex
Normal file
|
@ -0,0 +1,59 @@
|
|||
:0200000480007A
|
||||
:10000000130101FF232611002324810013040101A1
|
||||
:10001000EF00802AB707008013860704930580004D
|
||||
:1000200013052000EF00C02293070000138507008E
|
||||
:100030008320C100032481001301010167800000B7
|
||||
:10004000130101FD2326110223248102130401035D
|
||||
:10005000232EA4FCEF0040242326A4FE8327C4FD06
|
||||
:10006000939737000327C4FEB307F7002324F4FE59
|
||||
:100070001300000013000000130000001300000034
|
||||
:10008000B7070081032784FE13172700938707040F
|
||||
:10009000B307F70003A70700B7070081832684FE94
|
||||
:1000A0009396260093870708B387F60083A7070077
|
||||
:1000B0003307F700B7070081832684FE9396260056
|
||||
:1000C00093870700B387F60023A0E7001300000022
|
||||
:1000D0008320C10203248102130101036780000011
|
||||
:1000E000130101FE232E8100130401022326A4FE26
|
||||
:1000F0008327C4FE7390E700130000000324C101AE
|
||||
:100100001301010267800000130101FE232E81000C
|
||||
:10011000130401022326A4FE8327C4FE7390F70074
|
||||
:10012000130000000324C1011301010267800000D5
|
||||
:10013000130101FE232E810013040102F327F000B6
|
||||
:100140002326F4FE8327C4FE138507000324C10180
|
||||
:100150001301010267800000130101FE232E8100BC
|
||||
:1001600013040102F327E0002326F4FE8327C4FED4
|
||||
:10017000138507000324C1011301010267800000F9
|
||||
:10018000130101FD232681022324A1032322B103AD
|
||||
:1001900013040103232EA4FC232CB4FC232AC4FC47
|
||||
:1001A000130F0100930710002326F4FE6F00C00117
|
||||
:1001B0000323C4FE130101F06B5003008327C4FE28
|
||||
:1001C000938717002326F4FE0327C4FE8327C4FD6C
|
||||
:1001D000E360F7FE13010F0013030000832F44FDBB
|
||||
:1001E000832DC4FD032584FDEBE0BF0173000000F7
|
||||
:1001F000130000000324C102032D8102832D41025C
|
||||
:100200001301010367800000130101FE232E81000A
|
||||
:10021000130401022326A4FE2324B4FE2322C4FED9
|
||||
:100220000325C4FE832584FE832744FE138607002E
|
||||
:10023000B707008093870718138307006B0003003C
|
||||
:10024000130000000324C1011301010267800000B4
|
||||
:10025000130101FE232E1100232C8100130401023F
|
||||
:100260002326A4FE2324B4FE2322C4FE832744FEB7
|
||||
:100270001386070093050000032584FEEFF05FF06E
|
||||
:1002800073000000130000008320C10103248101DA
|
||||
:100290001301010267800000130101FF2326810082
|
||||
:1002A0001304010113000000138507000324C1009B
|
||||
:1002B0001301010167800000130101FF2326810063
|
||||
:1002C0001304010137F1FF7F130000000324C10074
|
||||
:0802D000130101016780000029
|
||||
:02000004810079
|
||||
:1000400001000000050000000A00000000000000A0
|
||||
:100050000300000001000000010000000200000099
|
||||
:100060000800000007000000080000000700000072
|
||||
:100070000500000007000000070000000900000064
|
||||
:10008000000000000200000002000000000000006C
|
||||
:100090000500000000000000010000000100000059
|
||||
:1000A0000400000002000000020000000000000048
|
||||
:1000B0000300000002000000030000000200000036
|
||||
:0400C000100000002C
|
||||
:040000058000000077
|
||||
:00000001FF
|
|
@ -1,3 +1,3 @@
|
|||
/opt/riscv/bin/riscv32-unknown-linux-gnu-gcc -march=rv32i -mabi=ilp32 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib firmware.c -o firmware.elf
|
||||
/opt/riscv/bin/riscv32-unknown-linux-gnu-objdump -D firmware.elf > firmware.dump
|
||||
/opt/riscv/bin/riscv32-unknown-linux-gnu-objcopy -O ihex firmware.elf firmware.hex
|
||||
/opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-gcc -march=rv32i -mabi=ilp32 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib gpgpu_test.c ./lib/lib.c -o gpgpu_test.elf
|
||||
/opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-objdump -D gpgpu_test.elf > gpgpu_test.dump
|
||||
/opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-objcopy -O ihex gpgpu_test.elf gpgpu_test.hex
|
108
src/riscv_gpgpu/lib/lib.c
Normal file
108
src/riscv_gpgpu/lib/lib.c
Normal file
|
@ -0,0 +1,108 @@
|
|||
#include "lib.h"
|
||||
|
||||
// namespace Sphinx
|
||||
// {
|
||||
|
||||
#define FUNC void (func)(unsigned)
|
||||
|
||||
|
||||
|
||||
/* Store i into CSR 0x00e (the warp-id CSR) using the SET_WID macro. */
void set_wid(unsigned i) {
    SET_WID(i);
}
|
||||
|
||||
void set_func(FUNC)
|
||||
{
|
||||
SET_FUNC(func);
|
||||
}
|
||||
|
||||
/* Read CSR 0x00f through the GET_FUNC macro and return its value. */
unsigned get_func() {
    unsigned csr_value;

    GET_FUNC(csr_value);

    return csr_value;
}
|
||||
|
||||
/* Read CSR 0x00e through the GET_WID macro and return its value. */
unsigned get_wid() {
    unsigned csr_value;

    GET_WID(csr_value);

    return csr_value;
}
|
||||
|
||||
void createThreads(unsigned num_threads, unsigned wid, unsigned func_addr)
|
||||
{
|
||||
|
||||
asm __volatile__("addi t5, sp, 0");
|
||||
for (unsigned i = 1; i < num_threads; i++)
|
||||
{
|
||||
|
||||
register unsigned cur_tid asm("t1") = i;
|
||||
asm __volatile__("addi sp, sp, -256");
|
||||
CLONE;
|
||||
}
|
||||
asm __volatile__("addi sp, t5, 0");
|
||||
|
||||
|
||||
register unsigned cur_tid asm("t1") = 0;
|
||||
|
||||
|
||||
// jalis TO FUNC
|
||||
register unsigned num_lanes asm("t6") = func_addr;
|
||||
register unsigned link asm("s11") = num_threads;
|
||||
|
||||
register unsigned n_threads asm("a0") = wid;
|
||||
JALRS;
|
||||
ECALL;
|
||||
|
||||
}
|
||||
|
||||
/*
 * Ask the core to start a new warp at createThreads().
 *
 * num_threads  placed in a0
 * wid          placed in a1
 * func         kernel address, placed in a2
 *
 * WSPAWN (.word 0x3006b, rs1 = t1) takes the new warp's start address from t1.
 * The registers are passed as asm operands because GCC only guarantees the
 * placement of local register variables when they are operands of extended
 * asm.
 *
 * NOTE(review): the original author expected WSPAWN to copy the CSRs to the
 * new warp and had set_wid(wid) / set_func(func) calls here, disabled; whether
 * a0-a2 reach the new warp depends on the emulator -- confirm.
 */
void wspawn(unsigned num_threads, unsigned wid, FUNC)
{
    // set_wid(wid);
    // set_func(func);

    register unsigned n_threads asm("a0") = num_threads;
    register unsigned wwid asm("a1") = wid;
    register unsigned ffunc asm("a2") = (unsigned) func;
    register unsigned func_add asm("t1") = (unsigned) &createThreads;

    asm __volatile__(
        ".word 0x3006b"
        :
        : "r"(func_add), "r"(n_threads), "r"(wwid), "r"(ffunc)
        : "memory");
}
|
||||
|
||||
|
||||
|
||||
/*
 * Launch func on num_threads lanes of warp 0, then issue an ECALL.
 *
 * num_Warps is currently not used: the per-warp spawn loop below is disabled,
 * so only a single warp (id 0) is started.
 */
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC) {
    // Disabled multi-warp launch, kept for reference:
    //
    // asm __volatile__("addi t5, sp, 0");
    // for (unsigned i = 1; i < num_Warps; i++)
    // {
    //     asm __volatile__("addi sp, sp, -2048");
    //     wspawn(num_threads, i, func);
    // }
    // asm __volatile__("addi sp, t5, 0");

    createThreads(num_threads, 0, (unsigned) func);

    ECALL;
}
|
||||
|
||||
|
||||
unsigned get_tid()
|
||||
{
|
||||
register unsigned tid asm("t1");
|
||||
}
|
||||
|
||||
void initiate_stack()
|
||||
{
|
||||
asm __volatile__("lui sp,0x7ffff");
|
||||
}
|
30
src/riscv_gpgpu/lib/lib.h
Normal file
30
src/riscv_gpgpu/lib/lib.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
|
||||
/*
 * Helper macros and entry points for the GPGPU test library: CSR accessors and
 * raw encodings of the custom instructions (major opcode 0x6b).
 */
#ifndef __RISCV_GP_
#define __RISCV_GP_

/* CSR numbers accessed by the SET_ and GET_ macros below. */
#define WID_CSR 0x00E
#define FUNC_CSR 0x00F

/* Write val to / read ret from CSR 0x00e. */
#define SET_WID(val) asm __volatile__("csrw 0x00e,%0"::"r"(val));
#define GET_WID(ret) asm __volatile__("csrr %0,0x00e":"=r"(ret));

/* Write val to / read ret from CSR 0x00f. */
#define SET_FUNC(val) asm __volatile__("csrw 0x00f,%0"::"r"(val));
#define GET_FUNC(ret) asm __volatile__("csrr %0,0x00f":"=r"(ret));

/* funct3 = 0, rs1 = t1: spawn a warp starting at the address in t1. */
#define WSPAWN asm __volatile__(".word 0x3006b"::);
/* funct3 = 5, rs1 = t1: copy the current register file to thread t1. */
#define CLONE asm __volatile__(".word 0x3506b":::"t1");
/*
 * funct3 = 6, rd = ra, rs1 = t6, rs2 = s11: activate s11 threads and jump to
 * the address in t6. The instruction writes the link address to ra, so ra (not
 * s10) is the register that must be declared as clobbered.
 */
#define JALRS asm __volatile__(".word 0x1bfe0eb":::"ra", "memory")
/* Standard RISC-V ecall encoding. */
#define ECALL asm __volatile__(".word 0x00000073")

/* Parameter declaration for a kernel entry point taking one unsigned argument. */
#define FUNC void (func)(unsigned)

/* Launch func on num_threads threads; see lib.c for the current warp handling. */
void createWarps(unsigned num_Warps, unsigned num_threads, FUNC);
/* Thread id of the calling lane. */
unsigned get_tid(void);
/* Install the initial stack pointer. */
void initiate_stack(void);

#endif
|
||||
|
4
src/test_riscv.sh
Executable file
4
src/test_riscv.sh
Executable file
|
@ -0,0 +1,4 @@
|
|||
#!/bin/sh
# Runs the GPGPU test image through harptool and records the run in results.txt.
# Paths are relative, so the script expects to be started from the directory
# that contains harptool and riscv_gpgpu/.

# Start a fresh log.
echo start > results.txt

# Record which image is being run, then append harptool's stdout and stderr.
# ">> file 2>&1" is the portable spelling of bash's "&>> file".
echo ./riscv_gpgpu/gpgpu_test.hex >> results.txt
./harptool -E -a rv32i --core ./riscv_gpgpu/gpgpu_test.hex -s -b >> results.txt 2>&1
|
Loading…
Add table
Add a link
Reference in a new issue