rtl refactoring

This commit is contained in:
Blaise Tine 2020-05-03 17:10:02 -04:00
parent a1dc90b951
commit 69f607b73e
83 changed files with 30487 additions and 30536 deletions

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c

View file

@ -11,10 +11,10 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.S
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_FIO = $(LIB_PATH)/fileio/fileio.s
VX_FIO = $(LIB_PATH)/fileio/fileio.S
VX_VEC = vx_vec_saxpy.s #float --> int
LIBS = /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libc.a /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc

View file

@ -39,11 +39,11 @@ int main()
// for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
#endif
int startCycles = vx_getCycles();
int startInst = vx_getInst();
int startCycles = vx_num_cycles();
int startInst = vx_num_instrs();
vx_vec_saxpy(n, factor, a, b);
int endCycles = vx_getCycles();
int endInst = vx_getInst();
int endCycles = vx_num_cycles();
int endInst = vx_num_instrs();
int totalInst = (endInst - startInst);
int totalCycles = (endCycles - startCycles);

View file

@ -484,7 +484,7 @@ Disassembly of section .text:
800006e0: 00112623 sw ra,12(sp)
800006e4: 00812423 sw s0,8(sp)
800006e8: 01010413 addi s0,sp,16
800006ec: 0d4000ef jal ra,800007c0 <vx_threadID>
800006ec: 0d4000ef jal ra,800007c0 <vx_thread_id>
800006f0: 00050793 mv a5,a0
800006f4: 00078513 mv a0,a5
800006f8: 00c12083 lw ra,12(sp)
@ -552,23 +552,23 @@ Disassembly of section .text:
800007b0: 0000306b 0x306b
800007b4: 00008067 ret
800007b8 <vx_warpID>:
800007b8 <vx_warp_id>:
800007b8: 02102573 csrr a0,0x21
800007bc: 00008067 ret
800007c0 <vx_threadID>:
800007c0 <vx_thread_id>:
800007c0: 02002573 csrr a0,0x20
800007c4: 00008067 ret
800007c8 <vx_getCycles>:
800007c8 <vx_num_cycles>:
800007c8: 02602573 csrr a0,0x26
800007cc: 00008067 ret
800007d0 <vx_getInst>:
800007d0 <vx_num_instrs>:
800007d0: 02502573 csrr a0,0x25
800007d4: 00008067 ret
800007d8 <vx_resetStack>:
800007d8 <vx_reset_stack>:
800007d8: 00400513 li a0,4
800007dc: 0005006b 0x5006b
800007e0: 021026f3 csrr a3,0x21
@ -731,10 +731,10 @@ Disassembly of section .text:
80000a24: fe842703 lw a4,-24(s0)
80000a28: fe042783 lw a5,-32(s0)
80000a2c: fcf744e3 blt a4,a5,800009f4 <main+0x1a0>
80000a30: d99ff0ef jal ra,800007c8 <vx_getCycles>
80000a30: d99ff0ef jal ra,800007c8 <vx_num_cycles>
80000a34: 00050793 mv a5,a0
80000a38: fcf42623 sw a5,-52(s0)
80000a3c: d95ff0ef jal ra,800007d0 <vx_getInst>
80000a3c: d95ff0ef jal ra,800007d0 <vx_num_instrs>
80000a40: 00050793 mv a5,a0
80000a44: fcf42423 sw a5,-56(s0)
80000a48: fd842683 lw a3,-40(s0)
@ -742,10 +742,10 @@ Disassembly of section .text:
80000a50: fd042583 lw a1,-48(s0)
80000a54: fe042503 lw a0,-32(s0)
80000a58: e54ff0ef jal ra,800000ac <vx_vec_saxpy>
80000a5c: d6dff0ef jal ra,800007c8 <vx_getCycles>
80000a5c: d6dff0ef jal ra,800007c8 <vx_num_cycles>
80000a60: 00050793 mv a5,a0
80000a64: fcf42223 sw a5,-60(s0)
80000a68: d69ff0ef jal ra,800007d0 <vx_getInst>
80000a68: d69ff0ef jal ra,800007d0 <vx_num_instrs>
80000a6c: 00050793 mv a5,a0
80000a70: fcf42023 sw a5,-64(s0)
80000a74: fc042703 lw a4,-64(s0)

View file

@ -11,10 +11,10 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.S
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_FIO = $(LIB_PATH)/fileio/fileio.s
VX_FIO = $(LIB_PATH)/fileio/fileio.S
VX_VEC = vx_vec_sfilter.s #float --> int
LIBS = /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libc.a /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc

View file

@ -36,15 +36,15 @@ int main()
}
int N = 4;
int startCycles = vx_getCycles();
int startInst = vx_getInst();
int startCycles = vx_num_cycles();
int startInst = vx_num_instrs();
for(int y = 1; y < (NUM_DATA-1); y++){
for(int x = 1; x < (NUM_DATA-1); x = x+N) {
vx_vec_sfilter(a, b, ldc, m, x, y, N);
}
}
int endCycles = vx_getCycles();
int endInst = vx_getInst();
int endCycles = vx_num_cycles();
int endInst = vx_num_instrs();
int totalInst = (endInst - startInst);
int totalCycles = (endCycles - startCycles);

View file

@ -549,7 +549,7 @@ Disassembly of section .text:
800007e4: 00112623 sw ra,12(sp)
800007e8: 00812423 sw s0,8(sp)
800007ec: 01010413 addi s0,sp,16
800007f0: 0d4000ef jal ra,800008c4 <vx_threadID>
800007f0: 0d4000ef jal ra,800008c4 <vx_thread_id>
800007f4: 00050793 mv a5,a0
800007f8: 00078513 mv a0,a5
800007fc: 00c12083 lw ra,12(sp)
@ -617,23 +617,23 @@ Disassembly of section .text:
800008b4: 0000306b 0x306b
800008b8: 00008067 ret
800008bc <vx_warpID>:
800008bc <vx_warp_id>:
800008bc: 02102573 csrr a0,0x21
800008c0: 00008067 ret
800008c4 <vx_threadID>:
800008c4 <vx_thread_id>:
800008c4: 02002573 csrr a0,0x20
800008c8: 00008067 ret
800008cc <vx_getCycles>:
800008cc <vx_num_cycles>:
800008cc: 02602573 csrr a0,0x26
800008d0: 00008067 ret
800008d4 <vx_getInst>:
800008d4 <vx_num_instrs>:
800008d4: 02502573 csrr a0,0x25
800008d8: 00008067 ret
800008dc <vx_resetStack>:
800008dc <vx_reset_stack>:
800008dc: 00400513 li a0,4
800008e0: 0005006b 0x5006b
800008e4: 021026f3 csrr a3,0x21
@ -734,10 +734,10 @@ Disassembly of section .text:
80000a30: faf746e3 blt a4,a5,800009dc <main+0x84>
80000a34: 00400793 li a5,4
80000a38: fcf42023 sw a5,-64(s0)
80000a3c: e91ff0ef jal ra,800008cc <vx_getCycles>
80000a3c: e91ff0ef jal ra,800008cc <vx_num_cycles>
80000a40: 00050793 mv a5,a0
80000a44: faf42e23 sw a5,-68(s0)
80000a48: e8dff0ef jal ra,800008d4 <vx_getInst>
80000a48: e8dff0ef jal ra,800008d4 <vx_num_instrs>
80000a4c: 00050793 mv a5,a0
80000a50: faf42c23 sw a5,-72(s0)
80000a54: 00100793 li a5,1
@ -767,10 +767,10 @@ Disassembly of section .text:
80000ab4: fe842703 lw a4,-24(s0)
80000ab8: 04000793 li a5,64
80000abc: fae7d2e3 bge a5,a4,80000a60 <main+0x108>
80000ac0: e0dff0ef jal ra,800008cc <vx_getCycles>
80000ac0: e0dff0ef jal ra,800008cc <vx_num_cycles>
80000ac4: 00050793 mv a5,a0
80000ac8: faf42a23 sw a5,-76(s0)
80000acc: e09ff0ef jal ra,800008d4 <vx_getInst>
80000acc: e09ff0ef jal ra,800008d4 <vx_num_instrs>
80000ad0: 00050793 mv a5,a0
80000ad4: faf42823 sw a5,-80(s0)
80000ad8: fb042703 lw a4,-80(s0)

View file

@ -11,10 +11,10 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.S
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_FIO = $(LIB_PATH)/fileio/fileio.s
VX_FIO = $(LIB_PATH)/fileio/fileio.S
VX_VEC = vx_vec_sgemm_nn.s #float --> int
LIBS = /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libc.a /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc

View file

@ -50,8 +50,8 @@ int main()
int vsize = 32;
int startCycles = vx_getCycles();
int startInst = vx_getInst();
int startCycles = vx_num_cycles();
int startInst = vx_num_instrs();
for (int r = 0; r < m; r++) {
for (int c = 0; c < n; c++) {
for (int i = 0; i < k;) {
@ -61,8 +61,8 @@ int main()
}
}
}
int endCycles = vx_getCycles();
int endInst = vx_getInst();
int endCycles = vx_num_cycles();
int endInst = vx_num_instrs();
int totalInst = (endInst - startInst);
int totalCycles = (endCycles - startCycles);

View file

@ -492,7 +492,7 @@ Disassembly of section .text:
80000700: 00112623 sw ra,12(sp)
80000704: 00812423 sw s0,8(sp)
80000708: 01010413 addi s0,sp,16
8000070c: 0d4000ef jal ra,800007e0 <vx_threadID>
8000070c: 0d4000ef jal ra,800007e0 <vx_thread_id>
80000710: 00050793 mv a5,a0
80000714: 00078513 mv a0,a5
80000718: 00c12083 lw ra,12(sp)
@ -560,23 +560,23 @@ Disassembly of section .text:
800007d0: 0000306b 0x306b
800007d4: 00008067 ret
800007d8 <vx_warpID>:
800007d8 <vx_warp_id>:
800007d8: 02102573 csrr a0,0x21
800007dc: 00008067 ret
800007e0 <vx_threadID>:
800007e0 <vx_thread_id>:
800007e0: 02002573 csrr a0,0x20
800007e4: 00008067 ret
800007e8 <vx_getCycles>:
800007e8 <vx_num_cycles>:
800007e8: 02602573 csrr a0,0x26
800007ec: 00008067 ret
800007f0 <vx_getInst>:
800007f0 <vx_num_instrs>:
800007f0: 02502573 csrr a0,0x25
800007f4: 00008067 ret
800007f8 <vx_resetStack>:
800007f8 <vx_reset_stack>:
800007f8: 00400513 li a0,4
800007fc: 0005006b 0x5006b
80000800: 021026f3 csrr a3,0x21
@ -734,10 +734,10 @@ Disassembly of section .text:
80000a30: f8f42e23 sw a5,-100(s0)
80000a34: 02000793 li a5,32
80000a38: f8f42c23 sw a5,-104(s0)
80000a3c: dadff0ef jal ra,800007e8 <vx_getCycles>
80000a3c: dadff0ef jal ra,800007e8 <vx_num_cycles>
80000a40: 00050793 mv a5,a0
80000a44: f8f42a23 sw a5,-108(s0)
80000a48: da9ff0ef jal ra,800007f0 <vx_getInst>
80000a48: da9ff0ef jal ra,800007f0 <vx_num_instrs>
80000a4c: 00050793 mv a5,a0
80000a50: f8f42823 sw a5,-112(s0)
80000a54: fc042e23 sw zero,-36(s0)
@ -774,10 +774,10 @@ Disassembly of section .text:
80000ad0: fdc42703 lw a4,-36(s0)
80000ad4: fc042783 lw a5,-64(s0)
80000ad8: f8f742e3 blt a4,a5,80000a5c <main+0x1e8>
80000adc: d0dff0ef jal ra,800007e8 <vx_getCycles>
80000adc: d0dff0ef jal ra,800007e8 <vx_num_cycles>
80000ae0: 00050793 mv a5,a0
80000ae4: f8f42623 sw a5,-116(s0)
80000ae8: d09ff0ef jal ra,800007f0 <vx_getInst>
80000ae8: d09ff0ef jal ra,800007f0 <vx_num_instrs>
80000aec: 00050793 mv a5,a0
80000af0: f8f42423 sw a5,-120(s0)
80000af4: f8842703 lw a4,-120(s0)

View file

@ -11,10 +11,10 @@ CPY = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.S
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_FIO = $(LIB_PATH)/fileio/fileio.s
VX_FIO = $(LIB_PATH)/fileio/fileio.S
VX_VEC1 = vx_vec_vvaddint32.s
#VX_VEC2 = vx_vec_saxpy.s #float --> int
#VX_VEC3 = vx_vec_sgemm.s #float --> int

View file

@ -486,7 +486,7 @@ Disassembly of section .text:
800006e0: 00112623 sw ra,12(sp)
800006e4: 00812423 sw s0,8(sp)
800006e8: 01010413 addi s0,sp,16
800006ec: 0d4000ef jal ra,800007c0 <vx_threadID>
800006ec: 0d4000ef jal ra,800007c0 <vx_thread_id>
800006f0: 00050793 mv a5,a0
800006f4: 00078513 mv a0,a5
800006f8: 00c12083 lw ra,12(sp)
@ -554,23 +554,23 @@ Disassembly of section .text:
800007b0: 0000306b 0x306b
800007b4: 00008067 ret
800007b8 <vx_warpID>:
800007b8 <vx_warp_id>:
800007b8: 02102573 csrr a0,0x21
800007bc: 00008067 ret
800007c0 <vx_threadID>:
800007c0 <vx_thread_id>:
800007c0: 02002573 csrr a0,0x20
800007c4: 00008067 ret
800007c8 <vx_getCycles>:
800007c8 <vx_num_cycles>:
800007c8: 02602573 csrr a0,0x26
800007cc: 00008067 ret
800007d0 <vx_getInst>:
800007d0 <vx_num_instrs>:
800007d0: 02502573 csrr a0,0x25
800007d4: 00008067 ret
800007d8 <vx_resetStack>:
800007d8 <vx_reset_stack>:
800007d8: 00400513 li a0,4
800007dc: 0005006b 0x5006b
800007e0: 021026f3 csrr a3,0x21

View file

@ -3,7 +3,7 @@ CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#DEBUG = 1
CFLAGS += -fPIC
@ -16,7 +16,7 @@ SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/pipe_regs -I../../hw/rtl/cache
VL_FLAGS += --assert -Wall -Wpedantic $(MULTICORE)
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(MULTICORE)
# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')

View file

@ -8,10 +8,10 @@ VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
VX_FIO = $(VX_RT_PATH)/fileio/fileio.S
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/startup/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections

View file

@ -8,10 +8,10 @@ VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
VX_FIO = $(VX_RT_PATH)/fileio/fileio.S
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/startup/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections

View file

@ -10,10 +10,10 @@ void kernel_body(void* arg) {
int* y = (int*)_arg->src1_ptr;
int* z = (int*)_arg->dst_ptr;
unsigned wNo = vx_warpNum();
unsigned tid = vx_threadID();
unsigned wid = vx_warp_gid();
unsigned tid = vx_thread_id();
unsigned i = ((wNo * _arg->num_threads) + tid) * _arg->stride;
unsigned i = ((wid * _arg->num_threads) + tid) * _arg->stride;
for (unsigned j = 0; j < _arg->stride; ++j) {
z[i+j] = x[i+j] + y[i+j];
@ -28,5 +28,5 @@ void main() {
printf("src0_ptr=0x%x\n", arg->src0_ptr);
printf("src1_ptr=0x%x\n", arg->src1_ptr);
printf("dst_ptr=0x%x\n", arg->dst_ptr);*/
vx_spawnWarps(arg->num_warps, arg->num_threads, kernel_body, arg);
vx_spawn_warps(arg->num_warps, arg->num_threads, kernel_body, arg);
}

View file

@ -1730,13 +1730,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000094
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002673 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=0
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=1
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=2
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=3
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=3
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 00000000 00000000 00000000 00000000 (0)
@ -1789,13 +1789,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000094
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002673 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=0
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=1
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=2
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=3
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=3
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 00000000 00000000 00000000 00000000 (0)
@ -1848,13 +1848,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000094
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002673 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=0
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=1
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=2
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=3
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=3
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 00000000 00000000 00000000 00000000 (0)
@ -2127,13 +2127,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000094
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002673 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=0
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=1
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=2
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=3
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=3
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 80000014 00000000 00000000 00000000 (0)
@ -3286,13 +3286,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x800000b0
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x21026f3 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=1
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=1
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=1
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=1
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=1
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 00000000 00000000 00000000 00000000 (0)
@ -3345,13 +3345,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x800000b0
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x21026f3 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=2
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=2
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=2
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=2
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=2
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 00000000 00000000 00000000 00000000 (0)
@ -3404,13 +3404,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x800000b0
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x21026f3 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=3
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=3
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=3
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=3
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=3
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=3
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=3
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=3
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 00000000 00000000 00000000 00000000 (0)
@ -3683,13 +3683,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 1 1 1
DEBUG ../../../../simX/core.cpp:750: current PC=0x800000b0
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x21026f3 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=0
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=0
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=0
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=0
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=0
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 80000014 00000000 00000000 00000000 (0)
@ -22160,13 +22160,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 1 1
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000758
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002573 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=0
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=1
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=2
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=3
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=3
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 800009d0 800009d0 800009d0 800009d0 (0)
@ -22219,13 +22219,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 0 1
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000758
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002573 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=0
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=1
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=2
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=3
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=3
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 800009d0 800009d0 800009d0 800009d0 (0)
@ -22278,13 +22278,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000758
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002573 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=0
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=1
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=2
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=3
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=3
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 800009d0 800009d0 800009d0 800009d0 (0)
@ -22337,13 +22337,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000758
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002573 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=0
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=1
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=2
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=3
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=3
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 800009d0 800009d0 800009d0 800009d0 (0)
@ -39870,13 +39870,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000748
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2102573 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=2
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=2
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=2
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=2
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=2
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=2
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 80000810 80000810 80000810 80000810 (0)
@ -39929,13 +39929,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000748
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2102573 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=3
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=3
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=3
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=3
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=3
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=3
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=3
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=3
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 80000810 80000810 80000810 80000810 (0)
@ -39988,13 +39988,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 0 0
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000748
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2102573 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=1
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=1
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=1
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=1
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=1
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=1
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 80000810 80000810 80000810 80000810 (0)
@ -40159,13 +40159,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 1 1
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000748
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2102573 into: SYS
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=0
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=0
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=0
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=0
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=0
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=0
DEBUG ../../../../simX/core.cpp:781: Register state:
%r 0: 00000000 00000000 00000000 00000000 (0)
%r 1: 800008c4 800008c4 800008c4 800008c4 (0)

View file

@ -2,11 +2,11 @@ all: singlecore
CF += -std=c++11 -fms-extensions
VF += -compiler gcc --language 1800-2009 --assert -Wall -Wpedantic
VF += --language 1800-2009 --assert -Wall -Wpedantic
VF += -exe $(SRCS) $(INCLUDE)
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/simulate
@ -21,58 +21,59 @@ THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu
build_config:
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./simulate/VX_config.h
gen-singlecore: build_config
gen-s: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG'
gen-singlecore-t: build_config
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
gen-singlecore-d: build_config
gen-sd: build_config
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
gen-multicore: build_config
gen-st: build_config
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
gen-m: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
gen-multicore-t: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
gen-multicore-d: build_config
gen-md: build_config
verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG)
singlecore: gen-singlecore
gen-mt: build_config
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
build-s: gen-s
(cd obj_dir && make -j -f VVortex_Socket.mk)
singlecore-t: gen-singlecore-t
build-sd: gen-sd
(cd obj_dir && make -j -f VVortex_Socket.mk)
singlecore-d: gen-singlecore-d
build-st: gen-st
(cd obj_dir && make -j -f VVortex_Socket.mk)
multicore: gen-multicore
build-m: gen-m
(cd obj_dir && make -j -f VVortex_Socket.mk)
multicore-t: gen-multicore-t
build-md: gen-md
(cd obj_dir && make -j -f VVortex_Socket.mk)
multicore-d: gen-multicore-d
build-mt: gen-mt
(cd obj_dir && make -j -f VVortex_Socket.mk)
run: singlecore
run: run-s
run-s: build-s
(cd obj_dir && ./VVortex_Socket)
run-d: singlecore-d
run-sd: build-sd
(cd obj_dir && ./VVortex_Socket)
run-t: singlecore-t
run-st: build-st
(cd obj_dir && ./VVortex_Socket)
run-m: multicore
run-m: build-m
(cd obj_dir && ./VVortex_Socket)
run-md: multicore-d
run-md: build-md
(cd obj_dir && ./VVortex_Socket)
run-mt: multicore-t
run-mt: build-mt
(cd obj_dir && ./VVortex_Socket)
clean:

View file

@ -28,21 +28,39 @@
`endif
`ifndef NUM_CSRS
`define NUM_CSRS 1024
`endif
`ifndef IO_BUS_ADDR
`define IO_BUS_ADDR 32'h00010000
`define NUM_CSRS 2
`endif
`ifndef STARTUP_ADDR
`define STARTUP_ADDR 32'h80000000
`endif
`ifndef SHARED_MEM_ADDR_MATCH
`define SHARED_MEM_ADDR_MATCH(x) (x[31:24] == 8'hFF)
`ifndef SHARED_MEM_TOP_ADDR
`define SHARED_MEM_TOP_ADDR 8'hFF
`endif
`ifndef IO_BUS_ADDR
`define IO_BUS_ADDR 32'h00010000
`endif
`ifndef STACK_BASE_ADDR
`define STACK_BASE_ADDR 20'h6ffff
`endif
`ifndef L2_ENABLE
`define L2_ENABLE (`NUM_CORES > 1)
`endif
`define CSR_LTID 12'h020
`define CSR_LWID 12'h021
`define CSR_GWID 12'h022
`define CSR_GTID 12'h023
`define CSR_CYCLL 12'hC00
`define CSR_CYCLH 12'hC80
`define CSR_INSTL 12'hC02
`define CSR_INSTH 12'hC82
// ========================= Dcache Configurable Knobs ========================
// Size of cache in bytes

View file

@ -1,84 +1,57 @@
`include "VX_define.vh"
module VX_csr_data (
module VX_csr_data #(
parameter CORE_ID = 0
) (
input wire clk, // Clock
input wire reset,
input wire[`CSR_ADDR_SIZE-1:0] read_csr_address,
input wire write_valid,
input wire[`CSR_WIDTH-1:0] write_csr_data,
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
output reg[31:0] read_data,
input wire write_enable,
`IGNORE_WARNINGS_BEGIN
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
input wire[`CSR_ADDR_SIZE-1:0] write_csr_address,
input wire[`CSR_ADDR_SIZE-1:0] write_addr,
`IGNORE_WARNINGS_END
output wire[31:0] read_csr_data,
// For instruction retire counting
input wire writeback_valid
input wire[`CSR_WIDTH-1:0] write_data,
input wire[`NW_BITS-1:0] warp_num,
input wire wb_valid
);
// wire[`NUM_THREADS-1:0][31:0] thread_ids;
// wire[`NUM_THREADS-1:0][31:0] warp_ids;
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
// genvar cur_t;
// for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
// assign thread_ids[cur_t] = cur_t;
// end
// genvar cur_tw;
// for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
// assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, in_read_warp_num};
// end
reg [`CSR_WIDTH-1:0] csr[`NUM_CSRS-1:0];
reg [63:0] cycle;
reg [63:0] instret;
wire read_cycle;
wire read_cycleh;
wire read_instret;
wire read_instreth;
assign read_cycle = read_csr_address == `CSR_CYCL_L;
assign read_cycleh = read_csr_address == `CSR_CYCL_H;
assign read_instret = read_csr_address == `CSR_INST_L;
assign read_instreth = read_csr_address == `CSR_INST_H;
wire [$clog2(`NUM_CSRS)-1:0] read_addr, write_addr;
reg [63:0] num_cycles, num_instrs;
// cast address to physical CSR range
assign read_addr = $size(read_addr)'(read_csr_address);
assign write_addr = $size(write_addr)'(write_csr_address);
// wire thread_select = read_csr_address == 12'h20;
// wire warp_select = read_csr_address == 12'h21;
// assign read_csr_data = thread_select ? thread_ids :
// warp_select ? warp_ids :
// 0;
genvar curr_e;
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
assign rd_addr = $size(rd_addr)'(read_addr);
assign wr_addr = $size(wr_addr)'(write_addr);
always @(posedge clk) begin
if (reset) begin
cycle <= 0;
instret <= 0;
if (reset) begin
num_cycles <= 0;
num_instrs <= 0;
end else begin
cycle <= cycle + 1;
if (write_valid) begin
csr[write_addr] <= write_csr_data;
if (write_enable) begin
csr_table[wr_addr] <= write_data;
end
if (writeback_valid) begin
instret <= instret + 1;
num_cycles <= num_cycles + 1;
if (wb_valid) begin
num_instrs <= num_instrs + 1;
end
end
end
assign read_csr_data = read_cycle ? cycle[31:0] :
read_cycleh ? cycle[63:32] :
read_instret ? instret[31:0] :
read_instreth ? instret[63:32] :
{{20{1'b0}}, csr[read_addr]};
endmodule : VX_csr_data
always @(*) begin
case (read_addr)
`CSR_LWID : read_data = 32'(warp_num);
`CSR_GTID ,
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
`CSR_CYCLL : read_data = num_cycles[31:0];
`CSR_CYCLH : read_data = num_cycles[63:32];
`CSR_INSTL : read_data = num_instrs[31:0];
`CSR_INSTH : read_data = num_instrs[63:32];
default: read_data = 32'(csr_table[rd_addr]);
endcase
end
endmodule

View file

@ -24,24 +24,23 @@ module VX_csr_pipe #(
wire[31:0] csr_read_data_unqual;
wire[31:0] csr_read_data;
assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && |(csr_req_if.valid);
assign csr_read_data = (csr_address_s2 == csr_req_if.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
wire writeback = |writeback_if.valid;
VX_csr_data csr_data(
.clk (clk),
.reset (reset),
.read_csr_address (csr_req_if.csr_address),
.write_valid (is_csr_s2),
.write_csr_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
.write_csr_address (csr_address_s2),
.read_csr_data (csr_read_data_unqual),
.writeback_valid (writeback)
VX_csr_data #(
.CORE_ID(CORE_ID)
) csr_data (
.clk (clk),
.reset (reset),
.read_addr (csr_req_if.csr_address),
.read_data (csr_read_data_unqual),
.write_enable (is_csr_s2),
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
.write_addr (csr_address_s2),
.warp_num (csr_req_if.warp_num),
.wb_valid (| writeback_if.valid)
);
reg [31:0] csr_updated_data;
assign csr_read_data = (csr_address_s2 == csr_req_if.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
reg [31:0] csr_updated_data;
always @(*) begin
case (csr_req_if.alu_op)
@ -52,55 +51,29 @@ module VX_csr_pipe #(
endcase
end
wire zero = 0;
VX_generic_register #(
.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)
) csr_reg_s2 (
.clk (clk),
.reset(reset),
.stall(no_slot_csr),
.flush(zero),
.flush(0),
.in ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_csr, csr_req_if.csr_address, csr_read_data , csr_updated_data }),
.out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2})
);
wire [`NUM_THREADS-1:0][31:0] final_csr_data;
assign csr_wb_if.valid = valid_s2;
assign csr_wb_if.warp_num = warp_num_s2;
assign csr_wb_if.rd = rd_s2;
assign csr_wb_if.wb = wb_s2;
wire [`NUM_THREADS-1:0][31:0] thread_ids;
wire [`NUM_THREADS-1:0][31:0] warp_ids;
wire [`NUM_THREADS-1:0][31:0] warp_idz;
wire [`NUM_THREADS-1:0][31:0] csr_vec_read_data_s2;
genvar i;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
assign csr_wb_if.data[i] = (csr_address_s2 == `CSR_LTID) ? i :
(csr_address_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
csr_read_data_s2;
end
genvar cur_t;
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
assign thread_ids[cur_t] = cur_t;
end
genvar cur_tw;
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
assign warp_ids[cur_tw] = 32'(warp_num_s2);
assign warp_idz[cur_tw] = 32'(warp_num_s2) + (CORE_ID * `NUM_WARPS);
end
genvar cur_v;
for (cur_v = 0; cur_v < `NUM_THREADS; cur_v = cur_v + 1) begin
assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
end
wire thread_select = (csr_address_s2 == `CSR_THREAD);
wire warp_select = (csr_address_s2 == `CSR_WARP);
wire warp_id_select = (csr_address_s2 == `CSR_WARP_ID);
assign final_csr_data = thread_select ? thread_ids :
warp_select ? warp_ids :
warp_id_select ? warp_idz :
csr_vec_read_data_s2;
assign csr_wb_if.valid = valid_s2;
assign csr_wb_if.warp_num = warp_num_s2;
assign csr_wb_if.rd = rd_s2;
assign csr_wb_if.wb = wb_s2;
assign csr_wb_if.data = final_csr_data;
assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && (| csr_req_if.valid);
endmodule

View file

@ -50,17 +50,6 @@
`define CSR_WIDTH 12
///////////////////////////////////////////////////////////////////////////////
`define CSR_THREAD 12'h020
`define CSR_WARP 12'h021
`define CSR_WARP_ID 12'h022
`define CSR_CYCL_L 12'hC00;
`define CSR_CYCL_H 12'hC80;
`define CSR_INST_L 12'hC02;
`define CSR_INST_H 12'hC82;
///////////////////////////////////////////////////////////////////////////////
`define R_INST 7'd51
@ -192,7 +181,7 @@
`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE))
// DRAM request tag bits
`define L2DRAM_TAG_WIDTH ((`NUM_CORES > 1) ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+1))
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
////////////////////////// L3cache Configurable Knobs /////////////////////////

View file

@ -46,7 +46,7 @@ module VX_dmem_ctrl (
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
) dcache_rsp_dcache_if();
wire to_shm = `SHARED_MEM_ADDR_MATCH(dcache_core_req_if.core_req_addr[0]);
wire to_shm = (dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR);
wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_rsp_valid);
// Dcache Request

95
hw/rtl/VX_dram_arb.v Normal file
View file

@ -0,0 +1,95 @@
`include "VX_define.vh"
// VX_dram_arb: multiplexes NUM_REQUESTS core-side DRAM request ports onto a
// single DRAM port using a fixed time-slot (rotating) selector.
//
// Requests : on each cycle exactly one requester (index bus_sel) is connected
//            to the DRAM request port; only that requester can see
//            core_req_ready asserted. The requester index is appended to the
//            low bits of the outgoing tag.
// Responses: the low LOG2UP(NUM_REQUESTS) bits of dram_rsp_tag select the
//            requester that receives the response; the remaining bits are
//            returned as the core tag.
//
// Notes:
//  - DRAM_TAG_WIDTH should be at least CORE_TAG_WIDTH + LOG2UP(NUM_REQUESTS),
//    otherwise the concatenated request tag is truncated on assignment.
//  - `DRAM_ADDR_WIDTH and `BANK_LINE_WIDTH are macros defined outside this
//    file; presumably they are derived from BANK_LINE_SIZE - TODO confirm.
module VX_dram_arb #(
    parameter BANK_LINE_SIZE = 1,
    parameter NUM_REQUESTS   = 1,
    parameter CORE_TAG_WIDTH = 1,
    parameter DRAM_TAG_WIDTH = 1
) (
    input wire clk,
    input wire reset,

    // Core request
    input wire [NUM_REQUESTS-1:0]                        core_req_read,
    input wire [NUM_REQUESTS-1:0]                        core_req_write,
    input wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0]  core_req_addr,
    input wire [NUM_REQUESTS-1:0][`BANK_LINE_WIDTH-1:0]  core_req_data,
    input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0]    core_req_tag,
    output reg [NUM_REQUESTS-1:0]                        core_req_ready,

    // Core response
    // Declared as reg because they are driven from a combinational always
    // block; procedural assignment to a net (wire) is not legal.
    output reg [NUM_REQUESTS-1:0]                        core_rsp_valid,
    output reg [NUM_REQUESTS-1:0][`BANK_LINE_WIDTH-1:0]  core_rsp_data,
    output reg [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0]    core_rsp_tag,
    input wire [NUM_REQUESTS-1:0]                        core_rsp_ready,

    // DRAM request
    output reg                                           dram_req_read,
    output reg                                           dram_req_write,
    output reg [`DRAM_ADDR_WIDTH-1:0]                    dram_req_addr,
    output reg [`BANK_LINE_WIDTH-1:0]                    dram_req_data,
    output reg [DRAM_TAG_WIDTH-1:0]                      dram_req_tag,
    input wire                                           dram_req_ready,

    // DRAM response
    input wire                                           dram_rsp_valid,
    input wire [`BANK_LINE_WIDTH-1:0]                    dram_rsp_data,
    input wire [DRAM_TAG_WIDTH-1:0]                      dram_rsp_tag,
    output reg                                           dram_rsp_ready
);
    // Number of tag/selector bits used to identify a requester.
    localparam SEL_BITS = `LOG2UP(NUM_REQUESTS);

    // Index of the requester that owns the DRAM request port this cycle.
    reg [SEL_BITS-1:0] bus_sel;

    // Advance the time slot every cycle. Wrap explicitly at NUM_REQUESTS-1 so
    // that a non-power-of-two NUM_REQUESTS never produces a slot in which no
    // requester is selected.
    always @(posedge clk) begin
        if (reset) begin
            bus_sel <= 0;
        end else if (bus_sel == SEL_BITS'(NUM_REQUESTS - 1)) begin
            bus_sel <= 0;
        end else begin
            bus_sel <= bus_sel + 1;
        end
    end

    // Separate loop indices per always block so the two combinational
    // processes do not share a variable.
    integer req_i;
    integer rsp_i;

    // Request path: forward the selected requester to the DRAM port.
    always @(*) begin
        // Idle defaults (no request) instead of high-impedance, so the DRAM
        // port is always driven to a known value.
        dram_req_read  = 0;
        dram_req_write = 0;
        dram_req_addr  = 0;
        dram_req_data  = 0;
        dram_req_tag   = 0;
        for (req_i = 0; req_i < NUM_REQUESTS; req_i = req_i + 1) begin
            if (bus_sel == SEL_BITS'(req_i)) begin
                dram_req_read  = core_req_read[req_i];
                dram_req_write = core_req_write[req_i];
                dram_req_addr  = core_req_addr[req_i];
                dram_req_data  = core_req_data[req_i];
                // Low bits carry the requester index for response routing.
                dram_req_tag   = {core_req_tag[req_i], SEL_BITS'(req_i)};
                core_req_ready[req_i] = dram_req_ready;
            end else begin
                core_req_ready[req_i] = 0;
            end
        end
    end

    // True when the response tag addresses the requester being examined.
    reg tag_match;

    // Response path: broadcast data/tag, assert valid only for the addressed
    // requester, and take ready from that same requester.
    always @(*) begin
        dram_rsp_ready = 0;
        for (rsp_i = 0; rsp_i < NUM_REQUESTS; rsp_i = rsp_i + 1) begin
            tag_match = (dram_rsp_tag[SEL_BITS-1:0] == SEL_BITS'(rsp_i));
            core_rsp_valid[rsp_i] = dram_rsp_valid & tag_match;
            core_rsp_data[rsp_i]  = dram_rsp_data;
            core_rsp_tag[rsp_i]   = dram_rsp_tag[SEL_BITS +: CORE_TAG_WIDTH];
            if (tag_match) begin
                dram_rsp_ready = core_rsp_ready[rsp_i];
            end
        end
    end

endmodule

View file

@ -1,54 +0,0 @@
`include "VX_define.vh"
// VX_l1c_to_dram_arb: shares one DRAM port between the L1 instruction cache
// and the L1 data cache.
//
// Requests : cache_sel toggles every cycle; slot 0 belongs to the icache and
//            slot 1 to the dcache. Only the cache that owns the current slot
//            is forwarded to the DRAM port and can observe dram_req_ready.
//            Bit 0 of the outgoing tag records which cache issued the
//            request (1 = icache, 0 = dcache).
// Responses: bit 0 of dram_rsp_tag routes the response; the remaining bits
//            are handed back as the cache's own tag.
module VX_l1c_to_dram_arb #(
    parameter REQQ_SIZE = 8   // not referenced inside this module
) (
    input wire clk,
    input wire reset,

    // L1 data cache DRAM port
    VX_cache_dram_req_if dcache_dram_req_if,
    VX_cache_dram_rsp_if dcache_dram_rsp_if,

    // L1 instruction cache DRAM port
    VX_cache_dram_req_if icache_dram_req_if,
    VX_cache_dram_rsp_if icache_dram_rsp_if,

    // Shared DRAM port
    VX_cache_dram_req_if dram_req_if,
    VX_cache_dram_rsp_if dram_rsp_if
);
    // Current time slot: 0 = icache, 1 = dcache.
    reg cache_sel;

    wire icache_sel_out, icache_sel_in;

    // Select strictly by slot owner. Forwarding the dcache request during the
    // icache slot (when the icache is idle) while holding the dcache's ready
    // low would let the DRAM accept a request the dcache believes was not
    // taken, causing it to be issued twice.
    assign icache_sel_out = (cache_sel == 0);

    assign dram_req_if.dram_req_read  = icache_sel_out ? icache_dram_req_if.dram_req_read  : dcache_dram_req_if.dram_req_read;
    assign dram_req_if.dram_req_write = icache_sel_out ? icache_dram_req_if.dram_req_write : dcache_dram_req_if.dram_req_write;
    assign dram_req_if.dram_req_addr  = icache_sel_out ? icache_dram_req_if.dram_req_addr  : dcache_dram_req_if.dram_req_addr;
    assign dram_req_if.dram_req_data  = icache_sel_out ? icache_dram_req_if.dram_req_data  : dcache_dram_req_if.dram_req_data;
    // Append the source-cache bit so the response can be routed back.
    assign dram_req_if.dram_req_tag   = {icache_sel_out ? icache_dram_req_if.dram_req_tag : dcache_dram_req_if.dram_req_tag, icache_sel_out};

    // Only the slot owner sees the DRAM's ready.
    assign icache_dram_req_if.dram_req_ready = dram_req_if.dram_req_ready && (cache_sel == 0);
    assign dcache_dram_req_if.dram_req_ready = dram_req_if.dram_req_ready && (cache_sel == 1);

    // Response routing by tag bit 0.
    assign icache_sel_in = dram_rsp_if.dram_rsp_tag[0];

    assign icache_dram_rsp_if.dram_rsp_valid = dram_rsp_if.dram_rsp_valid && icache_sel_in;
    assign icache_dram_rsp_if.dram_rsp_data  = dram_rsp_if.dram_rsp_data;
    assign icache_dram_rsp_if.dram_rsp_tag   = dram_rsp_if.dram_rsp_tag[1 +: $bits(icache_dram_rsp_if.dram_rsp_tag)];

    assign dcache_dram_rsp_if.dram_rsp_valid = dram_rsp_if.dram_rsp_valid && ~icache_sel_in;
    assign dcache_dram_rsp_if.dram_rsp_data  = dram_rsp_if.dram_rsp_data;
    assign dcache_dram_rsp_if.dram_rsp_tag   = dram_rsp_if.dram_rsp_tag[1 +: $bits(dcache_dram_rsp_if.dram_rsp_tag)];

    // A response is accepted only when both caches can take one, regardless
    // of which cache it is addressed to.
    assign dram_rsp_if.dram_rsp_ready = icache_dram_rsp_if.dram_rsp_ready && dcache_dram_rsp_if.dram_rsp_ready;

    // Alternate slots every cycle.
    always @(posedge clk) begin
        if (reset) begin
            cache_sel <= 0;
        end else begin
            cache_sel <= ~cache_sel;
        end
    end

endmodule

View file

@ -8,10 +8,6 @@ module Vortex #(
input wire clk,
input wire reset,
// IO
output wire io_valid,
output wire [31:0] io_data,
// DRAM Dcache Req
output wire D_dram_req_read,
output wire D_dram_req_write,
@ -40,11 +36,17 @@ module Vortex #(
input wire [`IDRAM_TAG_WIDTH-1:0] I_dram_rsp_tag,
output wire I_dram_rsp_ready,
// LLC Snooping
// Cache Snooping
input wire llc_snp_req_valid,
input wire [`DDRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
// I/O
output wire io_valid,
output wire [31:0] io_data,
input wire io_ready,
// Debug
output wire ebreak
);
`DEBUG_BEGIN
@ -98,20 +100,17 @@ module Vortex #(
assign dcache_dram_rsp_if.dram_rsp_tag = D_dram_rsp_tag;
assign D_dram_rsp_ready = dcache_dram_rsp_if.dram_rsp_ready;
assign io_valid = (!memory_delay)
&& (|dcache_core_req_if.core_req_valid)
&& (dcache_core_req_if.core_req_write[0] != `WORD_SEL_NO)
&& (dcache_core_req_if.core_req_addr[0] == `IO_BUS_ADDR);
wire to_io_bus = (dcache_core_req_if.core_req_addr[0] == `IO_BUS_ADDR);
assign io_valid = |dcache_core_req_if.core_req_valid && to_io_bus;
assign io_data = dcache_core_req_if.core_req_data[0];
assign io_data = dcache_core_req_if.core_req_data[0];
assign dcache_core_req_qual_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{~io_valid}};
assign dcache_core_req_qual_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{~to_io_bus}};
assign dcache_core_req_qual_if.core_req_read = dcache_core_req_if.core_req_read;
assign dcache_core_req_qual_if.core_req_write = dcache_core_req_if.core_req_write;
assign dcache_core_req_qual_if.core_req_addr = dcache_core_req_if.core_req_addr;
assign dcache_core_req_qual_if.core_req_data = dcache_core_req_if.core_req_data;
assign dcache_core_req_qual_if.core_req_tag = dcache_core_req_if.core_req_tag;
assign dcache_core_req_if.core_req_ready = dcache_core_req_qual_if.core_req_ready;
assign dcache_core_req_if.core_req_ready = to_io_bus ? io_ready : dcache_core_req_qual_if.core_req_ready;
// Icache interfaces
VX_cache_core_req_if #(

View file

@ -8,10 +8,6 @@ module Vortex_Cluster #(
input wire clk,
input wire reset,
// IO
output wire[`NUM_CORES-1:0] io_valid,
output wire[`NUM_CORES-1:0][31:0] io_data,
// DRAM Req
output wire dram_req_read,
output wire dram_req_write,
@ -26,240 +22,140 @@ module Vortex_Cluster #(
input wire[`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// LLC Snooping
// Cache Snooping
input wire llc_snp_req_valid,
input wire[`L2DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
// IO
output wire io_valid,
output wire [31:0] io_data,
input wire io_ready,
// Debug
output wire ebreak
);
if (`NUM_CORES == 1) begin
);
// DRAM Dcache Req
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
wire[`NUM_CORES-1:0] per_core_D_dram_req_write;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_req_data;
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag;
wire[`NUM_CORES-1:0] per_core_D_dram_req_ready;
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
) dcache_dram_req_if();
// DRAM Dcache Rsp
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_valid;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data;
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_ready;
VX_cache_dram_rsp_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
) dcache_dram_rsp_if();
// DRAM Icache Req
wire[`NUM_CORES-1:0] per_core_I_dram_req_read;
wire[`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_req_ready;
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
) icache_dram_req_if();
// DRAM Icache Rsp
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_valid;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
VX_cache_dram_rsp_if #(
.DRAM_LINE_WIDTH(`IDRAM_LINE_WIDTH),
.DRAM_TAG_WIDTH(`IDRAM_TAG_WIDTH)
) icache_dram_rsp_if();
// Snooping
wire snp_fwd_valid;
wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready;
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`L2DRAM_LINE_WIDTH),
.DRAM_ADDR_WIDTH(`L2DRAM_ADDR_WIDTH),
.DRAM_TAG_WIDTH(`L2DRAM_TAG_WIDTH)
) dram_req_if();
`IGNORE_WARNINGS_BEGIN
wire[`NUM_CORES-1:0] per_core_io_valid;
wire[`NUM_CORES-1:0][31:0] per_core_io_data;
`IGNORE_WARNINGS_END
VX_cache_dram_rsp_if #(
.DRAM_LINE_WIDTH(`L2DRAM_LINE_WIDTH),
.DRAM_TAG_WIDTH(`L2DRAM_TAG_WIDTH)
) dram_rsp_if();
// ebreak
wire[`NUM_CORES-1:0] per_core_ebreak;
assign dram_req_read = dram_req_if.dram_req_read;
assign dram_req_write = dram_req_if.dram_req_write;
assign dram_req_addr = dram_req_if.dram_req_addr;
assign dram_req_data = dram_req_if.dram_req_data;
assign dram_req_tag = dram_req_if.dram_req_tag;
assign dram_req_if.dram_req_ready = dram_req_ready;
assign dram_rsp_if.dram_rsp_valid = dram_rsp_valid;
assign dram_rsp_if.dram_rsp_data = dram_rsp_data;
assign dram_rsp_if.dram_rsp_tag = dram_rsp_tag;
assign dram_rsp_ready = dram_rsp_if.dram_rsp_ready;
VX_l1c_to_dram_arb #(
.REQQ_SIZE(`L2REQQ_SIZE)
) l1c_to_dram_arb (
.clk (clk),
.reset (reset),
.dcache_dram_req_if (dcache_dram_req_if),
.dcache_dram_rsp_if (dcache_dram_rsp_if),
.icache_dram_req_if (icache_dram_req_if),
.icache_dram_rsp_if (icache_dram_rsp_if),
.dram_req_if (dram_req_if),
.dram_rsp_if (dram_rsp_if)
);
assign io_valid = per_core_io_valid[0];
assign io_data = per_core_io_data[0];
assign ebreak = (& per_core_ebreak);
genvar i;
for (i = 0; i < `NUM_CORES; i = i + 1) begin
Vortex #(
.CORE_ID(0)
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) vortex_core (
.clk (clk),
.reset (reset),
.io_valid (io_valid[0]),
.io_data (io_data[0]),
.D_dram_req_read (dcache_dram_req_if.dram_req_read),
.D_dram_req_write (dcache_dram_req_if.dram_req_write),
.D_dram_req_addr (dcache_dram_req_if.dram_req_addr),
.D_dram_req_data (dcache_dram_req_if.dram_req_data),
.D_dram_req_tag (dcache_dram_req_if.dram_req_tag),
.D_dram_req_ready (dcache_dram_req_if.dram_req_ready),
.D_dram_rsp_valid (dcache_dram_rsp_if.dram_rsp_valid),
.D_dram_rsp_data (dcache_dram_rsp_if.dram_rsp_data),
.D_dram_rsp_tag (dcache_dram_rsp_if.dram_rsp_tag),
.D_dram_rsp_ready (dcache_dram_rsp_if.dram_rsp_ready),
.I_dram_req_read (icache_dram_req_if.dram_req_read),
.I_dram_req_write (icache_dram_req_if.dram_req_write),
.I_dram_req_addr (icache_dram_req_if.dram_req_addr),
.I_dram_req_data (icache_dram_req_if.dram_req_data),
.I_dram_req_tag (icache_dram_req_if.dram_req_tag),
.I_dram_req_ready (icache_dram_req_if.dram_req_ready),
.I_dram_rsp_valid (icache_dram_rsp_if.dram_rsp_valid),
.I_dram_rsp_data (icache_dram_rsp_if.dram_rsp_data),
.I_dram_rsp_ready (icache_dram_rsp_if.dram_rsp_ready),
.I_dram_rsp_tag (icache_dram_rsp_if.dram_rsp_tag),
.llc_snp_req_valid (llc_snp_req_valid),
.llc_snp_req_addr (llc_snp_req_addr),
.llc_snp_req_ready (llc_snp_req_ready),
.ebreak (ebreak)
.D_dram_req_read (per_core_D_dram_req_read [i]),
.D_dram_req_write (per_core_D_dram_req_write [i]),
.D_dram_req_addr (per_core_D_dram_req_addr [i]),
.D_dram_req_data (per_core_D_dram_req_data [i]),
.D_dram_req_tag (per_core_D_dram_req_tag [i]),
.D_dram_req_ready (per_core_D_dram_req_ready [i]),
.D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]),
.D_dram_rsp_data (per_core_D_dram_rsp_data [i]),
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
.I_dram_req_read (per_core_I_dram_req_read [i]),
`IGNORE_WARNINGS_BEGIN
.I_dram_req_write (),
`IGNORE_WARNINGS_END
.I_dram_req_addr (per_core_I_dram_req_addr [i]),
.I_dram_req_data (per_core_I_dram_req_data [i]),
.I_dram_req_tag (per_core_I_dram_req_tag [i]),
.I_dram_req_ready (per_core_I_dram_req_ready [i]),
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
.io_valid (per_core_io_valid [i]),
.io_data (per_core_io_data [i]),
.io_ready (io_ready),
.ebreak (per_core_ebreak [i])
);
end
end else begin
// DRAM Dcache Req
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
wire[`NUM_CORES-1:0] per_core_D_dram_req_write;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_req_data;
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag;
// DRAM Dcache Rsp
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_valid;
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data;
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_ready;
// DRAM Icache Req
wire[`NUM_CORES-1:0] per_core_I_dram_req_read;
wire[`NUM_CORES-1:0] per_core_I_dram_req_write;
wire[`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag;
// DRAM Icache Rsp
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_valid;
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data;
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
// Out ebreak
wire[`NUM_CORES-1:0] per_core_ebreak;
wire[`NUM_CORES-1:0] per_core_io_valid;
wire[`NUM_CORES-1:0][31:0] per_core_io_data;
wire l2_core_req_ready;
wire snp_fwd_valid;
wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready;
assign ebreak = (& per_core_ebreak);
genvar i;
for (i = 0; i < `NUM_CORES; i = i + 1) begin
wire [`IDRAM_LINE_WIDTH-1:0] curr_core_D_dram_req_data;
wire [`DDRAM_LINE_WIDTH-1:0] curr_core_I_dram_req_data;
assign io_valid[i] = per_core_io_valid[i];
assign io_data[i] = per_core_io_data[i];
Vortex #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) vortex_core (
.clk (clk),
.reset (reset),
.io_valid (per_core_io_valid [i]),
.io_data (per_core_io_data [i]),
.D_dram_req_read (per_core_D_dram_req_read [i]),
.D_dram_req_write (per_core_D_dram_req_write [i]),
.D_dram_req_addr (per_core_D_dram_req_addr [i]),
.D_dram_req_data (curr_core_D_dram_req_data ),
.D_dram_req_tag (per_core_D_dram_req_tag [i]),
.D_dram_req_ready (l2_core_req_ready ),
.D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]),
.D_dram_rsp_data (per_core_D_dram_rsp_data [i]),
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
.I_dram_req_read (per_core_I_dram_req_read [i]),
.I_dram_req_write (per_core_I_dram_req_write [i]),
.I_dram_req_addr (per_core_I_dram_req_addr [i]),
.I_dram_req_data (curr_core_I_dram_req_data ),
.I_dram_req_tag (per_core_I_dram_req_tag [i]),
.I_dram_req_ready (l2_core_req_ready ),
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
.ebreak (per_core_ebreak [i])
);
assign per_core_D_dram_req_data [i] = curr_core_D_dram_req_data;
assign per_core_I_dram_req_data [i] = curr_core_I_dram_req_data;
end
if (`L2_ENABLE) begin
// L2 Cache ///////////////////////////////////////////////////////////
wire[`L2NUM_REQUESTS-1:0] l2_core_req_valid;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_mem_write;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_mem_read;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_write;
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_read;
wire[`L2NUM_REQUESTS-1:0][31:0] l2_core_req_addr;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_req_tag;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_req_data;
wire l2_core_req_ready;
wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_valid;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_rsp_data;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag;
wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_ready;
wire[`DDRAM_LINE_WIDTH-1:0] l2_dram_req_data;
wire[`DDRAM_LINE_WIDTH-1:0] l2_dram_rsp_data;
assign dram_req_data = l2_dram_req_data;
assign l2_dram_rsp_data = dram_rsp_data;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
// Core Request
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
assign l2_core_req_valid [i+1] = (per_core_I_dram_req_read[(i/2)] | per_core_I_dram_req_write[(i/2)]);
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
assign l2_core_req_valid [i+1] = per_core_I_dram_req_read[(i/2)];
assign l2_core_req_read [i] = per_core_D_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_mem_write [i] = per_core_D_dram_req_write[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_mem_write [i+1] = `WORD_SEL_NO;
assign l2_core_req_write [i] = per_core_D_dram_req_write[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_write [i+1] = `WORD_SEL_NO;
assign l2_core_req_mem_read [i] = per_core_D_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l2_core_req_mem_read [i+1] = `WORD_SEL_NO;
assign l2_core_req_addr [i] = {per_core_D_dram_req_addr[(i/2)], {`LOG2UP(`DBANK_LINE_SIZE){1'b0}}};
assign l2_core_req_addr [i+1] = {per_core_I_dram_req_addr[(i/2)], {`LOG2UP(`IBANK_LINE_SIZE){1'b0}}};
assign l2_core_req_addr [i] = {per_core_D_dram_req_addr[(i/2)], {`LOG2UP(`DBANK_LINE_SIZE){1'b0}}};
assign l2_core_req_addr [i+1] = {per_core_I_dram_req_addr[(i/2)], {`LOG2UP(`IBANK_LINE_SIZE){1'b0}}};
assign l2_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
assign l2_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
assign l2_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
assign l2_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign per_core_D_dram_req_ready[(i/2)] = l2_core_req_ready;
assign per_core_I_dram_req_ready[(i/2)] = l2_core_req_ready;
assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i];
assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1];
@ -302,8 +198,8 @@ module Vortex_Cluster #(
// Core request
.core_req_valid (l2_core_req_valid),
.core_req_read (l2_core_req_mem_read),
.core_req_write (l2_core_req_mem_write),
.core_req_read (l2_core_req_read),
.core_req_write (l2_core_req_write),
.core_req_addr (l2_core_req_addr),
.core_req_data (l2_core_req_data),
.core_req_tag (l2_core_req_tag),
@ -313,20 +209,20 @@ module Vortex_Cluster #(
.core_rsp_valid (l2_core_rsp_valid),
.core_rsp_data (l2_core_rsp_data),
.core_rsp_tag (l2_core_rsp_tag),
.core_rsp_ready (|l2_core_rsp_ready),
.core_rsp_ready (& l2_core_rsp_ready),
// DRAM request
.dram_req_read (dram_req_read),
.dram_req_write (dram_req_write),
.dram_req_addr (dram_req_addr),
.dram_req_data (l2_dram_req_data),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
// L2 Cache DRAM Fill response
// DRAM response
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_data (l2_dram_rsp_data),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_ready (dram_rsp_ready),
// Snoop request
@ -339,6 +235,95 @@ module Vortex_Cluster #(
.snp_fwd_addr (snp_fwd_addr),
.snp_fwd_ready (& per_core_snp_fwd_ready)
);
end else begin
wire[`L2NUM_REQUESTS-1:0] per_core_req_read;
wire[`L2NUM_REQUESTS-1:0] per_core_req_write;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_req_addr;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_req_tag;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_req_data;
wire[`L2NUM_REQUESTS-1:0] per_core_req_ready;
wire[`L2NUM_REQUESTS-1:0] per_core_rsp_valid;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_rsp_data;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_rsp_tag;
wire[`L2NUM_REQUESTS-1:0] per_core_rsp_ready;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign per_core_req_read [i] = per_core_D_dram_req_read[(i/2)];
assign per_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)];
assign per_core_req_write [i] = per_core_D_dram_req_write[(i/2)];
assign per_core_req_write [i+1] = 0;
assign per_core_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
assign per_core_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
assign per_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
assign per_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
assign per_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign per_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign per_core_D_dram_req_ready[(i/2)] = per_core_req_ready[i];
assign per_core_I_dram_req_ready[(i/2)] = per_core_req_ready[i+1];
assign per_core_D_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i];
assign per_core_I_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i+1];
assign per_core_D_dram_rsp_data [(i/2)] = per_core_rsp_data[i];
assign per_core_I_dram_rsp_data [(i/2)] = per_core_rsp_data[i+1];
assign per_core_D_dram_rsp_tag [(i/2)] = per_core_rsp_tag[i];
assign per_core_I_dram_rsp_tag [(i/2)] = per_core_rsp_tag[i+1];
assign per_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)];
assign per_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
end
VX_dram_arb #(
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
.NUM_REQUESTS (`L2NUM_REQUESTS),
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)
) dram_arb (
.clk (clk),
.reset (reset),
// Core request
.core_req_read (per_core_req_read),
.core_req_write (per_core_req_write),
.core_req_addr (per_core_req_addr),
.core_req_data (per_core_req_data),
.core_req_tag (per_core_req_tag),
.core_req_ready (per_core_req_ready),
// Core response
.core_rsp_valid (per_core_rsp_valid),
.core_rsp_data (per_core_rsp_data),
.core_rsp_tag (per_core_rsp_tag),
.core_rsp_ready (per_core_rsp_ready),
// DRAM request
.dram_req_read (dram_req_read),
.dram_req_write (dram_req_write),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
// DRAM response
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_ready (dram_rsp_ready)
);
// Cache snooping
assign snp_fwd_valid = llc_snp_req_valid;
assign snp_fwd_addr = llc_snp_req_addr;
assign llc_snp_req_ready = & per_core_snp_fwd_ready;
end
endmodule

View file

@ -6,10 +6,6 @@ module Vortex_Socket (
input wire clk,
input wire reset,
// IO
output wire io_valid[(`NUM_CORES * `NUM_CLUSTERS)-1:0],
output wire[31:0] io_data [(`NUM_CORES * `NUM_CLUSTERS)-1:0],
// DRAM Req
output wire dram_req_read,
output wire dram_req_write,
@ -24,32 +20,26 @@ module Vortex_Socket (
input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// LLC Snooping
// Cache Snooping
input wire llc_snp_req_valid,
input wire[`L3DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
// I/O
output wire io_valid,
output wire [31:0] io_data,
input wire io_ready,
// Debug
output wire ebreak
);
if (`NUM_CLUSTERS == 1) begin
wire[`NUM_CORES-1:0] cluster_io_valid;
wire[`NUM_CORES-1:0][31:0] cluster_io_data;
genvar i;
for (i = 0; i < `NUM_CORES; i=i+1) begin
assign io_valid [i] = cluster_io_valid [i];
assign io_data [i] = cluster_io_data [i];
end
Vortex_Cluster #(
.CLUSTER_ID(0)
) Vortex_Cluster (
.clk (clk),
.reset (reset),
.io_valid (cluster_io_valid),
.io_data (cluster_io_data),
.dram_req_read (dram_req_read),
.dram_req_write (dram_req_write),
@ -67,51 +57,46 @@ module Vortex_Socket (
.llc_snp_req_addr (llc_snp_req_addr),
.llc_snp_req_ready (llc_snp_req_ready),
.io_valid (io_valid),
.io_data (io_data),
.io_ready (io_ready),
.ebreak (ebreak)
);
end else begin
wire snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready;
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
assign ebreak = (& per_cluster_ebreak);
// // DRAM Dcache Req
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
// DRAM Dcache Req
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data_up;
wire l3_core_req_ready;
// // DRAM Dcache Rsp
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
// DRAM Dcache Rsp
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data_up;
wire snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready;
wire[`NUM_CLUSTERS-1:0][`NUM_CORES-1:0] per_cluster_io_valid;
wire[`NUM_CLUSTERS-1:0][`NUM_CORES-1:0][31:0] per_cluster_io_data;
`IGNORE_WARNINGS_BEGIN
wire[`NUM_CLUSTERS-1:0] per_cluster_io_valid;
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_data;
`IGNORE_WARNINGS_END
genvar i, j;
for (i = 0; i < `NUM_CLUSTERS; i = i + 1) begin
for (j = 0; j < `NUM_CORES; j = j + 1) begin
assign io_valid[j+(i*`NUM_CORES)] = per_cluster_io_valid[i][j];
assign io_data[j+(i*`NUM_CORES)] = per_cluster_io_data[i][j];
end
assign per_cluster_dram_req_data[i] = per_cluster_dram_req_data_up[i];
assign per_cluster_dram_rsp_data_up[i] = per_cluster_dram_rsp_data[i];
end
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
assign io_valid = per_cluster_io_valid[0];
assign io_data = per_cluster_io_data[0];
assign ebreak = (& per_cluster_ebreak);
genvar i;
for (i = 0; i < `NUM_CLUSTERS; i=i+1) begin
Vortex_Cluster #(
.CLUSTER_ID(i)
@ -119,34 +104,35 @@ module Vortex_Socket (
.clk (clk),
.reset (reset),
.io_valid (per_cluster_io_valid [i]),
.io_data (per_cluster_io_data [i]),
.dram_req_write (per_cluster_dram_req_write [i]),
.dram_req_read (per_cluster_dram_req_read [i]),
.dram_req_addr (per_cluster_dram_req_addr [i]),
.dram_req_data (per_cluster_dram_req_data_up [i]),
.dram_req_tag (per_cluster_dram_req_tag [i]),
.dram_req_write (per_cluster_dram_req_write [i]),
.dram_req_read (per_cluster_dram_req_read [i]),
.dram_req_addr (per_cluster_dram_req_addr [i]),
.dram_req_data (per_cluster_dram_req_data [i]),
.dram_req_tag (per_cluster_dram_req_tag [i]),
.dram_req_ready (l3_core_req_ready),
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
.dram_rsp_data (per_cluster_dram_rsp_data_up [i]),
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
.dram_rsp_data (per_cluster_dram_rsp_data [i]),
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_cluster_snp_fwd_ready [i]),
.llc_snp_req_ready (per_cluster_snp_fwd_ready [i]),
.ebreak (per_cluster_ebreak [i])
.io_valid (per_cluster_io_valid [i]),
.io_data (per_cluster_io_data [i]),
.io_ready (io_ready),
.ebreak (per_cluster_ebreak [i])
);
end
// L3 Cache ///////////////////////////////////////////////////////////
wire[`L3NUM_REQUESTS-1:0] l3_core_req_valid;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_mem_write;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_mem_read;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_read;
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_write;
wire[`L3NUM_REQUESTS-1:0][31:0] l3_core_req_addr;
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
@ -156,23 +142,17 @@ module Vortex_Socket (
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
wire[`L3NUM_REQUESTS-1:0] l3_core_rsp_ready;
wire[`L3DRAM_LINE_WIDTH-1:0] l3_dram_req_data;
wire[`L3DRAM_LINE_WIDTH-1:0] l3_dram_rsp_data;
assign dram_req_data = l3_dram_req_data;
assign l3_dram_rsp_data = dram_rsp_data;
for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin
// Core Request
assign l3_core_req_valid [i] = (per_cluster_dram_req_read[i] | per_cluster_dram_req_write[i]);
assign l3_core_req_mem_read [i] = per_cluster_dram_req_read [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l3_core_req_mem_write [i] = per_cluster_dram_req_write[i] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}};
assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i];
assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
assign l3_core_req_valid [i] = (per_cluster_dram_req_read [i] | per_cluster_dram_req_write [i]);
assign l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l3_core_req_write [i] = per_cluster_dram_req_write [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}};
assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i];
assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
// Core can't accept Response
assign l3_core_rsp_ready [i] = per_cluster_dram_rsp_ready[i];
// Core Response
assign l3_core_rsp_ready [i] = per_cluster_dram_rsp_ready[i];
// Cache Fill Response
assign per_cluster_dram_rsp_valid [i] = l3_core_rsp_valid [i];
@ -208,8 +188,8 @@ module Vortex_Socket (
// Core request
.core_req_valid (l3_core_req_valid),
.core_req_read (l3_core_req_mem_read),
.core_req_write (l3_core_req_mem_write),
.core_req_read (l3_core_req_read),
.core_req_write (l3_core_req_write),
.core_req_addr (l3_core_req_addr),
.core_req_data (l3_core_req_data),
.core_req_tag (l3_core_req_tag),
@ -219,19 +199,19 @@ module Vortex_Socket (
.core_rsp_valid (l3_core_rsp_valid),
.core_rsp_data (l3_core_rsp_data),
.core_rsp_tag (l3_core_rsp_tag),
.core_rsp_ready (|l3_core_rsp_ready),
.core_rsp_ready (& l3_core_rsp_ready),
// DRAM request
.dram_req_write (dram_req_write),
.dram_req_read (dram_req_read),
.dram_req_addr (dram_req_addr),
.dram_req_data (l3_dram_req_data),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
// DRAM response
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_data (l3_dram_rsp_data),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_ready (dram_rsp_ready),

View file

@ -85,7 +85,7 @@ module VX_cache_core_rsp_merge #(
&& per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
core_rsp_tag[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];

View file

@ -98,19 +98,13 @@ void Simulator::dbus_driver() {
vortex_->dram_req_ready = ~dram_stalled_;
}
void Simulator::io_handler() {
bool io_valid = false;
for (int c = 0; c < NUM_CORES; c++) {
if (vortex_->io_valid[c]) {
uint32_t data_write = (uint32_t)vortex_->io_data[c];
char c = (char)data_write;
std::cerr << c;
io_valid = true;
}
}
if (io_valid) {
std::cout << std::flush;
// Services the device's single I/O output port for the current step:
// when io_valid is set, the io_data value is narrowed to a char and
// written to std::cerr. io_ready is then driven high unconditionally.
void Simulator::io_driver() {
if (vortex_->io_valid) {
uint32_t data_write = (uint32_t)vortex_->io_data;
// only the character-sized part of the word is printed
char c = (char)data_write;
std::cerr << c;
}
// set on every call, whether or not a character was consumed
vortex_->io_ready = true;
}
void Simulator::reset() {
@ -128,7 +122,7 @@ void Simulator::step() {
this->eval();
dbus_driver();
io_handler();
io_driver();
}
void Simulator::eval() {
@ -149,7 +143,9 @@ bool Simulator::is_busy() {
return (0 == vortex_->ebreak);
}
void Simulator::send_snoops(uint32_t mem_addr, uint32_t size) {
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
// send snoop requests to the caches
printf("[sim] total cycles: %ld\n", time_stamp/2);
// align address to LLC block boundaries
auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE;
auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
@ -169,12 +165,6 @@ void Simulator::send_snoops(uint32_t mem_addr, uint32_t size) {
vortex_->llc_snp_req_valid = true;
}
}
}
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
// send snoop requests to the caches
printf("[sim] total cycles: %ld\n", time_stamp/2);
this->send_snoops(mem_addr, size);
this->wait(PIPELINE_FLUSH_LATENCY);
}
@ -192,12 +182,12 @@ bool Simulator::run() {
// check riscv-tests PASSED/FAILED status
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#else
#if (NUM_CLUSTERS == 1)
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk2__DOT__genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#else
int status = (int)vortex_->Vortex_Socket->genblk2__DOT__genblk2__BRA__0__KET____DOT__Vortex_Cluster->genblk2__DOT__genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
int status = (int)vortex_->Vortex_Socket->genblk2__DOT__genblk1__BRA__0__KET____DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
#endif
#endif

View file

@ -35,18 +35,16 @@ public:
bool is_busy();
void reset();
void step();
void wait(uint32_t cycles);
void flush_caches(uint32_t mem_addr, uint32_t size);
bool run();
bool run();
void print_stats(std::ostream& out);
private:
void eval();
void wait(uint32_t cycles);
void eval();
void dbus_driver();
void io_handler();
void send_snoops(uint32_t mem_addr, uint32_t size);
void io_driver();
bool dram_stalled_;
std::vector<dram_req_t> dram_req_vec_;

View file

@ -15,10 +15,10 @@ CPY = /home/fares/dev/riscv-gnu-toolchain-vector/drops/bin/riscv32-unknown-elf-
NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.S
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_FIO = $(LIB_PATH)/fileio/fileio.s
VX_FIO = $(LIB_PATH)/fileio/fileio.S
VX_VEC = vx_vec.s
LIBS = /home/fares/dev/riscv-gnu-toolchain-vector/drops/riscv32-unknown-elf/lib/libc.a /home/fares/dev/riscv-gnu-toolchain-vector/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc

View file

@ -11,10 +11,10 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
NEWLIB = $(LIB_PATH)/newlib/newlib.c
VX_STR = $(LIB_PATH)/startup/vx_start.S
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
VX_API = $(LIB_PATH)/vx_api/vx_api.c
VX_FIO = $(LIB_PATH)/fileio/fileio.s
VX_FIO = $(LIB_PATH)/fileio/fileio.S
VX_VEC1 = vx_vec_vvaddint32.s
VX_VEC2 = vx_vec_saxpy.s #float --> int
VX_VEC3 = vx_vec_sgemm_float.s #float --> int

View file

@ -486,7 +486,7 @@ Disassembly of section .text:
800006e0: 00112623 sw ra,12(sp)
800006e4: 00812423 sw s0,8(sp)
800006e8: 01010413 addi s0,sp,16
800006ec: 0d4000ef jal ra,800007c0 <vx_threadID>
800006ec: 0d4000ef jal ra,800007c0 <vx_thread_id>
800006f0: 00050793 mv a5,a0
800006f4: 00078513 mv a0,a5
800006f8: 00c12083 lw ra,12(sp)
@ -554,23 +554,23 @@ Disassembly of section .text:
800007b0: 0000306b 0x306b
800007b4: 00008067 ret
800007b8 <vx_warpID>:
800007b8 <vx_warp_id>:
800007b8: 02102573 csrr a0,0x21
800007bc: 00008067 ret
800007c0 <vx_threadID>:
800007c0 <vx_thread_id>:
800007c0: 02002573 csrr a0,0x20
800007c4: 00008067 ret
800007c8 <vx_getCycles>:
800007c8 <vx_num_cycles>:
800007c8: 02602573 csrr a0,0x26
800007cc: 00008067 ret
800007d0 <vx_getInst>:
800007d0 <vx_num_instrs>:
800007d0: 02502573 csrr a0,0x25
800007d4: 00008067 ret
800007d8 <vx_resetStack>:
800007d8 <vx_reset_stack>:
800007d8: 00400513 li a0,4
800007dc: 0005006b 0x5006b
800007e0: 021026f3 csrr a3,0x21

View file

@ -1,4 +1,5 @@
#include "../config.h"
# .section .FileIO

View file

@ -0,0 +1,69 @@
#include "../config.h"
# Vortex intrinsics callable from C (prototypes are in vx_intrinsics.h).
# Arguments arrive in a0/a1 and results are returned in a0.
# The CSR_* names used below are presumably defined in ../config.h - TODO confirm.
.section .text
# vx_wspawn(numWarps, PC_spawn): wspawn instruction, hand-encoded as a raw word.
.type vx_wspawn, @function
.global vx_wspawn
vx_wspawn:
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
ret
# vx_tmc(numThreads): tmc instruction (sets the thread mask), hand-encoded.
.type vx_tmc, @function
.global vx_tmc
vx_tmc:
.word 0x0005006b # tmc a0
ret
# vx_barrier(barrier id, numWarps): barrier instruction, hand-encoded.
.type vx_barrier, @function
.global vx_barrier
vx_barrier:
.word 0x00b5406b # barrier a0(barrier id), a1(numWarps)
ret
# vx_split(predicate): split instruction on the predicate in a0, hand-encoded.
.type vx_split, @function
.global vx_split
vx_split:
.word 0x0005206b # split a0
ret
# vx_join(): join instruction, hand-encoded; takes no operands.
.type vx_join, @function
.global vx_join
vx_join:
.word 0x0000306b # join
ret
# vx_warp_id(): returns the value of CSR_LWID (warp index within the core).
.type vx_warp_id, @function
.global vx_warp_id
vx_warp_id:
csrr a0, CSR_LWID # read core-local warp index
ret
# vx_warp_gid(): returns the value of CSR_GWID (processor-unique warp id).
.type vx_warp_gid, @function
.global vx_warp_gid
vx_warp_gid:
csrr a0, CSR_GWID # read global warp id
ret
# vx_thread_id(): returns the value of CSR_LTID (thread index within the warp).
.type vx_thread_id, @function
.global vx_thread_id
vx_thread_id:
csrr a0, CSR_LTID # read warp-local thread index
ret
# vx_thread_gid(): returns the value of CSR_GTID (processor-unique thread id).
.type vx_thread_gid, @function
.global vx_thread_gid
vx_thread_gid:
csrr a0, CSR_GTID # read global thread id
ret
# vx_num_cycles(): returns the value of CSR_CYCLL.
# NOTE(review): the name suggests this is only the low word of the cycle counter - confirm.
.type vx_num_cycles, @function
.global vx_num_cycles
vx_num_cycles:
csrr a0, CSR_CYCLL # read cycle count
ret
# vx_num_instrs(): returns the value of CSR_INSTL.
# NOTE(review): the name suggests this is only the low word of the instruction counter - confirm.
.type vx_num_instrs, @function
.global vx_num_instrs
vx_num_instrs:
csrr a0, CSR_INSTL # read instruction count
ret

View file

@ -7,35 +7,38 @@
extern "C" {
#endif
// Spawns Warps
// Spawn warps
void vx_wspawn(unsigned numWarps, unsigned PC_spawn);
// Changes thread mask (activated/deactivates threads)
// Set thread mask
void vx_tmc(unsigned numThreads);
// Warp Barrier
void vx_barrier(unsigned barriedID, unsigned numWarps);
// split on a predicate
// Split on a predicate
void vx_split(unsigned predicate);
// Join
void vx_join(void);
// Get Hardware thread ID
unsigned vx_threadID(void);
// Return the warp thread index
unsigned vx_thread_id(void);
// Get hardware warp ID
unsigned vx_warpID(void);
// Return the core warp index
unsigned vx_warp_id(void);
// Get global warp number
unsigned vx_warpNum(void);
// Return processor-unique thread id
unsigned vx_thread_gid(void);
// Get Number cycles/Inst
unsigned vx_getCycles(void);
unsigned vx_getInst(void);
// Return processor-unique warp id
unsigned vx_warp_gid(void);
void vx_resetStack(void);
// Return number cycles
unsigned vx_num_cycles(void);
// Return number instructions
unsigned vx_num_instrs(void);
#define __if(b) vx_split(b); \
if (b)

View file

@ -1,85 +0,0 @@
.section .text
.type vx_wspawn, @function
.global vx_wspawn
vx_wspawn:
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
ret
.type vx_tmc, @function
.global vx_tmc
vx_tmc:
.word 0x0005006b # tmc a0
ret
.type vx_barrier, @function
.global vx_barrier
vx_barrier:
.word 0x00b5406b # barrier a0(barrier id), a1(numWarps)
ret
.type vx_split, @function
.global vx_split
vx_split:
.word 0x0005206b # split a0
ret
.type vx_join, @function
.global vx_join
vx_join:
.word 0x0000306b #join
ret
.type vx_warpID, @function
.global vx_warpID
vx_warpID:
csrr a0, 0x21 # read warp IDs
ret
.type vx_warpNum, @function
.global vx_warpNum
vx_warpNum:
csrr a0, 0x22 # read warp IDs
ret
.type vx_threadID, @function
.global vx_threadID
vx_threadID:
csrr a0, 0x20 # read thread IDs
ret
.type vx_getCycles, @function
.global vx_getCycles
vx_getCycles:
csrr a0, 0x26 # read thread IDs
ret
.type vx_getInst, @function
.global vx_getInst
vx_getInst:
csrr a0, 0x25 # read thread IDs
ret
.type vx_resetStack, @function
.global vx_resetStack
vx_resetStack:
li a0, 4
.word 0x0005006b # tmc 4
csrr a3, 0x21 # get wid
slli a3, a3, 15 # shift by wid
csrr a2, 0x20 # get tid
slli a1, a2, 10 # multiply tid by 1024
slli a2, a2, 2 # multiply tid by 4
lui sp, 0x6ffff # load base sp
sub sp, sp, a1 # sub sp - (1024*tid)
sub sp, sp, a3 # shoft per warp
add sp, sp, a2 # shift sp for better performance
csrr a3, 0x21 # get wid
beqz a3, RETURN
li a0, 0
.word 0x0005006b # tmc 0
RETURN:
ret

View file

@ -1,4 +1,4 @@
#include "../config.h"
.type vx_print_str, @function
.global vx_print_str
@ -29,5 +29,5 @@ vx_printc:
.section .data
print_addr:
.word 0x00010000
.word IO_BUS_ADDR

View file

@ -8,6 +8,7 @@ extern "C" {
#endif
static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
void vx_print_hex(unsigned);
void vx_printf(const char *, unsigned);

View file

@ -313,7 +313,7 @@ void _kill()
unsigned _getpid()
{
return vx_threadID();
return vx_thread_id();
}
void _unlink()

View file

@ -38,20 +38,18 @@ vx_set_sp:
addi gp, gp, %pcrel_lo(1b)
.option pop
csrr a3, 0x22 # get global warp number
slli a3, a3, 0x1a # shift by wid
csrr a2, 0x20 # get tid
slli a1, a2, 10 # multiply tid by 1024
csrr a1, CSR_GTID # get gtid
slli a1, a1, 10 # multiply tid by 1024
csrr a2, CSR_LTID # get tid
slli a2, a2, 2 # multiply tid by 4
lui sp, 0x6ffff # load base sp
sub sp, sp, a1 # sub sp - (1024*tid)
sub sp, sp, a3 # shoft per warp
add sp, sp, a2 # shift sp for better performance
lui sp, STACK_BASE_ADDR # load base sp
sub sp, sp, a1 # sub thread block
add sp, sp, a2 # reduce addr collision for perf
csrr a3, 0x21 # get wid
csrr a3, CSR_LWID # get wid
beqz a3, RETURN
li a0, 0
.word 0x0005006b # tmc 0
.word 0x0005006b # tmc 0
RETURN:
ret

View file

@ -12,8 +12,8 @@ CPY = $(TOOLPATH)/riscv32-unknown-elf-objcopy
VX_STR = ../../startup/vx_start.S
VX_INT = ../../intrinsics/vx_intrinsics.s
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
VX_INT = ../../intrinsics/vx_intrinsics.S
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
VX_API = ../../vx_api/vx_api.c
VX_TEST = ../common/tests.c

View file

@ -41,8 +41,8 @@ void mat_add_kernel(void * void_arguments)
{
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
unsigned wid = vx_warpID();
unsigned tid = vx_threadID();
unsigned wid = vx_warp_id();
unsigned tid = vx_thread_id();
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
@ -77,7 +77,7 @@ int main()
// void * hellp = malloc(4);
vx_print_str("Confirm Dev Main\n");
vx_print_str("vx_spawnWarps\n");
vx_print_str("vx_spawn_warps\n");
mat_add_args_t arguments;
arguments.x = x;
@ -91,7 +91,7 @@ int main()
int numThreads = 4;
// First kernel call
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
vx_print_mat(z, arguments.numRows, arguments.numColums);
@ -102,7 +102,7 @@ int main()
arguments.numRows = 4;
// Second Kernel Call
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
vx_print_mat(z, arguments.numRows, arguments.numColums);

View file

@ -77,23 +77,23 @@ Disassembly of section .text:
800000cc: 0000306b 0x306b
800000d0: 00008067 ret
800000d4 <vx_warpID>:
800000d4 <vx_warp_id>:
800000d4: 02102573 csrr a0,0x21
800000d8: 00008067 ret
800000dc <vx_threadID>:
800000dc <vx_thread_id>:
800000dc: 02002573 csrr a0,0x20
800000e0: 00008067 ret
800000e4 <vx_getCycles>:
800000e4 <vx_num_cycles>:
800000e4: 02602573 csrr a0,0x26
800000e8: 00008067 ret
800000ec <vx_getInst>:
800000ec <vx_num_instrs>:
800000ec: 02502573 csrr a0,0x25
800000f0: 00008067 ret
800000f4 <vx_resetStack>:
800000f4 <vx_reset_stack>:
800000f4: 00400513 li a0,4
800000f8: 0005006b 0x5006b
800000fc: 021026f3 csrr a3,0x21
@ -219,7 +219,7 @@ Disassembly of section .text:
8000029c: 3a01a783 lw a5,928(gp) # 80016ba8 <global_argument_struct>
800002a0: 00078513 mv a0,a5
800002a4: 000700e7 jalr a4
800002a8: e2dff0ef jal ra,800000d4 <vx_warpID>
800002a8: e2dff0ef jal ra,800000d4 <vx_warp_id>
800002ac: fea42623 sw a0,-20(s0)
800002b0: fec42783 lw a5,-20(s0)
800002b4: 00078863 beqz a5,800002c4 <setup_call+0x48>
@ -234,7 +234,7 @@ Disassembly of section .text:
800002d8: 02010113 addi sp,sp,32
800002dc: 00008067 ret
800002e0 <vx_spawnWarps>:
800002e0 <vx_spawn_warps>:
800002e0: fe010113 addi sp,sp,-32
800002e4: 00112e23 sw ra,28(sp)
800002e8: 00812c23 sw s0,24(sp)
@ -269,10 +269,10 @@ Disassembly of section .text:
80000354: 3b01a783 lw a5,944(gp) # 80016bb8 <pocl_threads>
80000358: 00078513 mv a0,a5
8000035c: d59ff0ef jal ra,800000b4 <vx_tmc>
80000360: d7dff0ef jal ra,800000dc <vx_threadID>
80000360: d7dff0ef jal ra,800000dc <vx_thread_id>
80000364: 00050793 mv a5,a0
80000368: fef42023 sw a5,-32(s0)
8000036c: d69ff0ef jal ra,800000d4 <vx_warpID>
8000036c: d69ff0ef jal ra,800000d4 <vx_warp_id>
80000370: 00050793 mv a5,a0
80000374: fcf42e23 sw a5,-36(s0)
80000378: fe042623 sw zero,-20(s0)
@ -389,14 +389,14 @@ Disassembly of section .text:
8000052c: 00078593 mv a1,a5
80000530: 00070513 mv a0,a4
80000534: b79ff0ef jal ra,800000ac <vx_wspawn>
80000538: badff0ef jal ra,800000e4 <vx_getCycles>
80000538: badff0ef jal ra,800000e4 <vx_num_cycles>
8000053c: fea42623 sw a0,-20(s0)
80000540: badff0ef jal ra,800000ec <vx_getInst>
80000540: badff0ef jal ra,800000ec <vx_num_instrs>
80000544: fea42423 sw a0,-24(s0)
80000548: dfdff0ef jal ra,80000344 <pocl_spawn_real>
8000054c: b99ff0ef jal ra,800000e4 <vx_getCycles>
8000054c: b99ff0ef jal ra,800000e4 <vx_num_cycles>
80000550: fea42223 sw a0,-28(s0)
80000554: b99ff0ef jal ra,800000ec <vx_getInst>
80000554: b99ff0ef jal ra,800000ec <vx_num_instrs>
80000558: fea42023 sw a0,-32(s0)
8000055c: fe442703 lw a4,-28(s0)
80000560: fec42783 lw a5,-20(s0)
@ -424,7 +424,7 @@ Disassembly of section .text:
800005b0: b85ff0ef jal ra,80000134 <vx_print_str>
800005b4: 00400513 li a0,4
800005b8: afdff0ef jal ra,800000b4 <vx_tmc>
800005bc: b21ff0ef jal ra,800000dc <vx_threadID>
800005bc: b21ff0ef jal ra,800000dc <vx_thread_id>
800005c0: fea42623 sw a0,-20(s0)
800005c4: fec42703 lw a4,-20(s0)
800005c8: 88418693 addi a3,gp,-1916 # 8001608c <tmc_array>
@ -473,7 +473,7 @@ Disassembly of section .text:
8000066c: 00112e23 sw ra,28(sp)
80000670: 00812c23 sw s0,24(sp)
80000674: 02010413 addi s0,sp,32
80000678: a65ff0ef jal ra,800000dc <vx_threadID>
80000678: a65ff0ef jal ra,800000dc <vx_thread_id>
8000067c: fea42623 sw a0,-20(s0)
80000680: fec42783 lw a5,-20(s0)
80000684: 0027b793 sltiu a5,a5,2
@ -568,7 +568,7 @@ Disassembly of section .text:
800007e0: 00112e23 sw ra,28(sp)
800007e4: 00812c23 sw s0,24(sp)
800007e8: 02010413 addi s0,sp,32
800007ec: 8e9ff0ef jal ra,800000d4 <vx_warpID>
800007ec: 8e9ff0ef jal ra,800000d4 <vx_warp_id>
800007f0: fea42623 sw a0,-20(s0)
800007f4: 3c418713 addi a4,gp,964 # 80016bcc <wsapwn_arr>
800007f8: fec42783 lw a5,-20(s0)
@ -664,9 +664,9 @@ Disassembly of section .text:
80000948: fca42623 sw a0,-52(s0)
8000094c: fcc42783 lw a5,-52(s0)
80000950: fef42623 sw a5,-20(s0)
80000954: f80ff0ef jal ra,800000d4 <vx_warpID>
80000954: f80ff0ef jal ra,800000d4 <vx_warp_id>
80000958: fea42423 sw a0,-24(s0)
8000095c: f80ff0ef jal ra,800000dc <vx_threadID>
8000095c: f80ff0ef jal ra,800000dc <vx_thread_id>
80000960: fea42223 sw a0,-28(s0)
80000964: fec42783 lw a5,-20(s0)
80000968: 0107a783 lw a5,16(a5)
@ -808,7 +808,7 @@ Disassembly of section .text:
80000b78: 800017b7 lui a5,0x80001
80000b7c: 93878613 addi a2,a5,-1736 # 80000938 <__BSS_END__+0xfffe9d48>
80000b80: 00070513 mv a0,a4
80000b84: f5cff0ef jal ra,800002e0 <vx_spawnWarps>
80000b84: f5cff0ef jal ra,800002e0 <vx_spawn_warps>
80000b88: fe442783 lw a5,-28(s0)
80000b8c: 00078713 mv a4,a5
80000b90: fe042783 lw a5,-32(s0)
@ -833,7 +833,7 @@ Disassembly of section .text:
80000bdc: 800017b7 lui a5,0x80001
80000be0: 93878613 addi a2,a5,-1736 # 80000938 <__BSS_END__+0xfffe9d48>
80000be4: 00070513 mv a0,a4
80000be8: ef8ff0ef jal ra,800002e0 <vx_spawnWarps>
80000be8: ef8ff0ef jal ra,800002e0 <vx_spawn_warps>
80000bec: fe442783 lw a5,-28(s0)
80000bf0: 00078713 mv a4,a5
80000bf4: fe042783 lw a5,-32(s0)

View file

@ -9,7 +9,7 @@ CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
NEWLIB = ../../newlib/newlib.c ../../newlib/newlib_notimp.c ../../newlib/newlib.s
VX_STR =
VX_INT = ../../intrinsics/vx_intrinsics.s
VX_INT = ../../intrinsics/vx_intrinsics.S
VX_IO =
VX_API =
VX_FIO =

View file

@ -328,7 +328,7 @@ Disassembly of section .text:
80000488: 00112623 sw ra,12(sp)
8000048c: 00812423 sw s0,8(sp)
80000490: 01010413 addi s0,sp,16
80000494: 0b4000ef jal ra,80000548 <vx_threadID>
80000494: 0b4000ef jal ra,80000548 <vx_thread_id>
80000498: 00050793 mv a5,a0
8000049c: 00078513 mv a0,a5
800004a0: 00c12083 lw ra,12(sp)
@ -388,15 +388,15 @@ Disassembly of section .text:
80000538: 0000306b 0x306b
8000053c: 00008067 ret
80000540 <vx_warpID>:
80000540 <vx_warp_id>:
80000540: 02102573 csrr a0,0x21
80000544: 00008067 ret
80000548 <vx_threadID>:
80000548 <vx_thread_id>:
80000548: 02002573 csrr a0,0x20
8000054c: 00008067 ret
80000550 <vx_resetStack>:
80000550 <vx_reset_stack>:
80000550: 00400513 li a0,4
80000554: 0005006b 0x5006b
80000558: 021026f3 csrr a3,0x21

View file

@ -13,10 +13,10 @@ CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
NEWLIB = ../../newlib/newlib.c
VX_STR = ../../startup/vx_start.S
VX_INT = ../../intrinsics/vx_intrinsics.s
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
VX_INT = ../../intrinsics/vx_intrinsics.S
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
VX_API = ../../vx_api/vx_api.c
VX_FIO = ../../fileio/fileio.s
VX_FIO = ../../fileio/fileio.S
LIBS = -Wl,--whole-archive ./libs/libvecadd.a -Wl,--no-whole-archive ./libs/libOpenCL.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
VX_MAIN = vx_pocl_main

View file

@ -485,7 +485,7 @@ Disassembly of section .text:
800006ec: 00112623 sw ra,12(sp)
800006f0: 00812423 sw s0,8(sp)
800006f4: 01010413 addi s0,sp,16
800006f8: 0e8000ef jal ra,800007e0 <vx_threadID>
800006f8: 0e8000ef jal ra,800007e0 <vx_thread_id>
800006fc: 00050793 mv a5,a0
80000700: 00078513 mv a0,a5
80000704: 00c12083 lw ra,12(sp)
@ -558,15 +558,15 @@ Disassembly of section .text:
800007d0: 0000306b 0x306b
800007d4: 00008067 ret
800007d8 <vx_warpID>:
800007d8 <vx_warp_id>:
800007d8: 02102573 csrr a0,0x21
800007dc: 00008067 ret
800007e0 <vx_threadID>:
800007e0 <vx_thread_id>:
800007e0: 02002573 csrr a0,0x20
800007e4: 00008067 ret
800007e8 <vx_resetStack>:
800007e8 <vx_reset_stack>:
800007e8: 00400513 li a0,4
800007ec: 0005006b 0x5006b
800007f0: 021026f3 csrr a3,0x21

View file

@ -9,10 +9,10 @@ CPY = /opt/riscv/bin/riscv32-unknown-elf-objcopy
NEWLIB = ../../newlib/newlib.c ../../newlib/newlib_notimp.c ../../newlib/newlib.s
VX_STR = ../../startup/vx_start.S
VX_INT = ../../intrinsics/vx_intrinsics.s
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
VX_INT = ../../intrinsics/vx_intrinsics.S
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
VX_API = ../../vx_api/vx_api.c
VX_FIO = ../../fileio/fileio.s
VX_FIO = ../../fileio/fileio.S
VX_MAIN = ./vx_nl_main.c

View file

@ -407,15 +407,15 @@ Disassembly of section .text:
800005bc: 0000306b 0x306b
800005c0: 00008067 ret
800005c4 <vx_warpID>:
800005c4 <vx_warp_id>:
800005c4: 02102573 csrr a0,0x21
800005c8: 00008067 ret
800005cc <vx_threadID>:
800005cc <vx_thread_id>:
800005cc: 02002573 csrr a0,0x20
800005d0: 00008067 ret
800005d4 <vx_resetStack>:
800005d4 <vx_reset_stack>:
800005d4: 00400513 li a0,4
800005d8: 0005006b 0x5006b
800005dc: 021026f3 csrr a3,0x21
@ -544,7 +544,7 @@ Disassembly of section .text:
80000788: 9947a783 lw a5,-1644(a5) # 81001994 <_PathLocale+0xffffff9c>
8000078c: 00078513 mv a0,a5
80000790: 000700e7 jalr a4
80000794: e31ff0ef jal ra,800005c4 <vx_warpID>
80000794: e31ff0ef jal ra,800005c4 <vx_warp_id>
80000798: fea42623 sw a0,-20(s0)
8000079c: fec42783 lw a5,-20(s0)
800007a0: 00078863 beqz a5,800007b0 <setup_call+0x54>
@ -559,7 +559,7 @@ Disassembly of section .text:
800007c4: 02010113 addi sp,sp,32
800007c8: 00008067 ret
800007cc <vx_spawnWarps>:
800007cc <vx_spawn_warps>:
800007cc: fe010113 addi sp,sp,-32
800007d0: 00112e23 sw ra,28(sp)
800007d4: 00812c23 sw s0,24(sp)
@ -599,7 +599,7 @@ Disassembly of section .text:
80000854: dc1ff0ef jal ra,80000614 <vx_print_str>
80000858: 00400513 li a0,4
8000085c: d49ff0ef jal ra,800005a4 <vx_tmc>
80000860: d6dff0ef jal ra,800005cc <vx_threadID>
80000860: d6dff0ef jal ra,800005cc <vx_thread_id>
80000864: fea42623 sw a0,-20(s0)
80000868: fec42703 lw a4,-20(s0)
8000086c: 810017b7 lui a5,0x81001
@ -652,7 +652,7 @@ Disassembly of section .text:
80000920: 00112e23 sw ra,28(sp)
80000924: 00812c23 sw s0,24(sp)
80000928: 02010413 addi s0,sp,32
8000092c: ca1ff0ef jal ra,800005cc <vx_threadID>
8000092c: ca1ff0ef jal ra,800005cc <vx_thread_id>
80000930: fea42623 sw a0,-20(s0)
80000934: fec42783 lw a5,-20(s0)
80000938: 0027b793 sltiu a5,a5,2
@ -754,7 +754,7 @@ Disassembly of section .text:
80000ab0: 00112e23 sw ra,28(sp)
80000ab4: 00812c23 sw s0,24(sp)
80000ab8: 02010413 addi s0,sp,32
80000abc: b09ff0ef jal ra,800005c4 <vx_warpID>
80000abc: b09ff0ef jal ra,800005c4 <vx_warp_id>
80000ac0: fea42623 sw a0,-20(s0)
80000ac4: 810027b7 lui a5,0x81002
80000ac8: fec42703 lw a4,-20(s0)

View file

@ -2,7 +2,7 @@
COMP = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-g++
#COMP = /opt/riscv-new/drops/bin/riscv32-unknown-elf-g++
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,../../startup/vx_link.ld -ffreestanding -nostdlib
CC_FLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,../../startup/vx_link.ld -ffreestanding -nostdlib
DMP = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objdump
CPY = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
@ -10,10 +10,10 @@ CPY = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
NEWLIB = ../../newlib/newlib.c
VX_STR = ../../startup/vx_start.S
VX_INT = ../../intrinsics/vx_intrinsics.s
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
VX_INT = ../../intrinsics/vx_intrinsics.S
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
VX_API = ../../vx_api/vx_api.c
VX_FIO = ../../fileio/fileio.s
VX_FIO = ../../fileio/fileio.S
VX_MAIN = vx_simple_main

View file

@ -6,15 +6,19 @@
int tmc_array[4] = {5,5,5,5};
void test_tmc_impl()
{
unsigned tid = vx_thread_id(); // Get TID
tmc_array[tid] = tid;
}
void test_tmc()
{
//vx_print_str("testing_tmc\n");
vx_print_str("testing_tmc\n");
vx_tmc(4);
unsigned tid = vx_threadID(); // Get TID
tmc_array[tid] = tid;
test_tmc_impl();
vx_tmc(1);
@ -34,7 +38,7 @@ int div_arr[4];
void test_divergence()
{
unsigned tid = vx_threadID(); // Get TID
unsigned tid = vx_thread_id(); // Get TID
bool b = tid < 2;
__if (b)
@ -73,20 +77,16 @@ void test_divergence()
vx_print_str("\n");
vx_print_hex(div_arr[3]);
vx_print_str("\n");
}
unsigned wsapwn_arr[4];
void simple_kernel()
{
unsigned wid = vx_warpID();
unsigned wid = vx_warp_id();
wsapwn_arr[wid] = wid;
wid = vx_warpID();
if (wid != 0)
{
vx_tmc(0);

View file

@ -24,7 +24,7 @@ unsigned y[] = {1, 1, 1, 1,
1, 1, 1, 1,
1, 1, 1, 1,
1, 1, 1, 1};
F
unsigned z[] = {0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
@ -34,8 +34,8 @@ void mat_add_kernel(void * void_arguments)
{
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
unsigned wid = vx_warpID();
unsigned tid = vx_threadID();
unsigned wid = vx_warp_id();
unsigned tid = vx_thread_id();
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
@ -50,7 +50,7 @@ void mat_add_kernel(void * void_arguments)
int main()
{
// Main is called with all threads active of warp 0
// ensure single thread
vx_tmc(1);
vx_print_str("Let's start... (This might take a while)\n");
@ -84,11 +84,9 @@ int main()
vx_print_str("Wr->read and repeat(Wr) tests passed!\n");
}
vx_print_str("Simple Main\n");
// // TMC test
// TMC test
test_tmc();
// Control Divergence Test
@ -118,7 +116,7 @@ int main()
}
vx_print_str("vx_spawnWarps mat_add_kernel\n");
vx_print_str("vx_spawn_warps mat_add_kernel\n");
mat_add_args_t arguments;
arguments.x = x;
@ -131,7 +129,7 @@ int main()
int numWarps = 4;
int numThreads = 4;
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
vx_print_str("Waiting to ensure other warps are done... (Takes a while)\n");
for (int i = 0; i < 5000; i++) {}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -2,7 +2,7 @@
#include "io/io.h" // Printing functions
#include "intrinsics/instrinsics.h" // vx_threadID and vx_WarpID
#include "intrinsics/instrinsics.h" // vx_thread_id and vx_WarpID
struct args
{
@ -14,7 +14,7 @@ void function(void * arg)
{
struct args * real_arg = (struct args *) arg;
unsigned tid = vx_threadID();
unsigned tid = vx_thread_id();
unsigned wid = vx_WarpID();
__if(something) // Control divergent if
@ -36,7 +36,7 @@ int main()
struct args arg;
arg.data = data;
vx_spawnWarps(numWarps, numThreads, function, &data);
vx_spawn_warps(numWarps, numThreads, function, &data);
}

View file

@ -13,10 +13,10 @@ CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
NEWLIB = ../../newlib/newlib.c
VX_STR = ../../startup/vx_start.S
VX_INT = ../../intrinsics/vx_intrinsics.s
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
VX_INT = ../../intrinsics/vx_intrinsics.S
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
VX_API = ../../vx_api/vx_api.c
VX_FIO = ../../fileio/fileio.s
VX_FIO = ../../fileio/fileio.S
LIBS = -Wl,--whole-archive ./libs/libvecadd.a -Wl,--no-whole-archive ./libs/libOpenCL.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
VX_MAIN = vx_pocl_main

View file

@ -344,7 +344,7 @@ Disassembly of section .text:
800004b8: 00112623 sw ra,12(sp)
800004bc: 00812423 sw s0,8(sp)
800004c0: 01010413 addi s0,sp,16
800004c4: 0ac000ef jal ra,80000570 <vx_threadID>
800004c4: 0ac000ef jal ra,80000570 <vx_thread_id>
800004c8: 00050793 mv a5,a0
800004cc: 00078513 mv a0,a5
800004d0: 00c12083 lw ra,12(sp)
@ -406,11 +406,11 @@ Disassembly of section .text:
80000568: 02102573 csrr a0,0x21
8000056c: 00008067 ret
80000570 <vx_threadID>:
80000570 <vx_thread_id>:
80000570: 02002573 csrr a0,0x20
80000574: 00008067 ret
80000578 <vx_resetStack>:
80000578 <vx_reset_stack>:
80000578: 00400513 li a0,4
8000057c: 0005006b 0x5006b
80000580: 021026f3 csrr a3,0x21

View file

@ -16,9 +16,9 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
NEWLIB = ../../newlib/newlib.c
VX_STR = ../../startup/vx_start.S
VX_INT = ../../intrinsics/vx_intrinsics.S
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
VX_API = ../../vx_api/vx_api.c
VX_FIO = ../../fileio/fileio.s
VX_FIO = ../../fileio/fileio.S
VX_VEC = vx_vec.s
#LIBS = /home/fares/dev/riscv-gnu-toolchain-vector/drops/riscv32-unknown-elf/lib/libc.a /home/fares/dev/riscv-gnu-toolchain-vector/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
LIBS = /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libc.a /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc

View file

@ -500,7 +500,7 @@ Disassembly of section .text:
80000718: 00112623 sw ra,12(sp)
8000071c: 00812423 sw s0,8(sp)
80000720: 01010413 addi s0,sp,16
80000724: 0e8000ef jal ra,8000080c <vx_threadID>
80000724: 0e8000ef jal ra,8000080c <vx_thread_id>
80000728: 00050793 mv a5,a0
8000072c: 00078513 mv a0,a5
80000730: 00c12083 lw ra,12(sp)
@ -573,15 +573,15 @@ Disassembly of section .text:
800007fc: 0000306b 0x306b
80000800: 00008067 ret
80000804 <vx_warpID>:
80000804 <vx_warp_id>:
80000804: 02102573 csrr a0,0x21
80000808: 00008067 ret
8000080c <vx_threadID>:
8000080c <vx_thread_id>:
8000080c: 02002573 csrr a0,0x20
80000810: 00008067 ret
80000814 <vx_resetStack>:
80000814 <vx_reset_stack>:
80000814: 00400513 li a0,4
80000818: 0005006b 0x5006b
8000081c: 021026f3 csrr a3,0x21

View file

@ -19,12 +19,12 @@ void spawn_warp_runonce() {
global_function_pointer(global_argument_struct);
// resume single-thread execution on exit
unsigned wid = vx_warpID();
unsigned wid = vx_warp_id();
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
vx_tmc(tmask);
}
void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) {
void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) {
global_function_pointer = func_ptr;
global_argument_struct = args;
global_num_threads = numThreads;
@ -43,15 +43,15 @@ void pocl_spawn_warp_runonce() {
// active all threads
vx_tmc(pocl_threads);
int x = vx_threadID();
int y = vx_warpNum();
int x = vx_thread_id();
int y = vx_warp_gid();
// call kernel routine
(pocl_pfn)(pocl_args, pocl_ctx, x, y, 0);
// resume single-thread execution on exit
int wid = vx_warpID();
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
int wid = vx_warp_id();
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
vx_tmc(tmask);
}

View file

@ -11,7 +11,7 @@ extern "C" {
typedef void (*func_t)(void *);
void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args);
void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args);
struct context_t {
uint32_t num_groups[3];