mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
rtl refactoring
This commit is contained in:
parent
a1dc90b951
commit
69f607b73e
83 changed files with 30487 additions and 30536 deletions
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
|
|||
|
||||
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_SRCS += $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.S
|
||||
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
|
||||
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
|
||||
|
|
|
@ -11,10 +11,10 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
NEWLIB = $(LIB_PATH)/newlib/newlib.c
|
||||
VX_STR = $(LIB_PATH)/startup/vx_start.S
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
|
||||
VX_API = $(LIB_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.s
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.S
|
||||
VX_VEC = vx_vec_saxpy.s #float --> int
|
||||
LIBS = /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libc.a /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
||||
|
|
|
@ -39,11 +39,11 @@ int main()
|
|||
// for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
|
||||
#endif
|
||||
|
||||
int startCycles = vx_getCycles();
|
||||
int startInst = vx_getInst();
|
||||
int startCycles = vx_num_cycles();
|
||||
int startInst = vx_num_instrs();
|
||||
vx_vec_saxpy(n, factor, a, b);
|
||||
int endCycles = vx_getCycles();
|
||||
int endInst = vx_getInst();
|
||||
int endCycles = vx_num_cycles();
|
||||
int endInst = vx_num_instrs();
|
||||
|
||||
int totalInst = (endInst - startInst);
|
||||
int totalCycles = (endCycles - startCycles);
|
||||
|
|
|
@ -484,7 +484,7 @@ Disassembly of section .text:
|
|||
800006e0: 00112623 sw ra,12(sp)
|
||||
800006e4: 00812423 sw s0,8(sp)
|
||||
800006e8: 01010413 addi s0,sp,16
|
||||
800006ec: 0d4000ef jal ra,800007c0 <vx_threadID>
|
||||
800006ec: 0d4000ef jal ra,800007c0 <vx_thread_id>
|
||||
800006f0: 00050793 mv a5,a0
|
||||
800006f4: 00078513 mv a0,a5
|
||||
800006f8: 00c12083 lw ra,12(sp)
|
||||
|
@ -552,23 +552,23 @@ Disassembly of section .text:
|
|||
800007b0: 0000306b 0x306b
|
||||
800007b4: 00008067 ret
|
||||
|
||||
800007b8 <vx_warpID>:
|
||||
800007b8 <vx_warp_id>:
|
||||
800007b8: 02102573 csrr a0,0x21
|
||||
800007bc: 00008067 ret
|
||||
|
||||
800007c0 <vx_threadID>:
|
||||
800007c0 <vx_thread_id>:
|
||||
800007c0: 02002573 csrr a0,0x20
|
||||
800007c4: 00008067 ret
|
||||
|
||||
800007c8 <vx_getCycles>:
|
||||
800007c8 <vx_num_cycles>:
|
||||
800007c8: 02602573 csrr a0,0x26
|
||||
800007cc: 00008067 ret
|
||||
|
||||
800007d0 <vx_getInst>:
|
||||
800007d0 <vx_num_instrs>:
|
||||
800007d0: 02502573 csrr a0,0x25
|
||||
800007d4: 00008067 ret
|
||||
|
||||
800007d8 <vx_resetStack>:
|
||||
800007d8 <vx_reset_stack>:
|
||||
800007d8: 00400513 li a0,4
|
||||
800007dc: 0005006b 0x5006b
|
||||
800007e0: 021026f3 csrr a3,0x21
|
||||
|
@ -731,10 +731,10 @@ Disassembly of section .text:
|
|||
80000a24: fe842703 lw a4,-24(s0)
|
||||
80000a28: fe042783 lw a5,-32(s0)
|
||||
80000a2c: fcf744e3 blt a4,a5,800009f4 <main+0x1a0>
|
||||
80000a30: d99ff0ef jal ra,800007c8 <vx_getCycles>
|
||||
80000a30: d99ff0ef jal ra,800007c8 <vx_num_cycles>
|
||||
80000a34: 00050793 mv a5,a0
|
||||
80000a38: fcf42623 sw a5,-52(s0)
|
||||
80000a3c: d95ff0ef jal ra,800007d0 <vx_getInst>
|
||||
80000a3c: d95ff0ef jal ra,800007d0 <vx_num_instrs>
|
||||
80000a40: 00050793 mv a5,a0
|
||||
80000a44: fcf42423 sw a5,-56(s0)
|
||||
80000a48: fd842683 lw a3,-40(s0)
|
||||
|
@ -742,10 +742,10 @@ Disassembly of section .text:
|
|||
80000a50: fd042583 lw a1,-48(s0)
|
||||
80000a54: fe042503 lw a0,-32(s0)
|
||||
80000a58: e54ff0ef jal ra,800000ac <vx_vec_saxpy>
|
||||
80000a5c: d6dff0ef jal ra,800007c8 <vx_getCycles>
|
||||
80000a5c: d6dff0ef jal ra,800007c8 <vx_num_cycles>
|
||||
80000a60: 00050793 mv a5,a0
|
||||
80000a64: fcf42223 sw a5,-60(s0)
|
||||
80000a68: d69ff0ef jal ra,800007d0 <vx_getInst>
|
||||
80000a68: d69ff0ef jal ra,800007d0 <vx_num_instrs>
|
||||
80000a6c: 00050793 mv a5,a0
|
||||
80000a70: fcf42023 sw a5,-64(s0)
|
||||
80000a74: fc042703 lw a4,-64(s0)
|
||||
|
|
|
@ -11,10 +11,10 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
NEWLIB = $(LIB_PATH)/newlib/newlib.c
|
||||
VX_STR = $(LIB_PATH)/startup/vx_start.S
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
|
||||
VX_API = $(LIB_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.s
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.S
|
||||
VX_VEC = vx_vec_sfilter.s #float --> int
|
||||
LIBS = /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libc.a /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
||||
|
|
|
@ -36,15 +36,15 @@ int main()
|
|||
}
|
||||
|
||||
int N = 4;
|
||||
int startCycles = vx_getCycles();
|
||||
int startInst = vx_getInst();
|
||||
int startCycles = vx_num_cycles();
|
||||
int startInst = vx_num_instrs();
|
||||
for(int y = 1; y < (NUM_DATA-1); y++){
|
||||
for(int x = 1; x < (NUM_DATA-1); x = x+N) {
|
||||
vx_vec_sfilter(a, b, ldc, m, x, y, N);
|
||||
}
|
||||
}
|
||||
int endCycles = vx_getCycles();
|
||||
int endInst = vx_getInst();
|
||||
int endCycles = vx_num_cycles();
|
||||
int endInst = vx_num_instrs();
|
||||
|
||||
int totalInst = (endInst - startInst);
|
||||
int totalCycles = (endCycles - startCycles);
|
||||
|
|
|
@ -549,7 +549,7 @@ Disassembly of section .text:
|
|||
800007e4: 00112623 sw ra,12(sp)
|
||||
800007e8: 00812423 sw s0,8(sp)
|
||||
800007ec: 01010413 addi s0,sp,16
|
||||
800007f0: 0d4000ef jal ra,800008c4 <vx_threadID>
|
||||
800007f0: 0d4000ef jal ra,800008c4 <vx_thread_id>
|
||||
800007f4: 00050793 mv a5,a0
|
||||
800007f8: 00078513 mv a0,a5
|
||||
800007fc: 00c12083 lw ra,12(sp)
|
||||
|
@ -617,23 +617,23 @@ Disassembly of section .text:
|
|||
800008b4: 0000306b 0x306b
|
||||
800008b8: 00008067 ret
|
||||
|
||||
800008bc <vx_warpID>:
|
||||
800008bc <vx_warp_id>:
|
||||
800008bc: 02102573 csrr a0,0x21
|
||||
800008c0: 00008067 ret
|
||||
|
||||
800008c4 <vx_threadID>:
|
||||
800008c4 <vx_thread_id>:
|
||||
800008c4: 02002573 csrr a0,0x20
|
||||
800008c8: 00008067 ret
|
||||
|
||||
800008cc <vx_getCycles>:
|
||||
800008cc <vx_num_cycles>:
|
||||
800008cc: 02602573 csrr a0,0x26
|
||||
800008d0: 00008067 ret
|
||||
|
||||
800008d4 <vx_getInst>:
|
||||
800008d4 <vx_num_instrs>:
|
||||
800008d4: 02502573 csrr a0,0x25
|
||||
800008d8: 00008067 ret
|
||||
|
||||
800008dc <vx_resetStack>:
|
||||
800008dc <vx_reset_stack>:
|
||||
800008dc: 00400513 li a0,4
|
||||
800008e0: 0005006b 0x5006b
|
||||
800008e4: 021026f3 csrr a3,0x21
|
||||
|
@ -734,10 +734,10 @@ Disassembly of section .text:
|
|||
80000a30: faf746e3 blt a4,a5,800009dc <main+0x84>
|
||||
80000a34: 00400793 li a5,4
|
||||
80000a38: fcf42023 sw a5,-64(s0)
|
||||
80000a3c: e91ff0ef jal ra,800008cc <vx_getCycles>
|
||||
80000a3c: e91ff0ef jal ra,800008cc <vx_num_cycles>
|
||||
80000a40: 00050793 mv a5,a0
|
||||
80000a44: faf42e23 sw a5,-68(s0)
|
||||
80000a48: e8dff0ef jal ra,800008d4 <vx_getInst>
|
||||
80000a48: e8dff0ef jal ra,800008d4 <vx_num_instrs>
|
||||
80000a4c: 00050793 mv a5,a0
|
||||
80000a50: faf42c23 sw a5,-72(s0)
|
||||
80000a54: 00100793 li a5,1
|
||||
|
@ -767,10 +767,10 @@ Disassembly of section .text:
|
|||
80000ab4: fe842703 lw a4,-24(s0)
|
||||
80000ab8: 04000793 li a5,64
|
||||
80000abc: fae7d2e3 bge a5,a4,80000a60 <main+0x108>
|
||||
80000ac0: e0dff0ef jal ra,800008cc <vx_getCycles>
|
||||
80000ac0: e0dff0ef jal ra,800008cc <vx_num_cycles>
|
||||
80000ac4: 00050793 mv a5,a0
|
||||
80000ac8: faf42a23 sw a5,-76(s0)
|
||||
80000acc: e09ff0ef jal ra,800008d4 <vx_getInst>
|
||||
80000acc: e09ff0ef jal ra,800008d4 <vx_num_instrs>
|
||||
80000ad0: 00050793 mv a5,a0
|
||||
80000ad4: faf42823 sw a5,-80(s0)
|
||||
80000ad8: fb042703 lw a4,-80(s0)
|
||||
|
|
|
@ -11,10 +11,10 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
NEWLIB = $(LIB_PATH)/newlib/newlib.c
|
||||
VX_STR = $(LIB_PATH)/startup/vx_start.S
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
|
||||
VX_API = $(LIB_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.s
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.S
|
||||
VX_VEC = vx_vec_sgemm_nn.s #float --> int
|
||||
LIBS = /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libc.a /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
||||
|
|
|
@ -50,8 +50,8 @@ int main()
|
|||
int vsize = 32;
|
||||
|
||||
|
||||
int startCycles = vx_getCycles();
|
||||
int startInst = vx_getInst();
|
||||
int startCycles = vx_num_cycles();
|
||||
int startInst = vx_num_instrs();
|
||||
for (int r = 0; r < m; r++) {
|
||||
for (int c = 0; c < n; c++) {
|
||||
for (int i = 0; i < k;) {
|
||||
|
@ -61,8 +61,8 @@ int main()
|
|||
}
|
||||
}
|
||||
}
|
||||
int endCycles = vx_getCycles();
|
||||
int endInst = vx_getInst();
|
||||
int endCycles = vx_num_cycles();
|
||||
int endInst = vx_num_instrs();
|
||||
|
||||
int totalInst = (endInst - startInst);
|
||||
int totalCycles = (endCycles - startCycles);
|
||||
|
|
|
@ -492,7 +492,7 @@ Disassembly of section .text:
|
|||
80000700: 00112623 sw ra,12(sp)
|
||||
80000704: 00812423 sw s0,8(sp)
|
||||
80000708: 01010413 addi s0,sp,16
|
||||
8000070c: 0d4000ef jal ra,800007e0 <vx_threadID>
|
||||
8000070c: 0d4000ef jal ra,800007e0 <vx_thread_id>
|
||||
80000710: 00050793 mv a5,a0
|
||||
80000714: 00078513 mv a0,a5
|
||||
80000718: 00c12083 lw ra,12(sp)
|
||||
|
@ -560,23 +560,23 @@ Disassembly of section .text:
|
|||
800007d0: 0000306b 0x306b
|
||||
800007d4: 00008067 ret
|
||||
|
||||
800007d8 <vx_warpID>:
|
||||
800007d8 <vx_warp_id>:
|
||||
800007d8: 02102573 csrr a0,0x21
|
||||
800007dc: 00008067 ret
|
||||
|
||||
800007e0 <vx_threadID>:
|
||||
800007e0 <vx_thread_id>:
|
||||
800007e0: 02002573 csrr a0,0x20
|
||||
800007e4: 00008067 ret
|
||||
|
||||
800007e8 <vx_getCycles>:
|
||||
800007e8 <vx_num_cycles>:
|
||||
800007e8: 02602573 csrr a0,0x26
|
||||
800007ec: 00008067 ret
|
||||
|
||||
800007f0 <vx_getInst>:
|
||||
800007f0 <vx_num_instrs>:
|
||||
800007f0: 02502573 csrr a0,0x25
|
||||
800007f4: 00008067 ret
|
||||
|
||||
800007f8 <vx_resetStack>:
|
||||
800007f8 <vx_reset_stack>:
|
||||
800007f8: 00400513 li a0,4
|
||||
800007fc: 0005006b 0x5006b
|
||||
80000800: 021026f3 csrr a3,0x21
|
||||
|
@ -734,10 +734,10 @@ Disassembly of section .text:
|
|||
80000a30: f8f42e23 sw a5,-100(s0)
|
||||
80000a34: 02000793 li a5,32
|
||||
80000a38: f8f42c23 sw a5,-104(s0)
|
||||
80000a3c: dadff0ef jal ra,800007e8 <vx_getCycles>
|
||||
80000a3c: dadff0ef jal ra,800007e8 <vx_num_cycles>
|
||||
80000a40: 00050793 mv a5,a0
|
||||
80000a44: f8f42a23 sw a5,-108(s0)
|
||||
80000a48: da9ff0ef jal ra,800007f0 <vx_getInst>
|
||||
80000a48: da9ff0ef jal ra,800007f0 <vx_num_instrs>
|
||||
80000a4c: 00050793 mv a5,a0
|
||||
80000a50: f8f42823 sw a5,-112(s0)
|
||||
80000a54: fc042e23 sw zero,-36(s0)
|
||||
|
@ -774,10 +774,10 @@ Disassembly of section .text:
|
|||
80000ad0: fdc42703 lw a4,-36(s0)
|
||||
80000ad4: fc042783 lw a5,-64(s0)
|
||||
80000ad8: f8f742e3 blt a4,a5,80000a5c <main+0x1e8>
|
||||
80000adc: d0dff0ef jal ra,800007e8 <vx_getCycles>
|
||||
80000adc: d0dff0ef jal ra,800007e8 <vx_num_cycles>
|
||||
80000ae0: 00050793 mv a5,a0
|
||||
80000ae4: f8f42623 sw a5,-116(s0)
|
||||
80000ae8: d09ff0ef jal ra,800007f0 <vx_getInst>
|
||||
80000ae8: d09ff0ef jal ra,800007f0 <vx_num_instrs>
|
||||
80000aec: 00050793 mv a5,a0
|
||||
80000af0: f8f42423 sw a5,-120(s0)
|
||||
80000af4: f8842703 lw a4,-120(s0)
|
||||
|
|
|
@ -11,10 +11,10 @@ CPY = /nethome/ekim79/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
NEWLIB = $(LIB_PATH)/newlib/newlib.c
|
||||
VX_STR = $(LIB_PATH)/startup/vx_start.S
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
|
||||
VX_API = $(LIB_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.s
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.S
|
||||
VX_VEC1 = vx_vec_vvaddint32.s
|
||||
#VX_VEC2 = vx_vec_saxpy.s #float --> int
|
||||
#VX_VEC3 = vx_vec_sgemm.s #float --> int
|
||||
|
|
|
@ -486,7 +486,7 @@ Disassembly of section .text:
|
|||
800006e0: 00112623 sw ra,12(sp)
|
||||
800006e4: 00812423 sw s0,8(sp)
|
||||
800006e8: 01010413 addi s0,sp,16
|
||||
800006ec: 0d4000ef jal ra,800007c0 <vx_threadID>
|
||||
800006ec: 0d4000ef jal ra,800007c0 <vx_thread_id>
|
||||
800006f0: 00050793 mv a5,a0
|
||||
800006f4: 00078513 mv a0,a5
|
||||
800006f8: 00c12083 lw ra,12(sp)
|
||||
|
@ -554,23 +554,23 @@ Disassembly of section .text:
|
|||
800007b0: 0000306b 0x306b
|
||||
800007b4: 00008067 ret
|
||||
|
||||
800007b8 <vx_warpID>:
|
||||
800007b8 <vx_warp_id>:
|
||||
800007b8: 02102573 csrr a0,0x21
|
||||
800007bc: 00008067 ret
|
||||
|
||||
800007c0 <vx_threadID>:
|
||||
800007c0 <vx_thread_id>:
|
||||
800007c0: 02002573 csrr a0,0x20
|
||||
800007c4: 00008067 ret
|
||||
|
||||
800007c8 <vx_getCycles>:
|
||||
800007c8 <vx_num_cycles>:
|
||||
800007c8: 02602573 csrr a0,0x26
|
||||
800007cc: 00008067 ret
|
||||
|
||||
800007d0 <vx_getInst>:
|
||||
800007d0 <vx_num_instrs>:
|
||||
800007d0: 02502573 csrr a0,0x25
|
||||
800007d4: 00008067 ret
|
||||
|
||||
800007d8 <vx_resetStack>:
|
||||
800007d8 <vx_reset_stack>:
|
||||
800007d8: 00400513 li a0,4
|
||||
800007dc: 0005006b 0x5006b
|
||||
800007e0: 021026f3 csrr a3,0x21
|
||||
|
|
|
@ -3,7 +3,7 @@ CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
|
|||
|
||||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
|
||||
|
||||
MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2
|
||||
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
#DEBUG = 1
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
@ -16,7 +16,7 @@ SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
|
|||
|
||||
RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/pipe_regs -I../../hw/rtl/cache
|
||||
|
||||
VL_FLAGS += --assert -Wall -Wpedantic $(MULTICORE)
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(MULTICORE)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
|
|
|
@ -8,10 +8,10 @@ VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_IO = $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.S
|
||||
|
||||
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/startup/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
|
||||
|
||||
|
|
|
@ -8,10 +8,10 @@ VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_IO = $(VX_RT_PATH)/io/vx_io.S $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.S
|
||||
|
||||
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/startup/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
|
||||
|
||||
|
|
|
@ -10,10 +10,10 @@ void kernel_body(void* arg) {
|
|||
int* y = (int*)_arg->src1_ptr;
|
||||
int* z = (int*)_arg->dst_ptr;
|
||||
|
||||
unsigned wNo = vx_warpNum();
|
||||
unsigned tid = vx_threadID();
|
||||
unsigned wid = vx_warp_gid();
|
||||
unsigned tid = vx_thread_id();
|
||||
|
||||
unsigned i = ((wNo * _arg->num_threads) + tid) * _arg->stride;
|
||||
unsigned i = ((wid * _arg->num_threads) + tid) * _arg->stride;
|
||||
|
||||
for (unsigned j = 0; j < _arg->stride; ++j) {
|
||||
z[i+j] = x[i+j] + y[i+j];
|
||||
|
@ -28,5 +28,5 @@ void main() {
|
|||
printf("src0_ptr=0x%x\n", arg->src0_ptr);
|
||||
printf("src1_ptr=0x%x\n", arg->src1_ptr);
|
||||
printf("dst_ptr=0x%x\n", arg->dst_ptr);*/
|
||||
vx_spawnWarps(arg->num_warps, arg->num_threads, kernel_body, arg);
|
||||
vx_spawn_warps(arg->num_warps, arg->num_threads, kernel_body, arg);
|
||||
}
|
|
@ -1730,13 +1730,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000094
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002673 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=0
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=1
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=2
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=3
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=3
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 00000000 00000000 00000000 00000000 (0)
|
||||
|
@ -1789,13 +1789,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000094
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002673 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=0
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=1
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=2
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=3
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=3
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 00000000 00000000 00000000 00000000 (0)
|
||||
|
@ -1848,13 +1848,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000094
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002673 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=0
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=1
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=2
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=3
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=3
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 00000000 00000000 00000000 00000000 (0)
|
||||
|
@ -2127,13 +2127,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000094
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002673 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=0
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=1
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=2
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r12 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r12=3
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r12=3
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 80000014 00000000 00000000 00000000 (0)
|
||||
|
@ -3286,13 +3286,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x800000b0
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x21026f3 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=1
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=1
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=1
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=1
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=1
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 00000000 00000000 00000000 00000000 (0)
|
||||
|
@ -3345,13 +3345,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x800000b0
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x21026f3 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=2
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=2
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=2
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=2
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=2
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 00000000 00000000 00000000 00000000 (0)
|
||||
|
@ -3404,13 +3404,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x800000b0
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x21026f3 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=3
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=3
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=3
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=3
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=3
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=3
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=3
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=3
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 00000000 00000000 00000000 00000000 (0)
|
||||
|
@ -3683,13 +3683,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 1 1 1
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x800000b0
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x21026f3 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=0
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=0
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=0
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r13 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r13=0
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r13=0
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 80000014 00000000 00000000 00000000 (0)
|
||||
|
@ -22160,13 +22160,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 1 1
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000758
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002573 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=3
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 800009d0 800009d0 800009d0 800009d0 (0)
|
||||
|
@ -22219,13 +22219,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 0 1
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000758
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002573 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=3
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 800009d0 800009d0 800009d0 800009d0 (0)
|
||||
|
@ -22278,13 +22278,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000758
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002573 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=3
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 800009d0 800009d0 800009d0 800009d0 (0)
|
||||
|
@ -22337,13 +22337,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000758
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2002573 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=32
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_threadID: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:788: vx_thread_id: r10=3
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 800009d0 800009d0 800009d0 800009d0 (0)
|
||||
|
@ -39870,13 +39870,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000748
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2102573 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=2
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=2
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 80000810 80000810 80000810 80000810 (0)
|
||||
|
@ -39929,13 +39929,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000748
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2102573 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=3
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=3
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 80000810 80000810 80000810 80000810 (0)
|
||||
|
@ -39988,13 +39988,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 1 0 0 0
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000748
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2102573 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=1
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=1
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 80000810 80000810 80000810 80000810 (0)
|
||||
|
@ -40159,13 +40159,13 @@ DEBUG ../../../../simX/core.cpp:176: stalled warps: 0 0 1 1
|
|||
DEBUG ../../../../simX/core.cpp:750: current PC=0x80000748
|
||||
DEBUG ../../../../simX/enc.cpp:324: Decoded instr 0x2102573 into: SYS
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:781: SYS_INST: r10 <- r0, imm=33
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warpID: r10=0
|
||||
DEBUG ../../../../simX/instruction.cpp:792: vx_warp_id: r10=0
|
||||
DEBUG ../../../../simX/core.cpp:781: Register state:
|
||||
%r 0: 00000000 00000000 00000000 00000000 (0)
|
||||
%r 1: 800008c4 800008c4 800008c4 800008c4 (0)
|
||||
|
|
49
hw/Makefile
49
hw/Makefile
|
@ -2,11 +2,11 @@ all: singlecore
|
|||
|
||||
CF += -std=c++11 -fms-extensions
|
||||
|
||||
VF += -compiler gcc --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VF += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
|
||||
VF += -exe $(SRCS) $(INCLUDE)
|
||||
|
||||
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
||||
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
|
||||
INCLUDE = -I./rtl/ -I./rtl/libs -I./rtl/interfaces -I./rtl/pipe_regs -I./rtl/cache -I./rtl/simulate
|
||||
|
||||
|
@ -21,58 +21,59 @@ THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu
|
|||
build_config:
|
||||
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./simulate/VX_config.h
|
||||
|
||||
gen-singlecore: build_config
|
||||
gen-s: build_config
|
||||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG'
|
||||
|
||||
gen-singlecore-t: build_config
|
||||
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
|
||||
|
||||
gen-singlecore-d: build_config
|
||||
gen-sd: build_config
|
||||
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
|
||||
|
||||
gen-multicore: build_config
|
||||
gen-st: build_config
|
||||
verilator $(VF) -cc Vortex_Socket.v -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
|
||||
|
||||
gen-m: build_config
|
||||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||
|
||||
gen-multicore-t: build_config
|
||||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||
|
||||
gen-multicore-d: build_config
|
||||
gen-md: build_config
|
||||
verilator $(VF) -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT $(MULTICORE)' $(DBG)
|
||||
|
||||
singlecore: gen-singlecore
|
||||
gen-mt: build_config
|
||||
verilator $(VF) -DNDEBUG -cc Vortex_Socket.v $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||
|
||||
build-s: gen-s
|
||||
(cd obj_dir && make -j -f VVortex_Socket.mk)
|
||||
|
||||
singlecore-t: gen-singlecore-t
|
||||
build-sd: gen-sd
|
||||
(cd obj_dir && make -j -f VVortex_Socket.mk)
|
||||
|
||||
singlecore-d: gen-singlecore-d
|
||||
build-st: gen-st
|
||||
(cd obj_dir && make -j -f VVortex_Socket.mk)
|
||||
|
||||
multicore: gen-multicore
|
||||
build-m: gen-m
|
||||
(cd obj_dir && make -j -f VVortex_Socket.mk)
|
||||
|
||||
multicore-t: gen-multicore-t
|
||||
build-md: gen-md
|
||||
(cd obj_dir && make -j -f VVortex_Socket.mk)
|
||||
|
||||
multicore-d: gen-multicore-d
|
||||
build-mt: gen-mt
|
||||
(cd obj_dir && make -j -f VVortex_Socket.mk)
|
||||
|
||||
run: singlecore
|
||||
run: run-s
|
||||
run-s: build-s
|
||||
(cd obj_dir && ./VVortex_Socket)
|
||||
|
||||
run-d: singlecore-d
|
||||
run-sd: build-sd
|
||||
(cd obj_dir && ./VVortex_Socket)
|
||||
|
||||
run-t: singlecore-t
|
||||
run-st: build-st
|
||||
(cd obj_dir && ./VVortex_Socket)
|
||||
|
||||
run-m: multicore
|
||||
run-m: build-m
|
||||
(cd obj_dir && ./VVortex_Socket)
|
||||
|
||||
run-md: multicore-d
|
||||
run-md: build-md
|
||||
(cd obj_dir && ./VVortex_Socket)
|
||||
|
||||
run-mt: multicore-t
|
||||
run-mt: build-mt
|
||||
(cd obj_dir && ./VVortex_Socket)
|
||||
|
||||
clean:
|
||||
|
|
|
@ -28,21 +28,39 @@
|
|||
`endif
|
||||
|
||||
`ifndef NUM_CSRS
|
||||
`define NUM_CSRS 1024
|
||||
`endif
|
||||
|
||||
`ifndef IO_BUS_ADDR
|
||||
`define IO_BUS_ADDR 32'h00010000
|
||||
`define NUM_CSRS 2
|
||||
`endif
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
`define STARTUP_ADDR 32'h80000000
|
||||
`endif
|
||||
|
||||
`ifndef SHARED_MEM_ADDR_MATCH
|
||||
`define SHARED_MEM_ADDR_MATCH(x) (x[31:24] == 8'hFF)
|
||||
`ifndef SHARED_MEM_TOP_ADDR
|
||||
`define SHARED_MEM_TOP_ADDR 8'hFF
|
||||
`endif
|
||||
|
||||
`ifndef IO_BUS_ADDR
|
||||
`define IO_BUS_ADDR 32'h00010000
|
||||
`endif
|
||||
|
||||
`ifndef STACK_BASE_ADDR
|
||||
`define STACK_BASE_ADDR 20'h6ffff
|
||||
`endif
|
||||
|
||||
`ifndef L2_ENABLE
|
||||
`define L2_ENABLE (`NUM_CORES > 1)
|
||||
`endif
|
||||
|
||||
`define CSR_LTID 12'h020
|
||||
`define CSR_LWID 12'h021
|
||||
`define CSR_GWID 12'h022
|
||||
`define CSR_GTID 12'h023
|
||||
|
||||
`define CSR_CYCLL 12'hC00
|
||||
`define CSR_CYCLH 12'hC80
|
||||
`define CSR_INSTL 12'hC02
|
||||
`define CSR_INSTH 12'hC82
|
||||
|
||||
// ========================= Dcache Configurable Knobs ========================
|
||||
|
||||
// Size of cache in bytes
|
||||
|
|
|
@ -1,84 +1,57 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_data (
|
||||
module VX_csr_data #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk, // Clock
|
||||
input wire reset,
|
||||
|
||||
input wire[`CSR_ADDR_SIZE-1:0] read_csr_address,
|
||||
input wire write_valid,
|
||||
input wire[`CSR_WIDTH-1:0] write_csr_data,
|
||||
|
||||
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
|
||||
output reg[31:0] read_data,
|
||||
input wire write_enable,
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
|
||||
input wire[`CSR_ADDR_SIZE-1:0] write_csr_address,
|
||||
input wire[`CSR_ADDR_SIZE-1:0] write_addr,
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
output wire[31:0] read_csr_data,
|
||||
|
||||
// For instruction retire counting
|
||||
input wire writeback_valid
|
||||
input wire[`CSR_WIDTH-1:0] write_data,
|
||||
input wire[`NW_BITS-1:0] warp_num,
|
||||
input wire wb_valid
|
||||
);
|
||||
// wire[`NUM_THREADS-1:0][31:0] thread_ids;
|
||||
// wire[`NUM_THREADS-1:0][31:0] warp_ids;
|
||||
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
|
||||
|
||||
// genvar cur_t;
|
||||
// for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
|
||||
// assign thread_ids[cur_t] = cur_t;
|
||||
// end
|
||||
|
||||
// genvar cur_tw;
|
||||
// for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
|
||||
// assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, in_read_warp_num};
|
||||
// end
|
||||
|
||||
reg [`CSR_WIDTH-1:0] csr[`NUM_CSRS-1:0];
|
||||
|
||||
reg [63:0] cycle;
|
||||
reg [63:0] instret;
|
||||
|
||||
wire read_cycle;
|
||||
wire read_cycleh;
|
||||
wire read_instret;
|
||||
wire read_instreth;
|
||||
|
||||
assign read_cycle = read_csr_address == `CSR_CYCL_L;
|
||||
assign read_cycleh = read_csr_address == `CSR_CYCL_H;
|
||||
assign read_instret = read_csr_address == `CSR_INST_L;
|
||||
assign read_instreth = read_csr_address == `CSR_INST_H;
|
||||
|
||||
wire [$clog2(`NUM_CSRS)-1:0] read_addr, write_addr;
|
||||
reg [63:0] num_cycles, num_instrs;
|
||||
|
||||
// cast address to physical CSR range
|
||||
assign read_addr = $size(read_addr)'(read_csr_address);
|
||||
assign write_addr = $size(write_addr)'(write_csr_address);
|
||||
|
||||
// wire thread_select = read_csr_address == 12'h20;
|
||||
// wire warp_select = read_csr_address == 12'h21;
|
||||
|
||||
// assign read_csr_data = thread_select ? thread_ids :
|
||||
// warp_select ? warp_ids :
|
||||
// 0;
|
||||
|
||||
genvar curr_e;
|
||||
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
|
||||
assign rd_addr = $size(rd_addr)'(read_addr);
|
||||
assign wr_addr = $size(wr_addr)'(write_addr);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
cycle <= 0;
|
||||
instret <= 0;
|
||||
if (reset) begin
|
||||
num_cycles <= 0;
|
||||
num_instrs <= 0;
|
||||
end else begin
|
||||
cycle <= cycle + 1;
|
||||
if (write_valid) begin
|
||||
csr[write_addr] <= write_csr_data;
|
||||
if (write_enable) begin
|
||||
csr_table[wr_addr] <= write_data;
|
||||
end
|
||||
if (writeback_valid) begin
|
||||
instret <= instret + 1;
|
||||
num_cycles <= num_cycles + 1;
|
||||
if (wb_valid) begin
|
||||
num_instrs <= num_instrs + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign read_csr_data = read_cycle ? cycle[31:0] :
|
||||
read_cycleh ? cycle[63:32] :
|
||||
read_instret ? instret[31:0] :
|
||||
read_instreth ? instret[63:32] :
|
||||
{{20{1'b0}}, csr[read_addr]};
|
||||
endmodule : VX_csr_data
|
||||
always @(*) begin
|
||||
case (read_addr)
|
||||
`CSR_LWID : read_data = 32'(warp_num);
|
||||
`CSR_GTID ,
|
||||
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
|
||||
`CSR_CYCLL : read_data = num_cycles[31:0];
|
||||
`CSR_CYCLH : read_data = num_cycles[63:32];
|
||||
`CSR_INSTL : read_data = num_instrs[31:0];
|
||||
`CSR_INSTH : read_data = num_instrs[63:32];
|
||||
default: read_data = 32'(csr_table[rd_addr]);
|
||||
endcase
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -24,24 +24,23 @@ module VX_csr_pipe #(
|
|||
wire[31:0] csr_read_data_unqual;
|
||||
wire[31:0] csr_read_data;
|
||||
|
||||
assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && |(csr_req_if.valid);
|
||||
|
||||
assign csr_read_data = (csr_address_s2 == csr_req_if.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||
|
||||
wire writeback = |writeback_if.valid;
|
||||
|
||||
VX_csr_data csr_data(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read_csr_address (csr_req_if.csr_address),
|
||||
.write_valid (is_csr_s2),
|
||||
.write_csr_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
|
||||
.write_csr_address (csr_address_s2),
|
||||
.read_csr_data (csr_read_data_unqual),
|
||||
.writeback_valid (writeback)
|
||||
VX_csr_data #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) csr_data (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read_addr (csr_req_if.csr_address),
|
||||
.read_data (csr_read_data_unqual),
|
||||
.write_enable (is_csr_s2),
|
||||
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
|
||||
.write_addr (csr_address_s2),
|
||||
.warp_num (csr_req_if.warp_num),
|
||||
.wb_valid (| writeback_if.valid)
|
||||
);
|
||||
|
||||
reg [31:0] csr_updated_data;
|
||||
assign csr_read_data = (csr_address_s2 == csr_req_if.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
|
||||
|
||||
reg [31:0] csr_updated_data;
|
||||
|
||||
always @(*) begin
|
||||
case (csr_req_if.alu_op)
|
||||
|
@ -52,55 +51,29 @@ module VX_csr_pipe #(
|
|||
endcase
|
||||
end
|
||||
|
||||
wire zero = 0;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)
|
||||
) csr_reg_s2 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(no_slot_csr),
|
||||
.flush(zero),
|
||||
.flush(0),
|
||||
.in ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_csr, csr_req_if.csr_address, csr_read_data , csr_updated_data }),
|
||||
.out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2})
|
||||
);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] final_csr_data;
|
||||
assign csr_wb_if.valid = valid_s2;
|
||||
assign csr_wb_if.warp_num = warp_num_s2;
|
||||
assign csr_wb_if.rd = rd_s2;
|
||||
assign csr_wb_if.wb = wb_s2;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] thread_ids;
|
||||
wire [`NUM_THREADS-1:0][31:0] warp_ids;
|
||||
wire [`NUM_THREADS-1:0][31:0] warp_idz;
|
||||
wire [`NUM_THREADS-1:0][31:0] csr_vec_read_data_s2;
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
|
||||
assign csr_wb_if.data[i] = (csr_address_s2 == `CSR_LTID) ? i :
|
||||
(csr_address_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
|
||||
csr_read_data_s2;
|
||||
end
|
||||
|
||||
genvar cur_t;
|
||||
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
|
||||
assign thread_ids[cur_t] = cur_t;
|
||||
end
|
||||
|
||||
genvar cur_tw;
|
||||
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
|
||||
assign warp_ids[cur_tw] = 32'(warp_num_s2);
|
||||
assign warp_idz[cur_tw] = 32'(warp_num_s2) + (CORE_ID * `NUM_WARPS);
|
||||
end
|
||||
|
||||
genvar cur_v;
|
||||
for (cur_v = 0; cur_v < `NUM_THREADS; cur_v = cur_v + 1) begin
|
||||
assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
|
||||
end
|
||||
|
||||
wire thread_select = (csr_address_s2 == `CSR_THREAD);
|
||||
wire warp_select = (csr_address_s2 == `CSR_WARP);
|
||||
wire warp_id_select = (csr_address_s2 == `CSR_WARP_ID);
|
||||
|
||||
assign final_csr_data = thread_select ? thread_ids :
|
||||
warp_select ? warp_ids :
|
||||
warp_id_select ? warp_idz :
|
||||
csr_vec_read_data_s2;
|
||||
|
||||
assign csr_wb_if.valid = valid_s2;
|
||||
assign csr_wb_if.warp_num = warp_num_s2;
|
||||
assign csr_wb_if.rd = rd_s2;
|
||||
assign csr_wb_if.wb = wb_s2;
|
||||
assign csr_wb_if.data = final_csr_data;
|
||||
assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && (| csr_req_if.valid);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -50,17 +50,6 @@
|
|||
|
||||
`define CSR_WIDTH 12
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CSR_THREAD 12'h020
|
||||
`define CSR_WARP 12'h021
|
||||
`define CSR_WARP_ID 12'h022
|
||||
|
||||
`define CSR_CYCL_L 12'hC00;
|
||||
`define CSR_CYCL_H 12'hC80;
|
||||
`define CSR_INST_L 12'hC02;
|
||||
`define CSR_INST_H 12'hC82;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define R_INST 7'd51
|
||||
|
@ -192,7 +181,7 @@
|
|||
`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2BANK_LINE_SIZE))
|
||||
|
||||
// DRAM request tag bits
|
||||
`define L2DRAM_TAG_WIDTH ((`NUM_CORES > 1) ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+1))
|
||||
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
|
||||
|
||||
////////////////////////// L3cache Configurable Knobs /////////////////////////
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ module VX_dmem_ctrl (
|
|||
.CORE_TAG_WIDTH(`CORE_REQ_TAG_WIDTH)
|
||||
) dcache_rsp_dcache_if();
|
||||
|
||||
wire to_shm = `SHARED_MEM_ADDR_MATCH(dcache_core_req_if.core_req_addr[0]);
|
||||
wire to_shm = (dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR);
|
||||
wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_rsp_valid);
|
||||
|
||||
// Dcache Request
|
||||
|
|
95
hw/rtl/VX_dram_arb.v
Normal file
95
hw/rtl/VX_dram_arb.v
Normal file
|
@ -0,0 +1,95 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// VX_dram_arb
//
// Time-multiplexes NUM_REQUESTS core-side DRAM request ports onto a single
// DRAM port and routes DRAM responses back to the originating port.
//
// Request path : a free-running counter (bus_sel) picks one requester per
//                clock cycle. The pick advances every cycle regardless of
//                whether that requester has a pending request or whether
//                DRAM is ready. Only the picked requester sees
//                dram_req_ready; all others see ready = 0.
// Tagging      : the outgoing tag is {core_req_tag, requester index}, with
//                the index in the low `LOG2UP(NUM_REQUESTS) bits. That
//                concatenation is CORE_TAG_WIDTH + `LOG2UP(NUM_REQUESTS) bits
//                wide, so DRAM_TAG_WIDTH should be configured to match.
// Response path: the low tag bits of the response select the requester whose
//                core_rsp_valid is raised and whose core_rsp_ready is
//                forwarded to DRAM. Data and the stripped tag are broadcast
//                to every requester.
//
// Parameters:
//   BANK_LINE_SIZE  - not referenced directly in this module; presumably
//                     consumed by the `BANK_LINE_WIDTH / `DRAM_ADDR_WIDTH
//                     macros (TODO confirm against VX_define.vh).
//   NUM_REQUESTS    - number of core-side ports.
//   CORE_TAG_WIDTH  - tag width on the core side.
//   DRAM_TAG_WIDTH  - tag width on the DRAM side.
module VX_dram_arb #(
    parameter BANK_LINE_SIZE = 1,
    parameter NUM_REQUESTS   = 1,
    parameter CORE_TAG_WIDTH = 1,
    parameter DRAM_TAG_WIDTH = 1
) (
    input wire clk,
    input wire reset,

    // Core request
    input wire [NUM_REQUESTS-1:0]                        core_req_read,
    input wire [NUM_REQUESTS-1:0]                        core_req_write,
    input wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0]  core_req_addr,
    input wire [NUM_REQUESTS-1:0][`BANK_LINE_WIDTH-1:0]  core_req_data,
    input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0]    core_req_tag,
    output reg [NUM_REQUESTS-1:0]                        core_req_ready,

    // Core response
    // Declared 'reg' because these outputs are driven from a procedural
    // (always) block; procedural assignment to a net is not legal.
    output reg [NUM_REQUESTS-1:0]                        core_rsp_valid,
    output reg [NUM_REQUESTS-1:0][`BANK_LINE_WIDTH-1:0]  core_rsp_data,
    output reg [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0]    core_rsp_tag,
    input wire [NUM_REQUESTS-1:0]                        core_rsp_ready,

    // DRAM request
    output reg                         dram_req_read,
    output reg                         dram_req_write,
    output reg [`DRAM_ADDR_WIDTH-1:0]  dram_req_addr,
    output reg [`BANK_LINE_WIDTH-1:0]  dram_req_data,
    output reg [DRAM_TAG_WIDTH-1:0]    dram_req_tag,
    input wire                         dram_req_ready,

    // DRAM response
    input wire                         dram_rsp_valid,
    input wire [`BANK_LINE_WIDTH-1:0]  dram_rsp_data,
    input wire [DRAM_TAG_WIDTH-1:0]    dram_rsp_tag,
    output reg                         dram_rsp_ready
);
    // Index of the requester that owns the DRAM request port this cycle.
    reg [`LOG2UP(NUM_REQUESTS)-1:0] bus_sel;

    // Free-running selection counter; wraps at its natural bit width.
    always @(posedge clk) begin
        if (reset) begin
            bus_sel <= 0;
        end else begin
            bus_sel <= bus_sel + 1;
        end
    end

    // Each combinational block gets its own loop index so the two blocks
    // never share (and race on) a single module-level variable.
    integer req_idx;
    integer rsp_idx;

    // Request mux: forward the selected requester to DRAM.
    always @(*) begin
        // Idle defaults. Driving 0 (instead of high-Z) keeps the request
        // strobes well-defined when bus_sel matches no requester.
        dram_req_read  = 0;
        dram_req_write = 0;
        dram_req_addr  = 0;
        dram_req_data  = 0;
        dram_req_tag   = 0;
        core_req_ready = 0;

        for (req_idx = 0; req_idx < NUM_REQUESTS; req_idx = req_idx + 1) begin
            if (bus_sel == (`LOG2UP(NUM_REQUESTS))'(req_idx)) begin
                dram_req_read  = core_req_read[req_idx];
                dram_req_write = core_req_write[req_idx];
                dram_req_addr  = core_req_addr[req_idx];
                dram_req_data  = core_req_data[req_idx];
                // Append the requester index so the response can be routed back.
                dram_req_tag   = {core_req_tag[req_idx], (`LOG2UP(NUM_REQUESTS))'(req_idx)};
                core_req_ready[req_idx] = dram_req_ready;
            end
        end
    end

    // True while the response tag's low bits name the requester being examined.
    reg rsp_match;

    // Response demux: route the DRAM response by the index held in its tag.
    always @(*) begin
        dram_rsp_ready = 0;

        for (rsp_idx = 0; rsp_idx < NUM_REQUESTS; rsp_idx = rsp_idx + 1) begin
            rsp_match = (dram_rsp_tag[`LOG2UP(NUM_REQUESTS)-1:0] == (`LOG2UP(NUM_REQUESTS))'(rsp_idx));

            core_rsp_valid[rsp_idx] = dram_rsp_valid & rsp_match;
            core_rsp_data[rsp_idx]  = dram_rsp_data;
            // Strip the requester index to recover the original core tag.
            core_rsp_tag[rsp_idx]   = dram_rsp_tag[`LOG2UP(NUM_REQUESTS) +: CORE_TAG_WIDTH];

            if (rsp_match) begin
                dram_rsp_ready = core_rsp_ready[rsp_idx];
            end
        end
    end

endmodule
|
|
@ -1,54 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// VX_l1c_to_dram_arb
//
// Shares one DRAM port between the instruction cache and the data cache.
//
// Request path : cache_sel toggles every cycle. On cache_sel == 0 cycles the
//                icache owns the port if it has a request; on every other
//                cycle (including cache_sel == 0 with an idle icache) the
//                dcache request is forwarded.
// Tagging      : the outgoing tag is {cache tag, icache_sel_out}; bit 0 is 1
//                for icache requests and 0 for dcache requests.
// Response path: bit 0 of the response tag selects which cache sees
//                dram_rsp_valid; data and the remaining tag bits are
//                presented to both caches.
//
// REQQ_SIZE is not referenced inside this module; it is kept so existing
// instantiations that override it continue to elaborate.
module VX_l1c_to_dram_arb #(
    parameter REQQ_SIZE = 8
) (
    input wire clk,
    input wire reset,

    VX_cache_dram_req_if dcache_dram_req_if,
    VX_cache_dram_rsp_if dcache_dram_rsp_if,

    VX_cache_dram_req_if icache_dram_req_if,
    VX_cache_dram_rsp_if icache_dram_rsp_if,

    VX_cache_dram_req_if dram_req_if,
    VX_cache_dram_rsp_if dram_rsp_if
);
    // Alternates every cycle; 0 = icache slot, 1 = dcache slot.
    reg  cache_sel;
    wire icache_req_valid, icache_sel_out, icache_sel_in;

    always @(posedge clk) begin
        if (reset) begin
            cache_sel <= 0;
        end else begin
            cache_sel <= ~cache_sel;
        end
    end

    assign icache_req_valid = icache_dram_req_if.dram_req_read || icache_dram_req_if.dram_req_write;

    // The icache drives the DRAM port only in its own slot and only when it
    // actually has a request; otherwise the dcache drives it.
    assign icache_sel_out = icache_req_valid && (cache_sel == 0);

    assign dram_req_if.dram_req_read  = icache_sel_out ? icache_dram_req_if.dram_req_read  : dcache_dram_req_if.dram_req_read;
    assign dram_req_if.dram_req_write = icache_sel_out ? icache_dram_req_if.dram_req_write : dcache_dram_req_if.dram_req_write;
    assign dram_req_if.dram_req_addr  = icache_sel_out ? icache_dram_req_if.dram_req_addr  : dcache_dram_req_if.dram_req_addr;
    assign dram_req_if.dram_req_data  = icache_sel_out ? icache_dram_req_if.dram_req_data  : dcache_dram_req_if.dram_req_data;
    assign dram_req_if.dram_req_tag   = {icache_sel_out ? icache_dram_req_if.dram_req_tag : dcache_dram_req_if.dram_req_tag, icache_sel_out};

    assign icache_dram_req_if.dram_req_ready = dram_req_if.dram_req_ready && (cache_sel == 0);

    // The dcache must see 'ready' exactly when its request is the one being
    // forwarded. Gating on (cache_sel == 1) alone left a window
    // (cache_sel == 0, icache idle) in which the dcache request reached DRAM
    // while the dcache was told it had not been accepted, so the same
    // request could be issued twice.
    assign dcache_dram_req_if.dram_req_ready = dram_req_if.dram_req_ready && ~icache_sel_out;

    // Bit 0 of the response tag identifies the originating cache.
    assign icache_sel_in = dram_rsp_if.dram_rsp_tag[0];

    assign icache_dram_rsp_if.dram_rsp_valid = dram_rsp_if.dram_rsp_valid && icache_sel_in;
    assign icache_dram_rsp_if.dram_rsp_data  = dram_rsp_if.dram_rsp_data;
    assign icache_dram_rsp_if.dram_rsp_tag   = dram_rsp_if.dram_rsp_tag[1 +: $bits(icache_dram_rsp_if.dram_rsp_tag)];

    assign dcache_dram_rsp_if.dram_rsp_valid = dram_rsp_if.dram_rsp_valid && ~icache_sel_in;
    assign dcache_dram_rsp_if.dram_rsp_data  = dram_rsp_if.dram_rsp_data;
    assign dcache_dram_rsp_if.dram_rsp_tag   = dram_rsp_if.dram_rsp_tag[1 +: $bits(dcache_dram_rsp_if.dram_rsp_tag)];

    // A response is accepted only when both caches are ready, independent of
    // which cache it is addressed to.
    assign dram_rsp_if.dram_rsp_ready = icache_dram_rsp_if.dram_rsp_ready && dcache_dram_rsp_if.dram_rsp_ready;

endmodule
|
|
@ -8,10 +8,6 @@ module Vortex #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// IO
|
||||
output wire io_valid,
|
||||
output wire [31:0] io_data,
|
||||
|
||||
// DRAM Dcache Req
|
||||
output wire D_dram_req_read,
|
||||
output wire D_dram_req_write,
|
||||
|
@ -40,11 +36,17 @@ module Vortex #(
|
|||
input wire [`IDRAM_TAG_WIDTH-1:0] I_dram_rsp_tag,
|
||||
output wire I_dram_rsp_ready,
|
||||
|
||||
// LLC Snooping
|
||||
// Cache Snooping
|
||||
input wire llc_snp_req_valid,
|
||||
input wire [`DDRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
|
||||
output wire llc_snp_req_ready,
|
||||
|
||||
// I/O
|
||||
output wire io_valid,
|
||||
output wire [31:0] io_data,
|
||||
input wire io_ready,
|
||||
|
||||
// Debug
|
||||
output wire ebreak
|
||||
);
|
||||
`DEBUG_BEGIN
|
||||
|
@ -98,20 +100,17 @@ module Vortex #(
|
|||
assign dcache_dram_rsp_if.dram_rsp_tag = D_dram_rsp_tag;
|
||||
assign D_dram_rsp_ready = dcache_dram_rsp_if.dram_rsp_ready;
|
||||
|
||||
assign io_valid = (!memory_delay)
|
||||
&& (|dcache_core_req_if.core_req_valid)
|
||||
&& (dcache_core_req_if.core_req_write[0] != `WORD_SEL_NO)
|
||||
&& (dcache_core_req_if.core_req_addr[0] == `IO_BUS_ADDR);
|
||||
wire to_io_bus = (dcache_core_req_if.core_req_addr[0] == `IO_BUS_ADDR);
|
||||
assign io_valid = |dcache_core_req_if.core_req_valid && to_io_bus;
|
||||
assign io_data = dcache_core_req_if.core_req_data[0];
|
||||
|
||||
assign io_data = dcache_core_req_if.core_req_data[0];
|
||||
|
||||
assign dcache_core_req_qual_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{~io_valid}};
|
||||
assign dcache_core_req_qual_if.core_req_valid = dcache_core_req_if.core_req_valid & {`NUM_THREADS{~to_io_bus}};
|
||||
assign dcache_core_req_qual_if.core_req_read = dcache_core_req_if.core_req_read;
|
||||
assign dcache_core_req_qual_if.core_req_write = dcache_core_req_if.core_req_write;
|
||||
assign dcache_core_req_qual_if.core_req_addr = dcache_core_req_if.core_req_addr;
|
||||
assign dcache_core_req_qual_if.core_req_data = dcache_core_req_if.core_req_data;
|
||||
assign dcache_core_req_qual_if.core_req_tag = dcache_core_req_if.core_req_tag;
|
||||
assign dcache_core_req_if.core_req_ready = dcache_core_req_qual_if.core_req_ready;
|
||||
assign dcache_core_req_if.core_req_ready = to_io_bus ? io_ready : dcache_core_req_qual_if.core_req_ready;
|
||||
|
||||
// Icache interfaces
|
||||
VX_cache_core_req_if #(
|
||||
|
|
|
@ -8,10 +8,6 @@ module Vortex_Cluster #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// IO
|
||||
output wire[`NUM_CORES-1:0] io_valid,
|
||||
output wire[`NUM_CORES-1:0][31:0] io_data,
|
||||
|
||||
// DRAM Req
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
|
@ -26,240 +22,140 @@ module Vortex_Cluster #(
|
|||
input wire[`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// LLC Snooping
|
||||
// Cache Snooping
|
||||
input wire llc_snp_req_valid,
|
||||
input wire[`L2DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
|
||||
output wire llc_snp_req_ready,
|
||||
|
||||
// IO
|
||||
output wire io_valid,
|
||||
output wire [31:0] io_data,
|
||||
input wire io_ready,
|
||||
|
||||
// Debug
|
||||
output wire ebreak
|
||||
);
|
||||
if (`NUM_CORES == 1) begin
|
||||
);
|
||||
// DRAM Dcache Req
|
||||
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
|
||||
wire[`NUM_CORES-1:0] per_core_D_dram_req_write;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_req_data;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag;
|
||||
wire[`NUM_CORES-1:0] per_core_D_dram_req_ready;
|
||||
|
||||
VX_cache_dram_req_if #(
|
||||
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
|
||||
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
|
||||
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
|
||||
) dcache_dram_req_if();
|
||||
// DRAM Dcache Rsp
|
||||
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_valid;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag;
|
||||
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_ready;
|
||||
|
||||
VX_cache_dram_rsp_if #(
|
||||
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
|
||||
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
|
||||
) dcache_dram_rsp_if();
|
||||
// DRAM Icache Req
|
||||
wire[`NUM_CORES-1:0] per_core_I_dram_req_read;
|
||||
wire[`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr;
|
||||
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data;
|
||||
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag;
|
||||
wire[`NUM_CORES-1:0] per_core_I_dram_req_ready;
|
||||
|
||||
VX_cache_dram_req_if #(
|
||||
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
|
||||
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
|
||||
.DRAM_TAG_WIDTH(`DDRAM_TAG_WIDTH)
|
||||
) icache_dram_req_if();
|
||||
// DRAM Icache Rsp
|
||||
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_valid;
|
||||
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data;
|
||||
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
|
||||
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
|
||||
|
||||
VX_cache_dram_rsp_if #(
|
||||
.DRAM_LINE_WIDTH(`IDRAM_LINE_WIDTH),
|
||||
.DRAM_TAG_WIDTH(`IDRAM_TAG_WIDTH)
|
||||
) icache_dram_rsp_if();
|
||||
// Snooping
|
||||
wire snp_fwd_valid;
|
||||
wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
|
||||
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready;
|
||||
|
||||
VX_cache_dram_req_if #(
|
||||
.DRAM_LINE_WIDTH(`L2DRAM_LINE_WIDTH),
|
||||
.DRAM_ADDR_WIDTH(`L2DRAM_ADDR_WIDTH),
|
||||
.DRAM_TAG_WIDTH(`L2DRAM_TAG_WIDTH)
|
||||
) dram_req_if();
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire[`NUM_CORES-1:0] per_core_io_valid;
|
||||
wire[`NUM_CORES-1:0][31:0] per_core_io_data;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
VX_cache_dram_rsp_if #(
|
||||
.DRAM_LINE_WIDTH(`L2DRAM_LINE_WIDTH),
|
||||
.DRAM_TAG_WIDTH(`L2DRAM_TAG_WIDTH)
|
||||
) dram_rsp_if();
|
||||
// ebreak
|
||||
wire[`NUM_CORES-1:0] per_core_ebreak;
|
||||
|
||||
assign dram_req_read = dram_req_if.dram_req_read;
|
||||
assign dram_req_write = dram_req_if.dram_req_write;
|
||||
assign dram_req_addr = dram_req_if.dram_req_addr;
|
||||
assign dram_req_data = dram_req_if.dram_req_data;
|
||||
assign dram_req_tag = dram_req_if.dram_req_tag;
|
||||
assign dram_req_if.dram_req_ready = dram_req_ready;
|
||||
|
||||
assign dram_rsp_if.dram_rsp_valid = dram_rsp_valid;
|
||||
assign dram_rsp_if.dram_rsp_data = dram_rsp_data;
|
||||
assign dram_rsp_if.dram_rsp_tag = dram_rsp_tag;
|
||||
assign dram_rsp_ready = dram_rsp_if.dram_rsp_ready;
|
||||
|
||||
VX_l1c_to_dram_arb #(
|
||||
.REQQ_SIZE(`L2REQQ_SIZE)
|
||||
) l1c_to_dram_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.dcache_dram_req_if (dcache_dram_req_if),
|
||||
.dcache_dram_rsp_if (dcache_dram_rsp_if),
|
||||
.icache_dram_req_if (icache_dram_req_if),
|
||||
.icache_dram_rsp_if (icache_dram_rsp_if),
|
||||
.dram_req_if (dram_req_if),
|
||||
.dram_rsp_if (dram_rsp_if)
|
||||
);
|
||||
assign io_valid = per_core_io_valid[0];
|
||||
assign io_data = per_core_io_data[0];
|
||||
assign ebreak = (& per_core_ebreak);
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_CORES; i = i + 1) begin
|
||||
Vortex #(
|
||||
.CORE_ID(0)
|
||||
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
|
||||
) vortex_core (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.io_valid (io_valid[0]),
|
||||
.io_data (io_data[0]),
|
||||
|
||||
.D_dram_req_read (dcache_dram_req_if.dram_req_read),
|
||||
.D_dram_req_write (dcache_dram_req_if.dram_req_write),
|
||||
.D_dram_req_addr (dcache_dram_req_if.dram_req_addr),
|
||||
.D_dram_req_data (dcache_dram_req_if.dram_req_data),
|
||||
.D_dram_req_tag (dcache_dram_req_if.dram_req_tag),
|
||||
.D_dram_req_ready (dcache_dram_req_if.dram_req_ready),
|
||||
|
||||
.D_dram_rsp_valid (dcache_dram_rsp_if.dram_rsp_valid),
|
||||
.D_dram_rsp_data (dcache_dram_rsp_if.dram_rsp_data),
|
||||
.D_dram_rsp_tag (dcache_dram_rsp_if.dram_rsp_tag),
|
||||
.D_dram_rsp_ready (dcache_dram_rsp_if.dram_rsp_ready),
|
||||
|
||||
.I_dram_req_read (icache_dram_req_if.dram_req_read),
|
||||
.I_dram_req_write (icache_dram_req_if.dram_req_write),
|
||||
.I_dram_req_addr (icache_dram_req_if.dram_req_addr),
|
||||
.I_dram_req_data (icache_dram_req_if.dram_req_data),
|
||||
.I_dram_req_tag (icache_dram_req_if.dram_req_tag),
|
||||
.I_dram_req_ready (icache_dram_req_if.dram_req_ready),
|
||||
|
||||
.I_dram_rsp_valid (icache_dram_rsp_if.dram_rsp_valid),
|
||||
.I_dram_rsp_data (icache_dram_rsp_if.dram_rsp_data),
|
||||
.I_dram_rsp_ready (icache_dram_rsp_if.dram_rsp_ready),
|
||||
.I_dram_rsp_tag (icache_dram_rsp_if.dram_rsp_tag),
|
||||
|
||||
.llc_snp_req_valid (llc_snp_req_valid),
|
||||
.llc_snp_req_addr (llc_snp_req_addr),
|
||||
.llc_snp_req_ready (llc_snp_req_ready),
|
||||
|
||||
.ebreak (ebreak)
|
||||
.D_dram_req_read (per_core_D_dram_req_read [i]),
|
||||
.D_dram_req_write (per_core_D_dram_req_write [i]),
|
||||
.D_dram_req_addr (per_core_D_dram_req_addr [i]),
|
||||
.D_dram_req_data (per_core_D_dram_req_data [i]),
|
||||
.D_dram_req_tag (per_core_D_dram_req_tag [i]),
|
||||
.D_dram_req_ready (per_core_D_dram_req_ready [i]),
|
||||
.D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]),
|
||||
.D_dram_rsp_data (per_core_D_dram_rsp_data [i]),
|
||||
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
|
||||
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
|
||||
.I_dram_req_read (per_core_I_dram_req_read [i]),
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
.I_dram_req_write (),
|
||||
`IGNORE_WARNINGS_END
|
||||
.I_dram_req_addr (per_core_I_dram_req_addr [i]),
|
||||
.I_dram_req_data (per_core_I_dram_req_data [i]),
|
||||
.I_dram_req_tag (per_core_I_dram_req_tag [i]),
|
||||
.I_dram_req_ready (per_core_I_dram_req_ready [i]),
|
||||
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
|
||||
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
|
||||
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
|
||||
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
|
||||
.llc_snp_req_valid (snp_fwd_valid),
|
||||
.llc_snp_req_addr (snp_fwd_addr),
|
||||
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
|
||||
.io_valid (per_core_io_valid [i]),
|
||||
.io_data (per_core_io_data [i]),
|
||||
.io_ready (io_ready),
|
||||
.ebreak (per_core_ebreak [i])
|
||||
);
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
// DRAM Dcache Req
|
||||
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
|
||||
wire[`NUM_CORES-1:0] per_core_D_dram_req_write;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_req_data;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag;
|
||||
|
||||
// DRAM Dcache Rsp
|
||||
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_valid;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data;
|
||||
wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag;
|
||||
wire[`NUM_CORES-1:0] per_core_D_dram_rsp_ready;
|
||||
|
||||
// DRAM Icache Req
|
||||
wire[`NUM_CORES-1:0] per_core_I_dram_req_read;
|
||||
wire[`NUM_CORES-1:0] per_core_I_dram_req_write;
|
||||
wire[`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr;
|
||||
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data;
|
||||
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag;
|
||||
|
||||
// DRAM Icache Rsp
|
||||
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_valid;
|
||||
wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data;
|
||||
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
|
||||
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
|
||||
|
||||
// Out ebreak
|
||||
wire[`NUM_CORES-1:0] per_core_ebreak;
|
||||
|
||||
wire[`NUM_CORES-1:0] per_core_io_valid;
|
||||
wire[`NUM_CORES-1:0][31:0] per_core_io_data;
|
||||
|
||||
wire l2_core_req_ready;
|
||||
|
||||
wire snp_fwd_valid;
|
||||
wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
|
||||
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready;
|
||||
|
||||
assign ebreak = (& per_core_ebreak);
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_CORES; i = i + 1) begin
|
||||
|
||||
wire [`IDRAM_LINE_WIDTH-1:0] curr_core_D_dram_req_data;
|
||||
wire [`DDRAM_LINE_WIDTH-1:0] curr_core_I_dram_req_data;
|
||||
|
||||
assign io_valid[i] = per_core_io_valid[i];
|
||||
assign io_data[i] = per_core_io_data[i];
|
||||
|
||||
Vortex #(
|
||||
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
|
||||
) vortex_core (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.io_valid (per_core_io_valid [i]),
|
||||
.io_data (per_core_io_data [i]),
|
||||
.D_dram_req_read (per_core_D_dram_req_read [i]),
|
||||
.D_dram_req_write (per_core_D_dram_req_write [i]),
|
||||
.D_dram_req_addr (per_core_D_dram_req_addr [i]),
|
||||
.D_dram_req_data (curr_core_D_dram_req_data ),
|
||||
.D_dram_req_tag (per_core_D_dram_req_tag [i]),
|
||||
.D_dram_req_ready (l2_core_req_ready ),
|
||||
.D_dram_rsp_valid (per_core_D_dram_rsp_valid [i]),
|
||||
.D_dram_rsp_data (per_core_D_dram_rsp_data [i]),
|
||||
.D_dram_rsp_tag (per_core_D_dram_rsp_tag [i]),
|
||||
.D_dram_rsp_ready (per_core_D_dram_rsp_ready [i]),
|
||||
.I_dram_req_read (per_core_I_dram_req_read [i]),
|
||||
.I_dram_req_write (per_core_I_dram_req_write [i]),
|
||||
.I_dram_req_addr (per_core_I_dram_req_addr [i]),
|
||||
.I_dram_req_data (curr_core_I_dram_req_data ),
|
||||
.I_dram_req_tag (per_core_I_dram_req_tag [i]),
|
||||
.I_dram_req_ready (l2_core_req_ready ),
|
||||
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [i]),
|
||||
.I_dram_rsp_tag (per_core_I_dram_rsp_tag [i]),
|
||||
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
|
||||
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
|
||||
.llc_snp_req_valid (snp_fwd_valid),
|
||||
.llc_snp_req_addr (snp_fwd_addr),
|
||||
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
|
||||
.ebreak (per_core_ebreak [i])
|
||||
);
|
||||
|
||||
assign per_core_D_dram_req_data [i] = curr_core_D_dram_req_data;
|
||||
assign per_core_I_dram_req_data [i] = curr_core_I_dram_req_data;
|
||||
end
|
||||
if (`L2_ENABLE) begin
|
||||
|
||||
// L2 Cache ///////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] l2_core_req_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_mem_write;
|
||||
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_mem_read;
|
||||
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_write;
|
||||
wire[`L2NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l2_core_req_read;
|
||||
wire[`L2NUM_REQUESTS-1:0][31:0] l2_core_req_addr;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_req_tag;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_req_data;
|
||||
wire l2_core_req_ready;
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_rsp_data;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag;
|
||||
wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_ready;
|
||||
|
||||
wire[`DDRAM_LINE_WIDTH-1:0] l2_dram_req_data;
|
||||
wire[`DDRAM_LINE_WIDTH-1:0] l2_dram_rsp_data;
|
||||
|
||||
assign dram_req_data = l2_dram_req_data;
|
||||
assign l2_dram_rsp_data = dram_rsp_data;
|
||||
|
||||
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||
// Core Request
|
||||
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
|
||||
assign l2_core_req_valid [i+1] = (per_core_I_dram_req_read[(i/2)] | per_core_I_dram_req_write[(i/2)]);
|
||||
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
|
||||
assign l2_core_req_valid [i+1] = per_core_I_dram_req_read[(i/2)];
|
||||
|
||||
assign l2_core_req_read [i] = per_core_D_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
|
||||
assign l2_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
|
||||
|
||||
assign l2_core_req_mem_write [i] = per_core_D_dram_req_write[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
|
||||
assign l2_core_req_mem_write [i+1] = `WORD_SEL_NO;
|
||||
assign l2_core_req_write [i] = per_core_D_dram_req_write[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
|
||||
assign l2_core_req_write [i+1] = `WORD_SEL_NO;
|
||||
|
||||
assign l2_core_req_mem_read [i] = per_core_D_dram_req_read[(i/2)] ? `WORD_SEL_LW : `WORD_SEL_NO;
|
||||
assign l2_core_req_mem_read [i+1] = `WORD_SEL_NO;
|
||||
assign l2_core_req_addr [i] = {per_core_D_dram_req_addr[(i/2)], {`LOG2UP(`DBANK_LINE_SIZE){1'b0}}};
|
||||
assign l2_core_req_addr [i+1] = {per_core_I_dram_req_addr[(i/2)], {`LOG2UP(`IBANK_LINE_SIZE){1'b0}}};
|
||||
|
||||
assign l2_core_req_addr [i] = {per_core_D_dram_req_addr[(i/2)], {`LOG2UP(`DBANK_LINE_SIZE){1'b0}}};
|
||||
assign l2_core_req_addr [i+1] = {per_core_I_dram_req_addr[(i/2)], {`LOG2UP(`IBANK_LINE_SIZE){1'b0}}};
|
||||
assign l2_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
|
||||
assign l2_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
|
||||
|
||||
assign l2_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
|
||||
assign l2_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
|
||||
assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
|
||||
assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
|
||||
|
||||
assign l2_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
|
||||
assign l2_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
|
||||
assign per_core_D_dram_req_ready[(i/2)] = l2_core_req_ready;
|
||||
assign per_core_I_dram_req_ready[(i/2)] = l2_core_req_ready;
|
||||
|
||||
assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i];
|
||||
assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1];
|
||||
|
@ -302,8 +198,8 @@ module Vortex_Cluster #(
|
|||
|
||||
// Core request
|
||||
.core_req_valid (l2_core_req_valid),
|
||||
.core_req_read (l2_core_req_mem_read),
|
||||
.core_req_write (l2_core_req_mem_write),
|
||||
.core_req_read (l2_core_req_read),
|
||||
.core_req_write (l2_core_req_write),
|
||||
.core_req_addr (l2_core_req_addr),
|
||||
.core_req_data (l2_core_req_data),
|
||||
.core_req_tag (l2_core_req_tag),
|
||||
|
@ -313,20 +209,20 @@ module Vortex_Cluster #(
|
|||
.core_rsp_valid (l2_core_rsp_valid),
|
||||
.core_rsp_data (l2_core_rsp_data),
|
||||
.core_rsp_tag (l2_core_rsp_tag),
|
||||
.core_rsp_ready (|l2_core_rsp_ready),
|
||||
.core_rsp_ready (& l2_core_rsp_ready),
|
||||
|
||||
// DRAM request
|
||||
.dram_req_read (dram_req_read),
|
||||
.dram_req_write (dram_req_write),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data (l2_dram_req_data),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_tag (dram_req_tag),
|
||||
.dram_req_ready (dram_req_ready),
|
||||
|
||||
// L2 Cache DRAM Fill response
|
||||
// DRAM response
|
||||
.dram_rsp_valid (dram_rsp_valid),
|
||||
.dram_rsp_tag (dram_rsp_tag),
|
||||
.dram_rsp_data (l2_dram_rsp_data),
|
||||
.dram_rsp_data (dram_rsp_data),
|
||||
.dram_rsp_ready (dram_rsp_ready),
|
||||
|
||||
// Snoop request
|
||||
|
@ -339,6 +235,95 @@ module Vortex_Cluster #(
|
|||
.snp_fwd_addr (snp_fwd_addr),
|
||||
.snp_fwd_ready (& per_core_snp_fwd_ready)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] per_core_req_read;
|
||||
wire[`L2NUM_REQUESTS-1:0] per_core_req_write;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_req_addr;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_req_tag;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_req_data;
|
||||
wire[`L2NUM_REQUESTS-1:0] per_core_req_ready;
|
||||
|
||||
wire[`L2NUM_REQUESTS-1:0] per_core_rsp_valid;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_rsp_data;
|
||||
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_rsp_tag;
|
||||
wire[`L2NUM_REQUESTS-1:0] per_core_rsp_ready;
|
||||
|
||||
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||
assign per_core_req_read [i] = per_core_D_dram_req_read[(i/2)];
|
||||
assign per_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)];
|
||||
|
||||
assign per_core_req_write [i] = per_core_D_dram_req_write[(i/2)];
|
||||
assign per_core_req_write [i+1] = 0;
|
||||
|
||||
assign per_core_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
|
||||
assign per_core_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
|
||||
|
||||
assign per_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
|
||||
assign per_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
|
||||
|
||||
assign per_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
|
||||
assign per_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
|
||||
|
||||
assign per_core_D_dram_req_ready[(i/2)] = per_core_req_ready[i];
|
||||
assign per_core_I_dram_req_ready[(i/2)] = per_core_req_ready[i+1];
|
||||
|
||||
assign per_core_D_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i];
|
||||
assign per_core_I_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i+1];
|
||||
|
||||
assign per_core_D_dram_rsp_data [(i/2)] = per_core_rsp_data[i];
|
||||
assign per_core_I_dram_rsp_data [(i/2)] = per_core_rsp_data[i+1];
|
||||
|
||||
assign per_core_D_dram_rsp_tag [(i/2)] = per_core_rsp_tag[i];
|
||||
assign per_core_I_dram_rsp_tag [(i/2)] = per_core_rsp_tag[i+1];
|
||||
|
||||
assign per_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)];
|
||||
assign per_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
|
||||
end
|
||||
|
||||
VX_dram_arb #(
|
||||
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
|
||||
.NUM_REQUESTS (`L2NUM_REQUESTS),
|
||||
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)
|
||||
) dram_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.core_req_read (per_core_req_read),
|
||||
.core_req_write (per_core_req_write),
|
||||
.core_req_addr (per_core_req_addr),
|
||||
.core_req_data (per_core_req_data),
|
||||
.core_req_tag (per_core_req_tag),
|
||||
.core_req_ready (per_core_req_ready),
|
||||
|
||||
// Core response
|
||||
.core_rsp_valid (per_core_rsp_valid),
|
||||
.core_rsp_data (per_core_rsp_data),
|
||||
.core_rsp_tag (per_core_rsp_tag),
|
||||
.core_rsp_ready (per_core_rsp_ready),
|
||||
|
||||
// DRAM request
|
||||
.dram_req_read (dram_req_read),
|
||||
.dram_req_write (dram_req_write),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_tag (dram_req_tag),
|
||||
.dram_req_ready (dram_req_ready),
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (dram_rsp_valid),
|
||||
.dram_rsp_tag (dram_rsp_tag),
|
||||
.dram_rsp_data (dram_rsp_data),
|
||||
.dram_rsp_ready (dram_rsp_ready)
|
||||
);
|
||||
|
||||
// Cache snooping
|
||||
assign snp_fwd_valid = llc_snp_req_valid;
|
||||
assign snp_fwd_addr = llc_snp_req_addr;
|
||||
assign llc_snp_req_ready = & per_core_snp_fwd_ready;
|
||||
end
|
||||
|
||||
endmodule
|
|
@ -6,10 +6,6 @@ module Vortex_Socket (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// IO
|
||||
output wire io_valid[(`NUM_CORES * `NUM_CLUSTERS)-1:0],
|
||||
output wire[31:0] io_data [(`NUM_CORES * `NUM_CLUSTERS)-1:0],
|
||||
|
||||
// DRAM Req
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
|
@ -24,32 +20,26 @@ module Vortex_Socket (
|
|||
input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// LLC Snooping
|
||||
// Cache Snooping
|
||||
input wire llc_snp_req_valid,
|
||||
input wire[`L3DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
|
||||
output wire llc_snp_req_ready,
|
||||
|
||||
// I/O
|
||||
output wire io_valid,
|
||||
output wire [31:0] io_data,
|
||||
input wire io_ready,
|
||||
|
||||
// Debug
|
||||
output wire ebreak
|
||||
);
|
||||
if (`NUM_CLUSTERS == 1) begin
|
||||
|
||||
wire[`NUM_CORES-1:0] cluster_io_valid;
|
||||
wire[`NUM_CORES-1:0][31:0] cluster_io_data;
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_CORES; i=i+1) begin
|
||||
assign io_valid [i] = cluster_io_valid [i];
|
||||
assign io_data [i] = cluster_io_data [i];
|
||||
end
|
||||
|
||||
Vortex_Cluster #(
|
||||
.CLUSTER_ID(0)
|
||||
) Vortex_Cluster (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.io_valid (cluster_io_valid),
|
||||
.io_data (cluster_io_data),
|
||||
|
||||
.dram_req_read (dram_req_read),
|
||||
.dram_req_write (dram_req_write),
|
||||
|
@ -67,51 +57,46 @@ module Vortex_Socket (
|
|||
.llc_snp_req_addr (llc_snp_req_addr),
|
||||
.llc_snp_req_ready (llc_snp_req_ready),
|
||||
|
||||
.io_valid (io_valid),
|
||||
.io_data (io_data),
|
||||
.io_ready (io_ready),
|
||||
|
||||
.ebreak (ebreak)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
wire snp_fwd_valid;
|
||||
wire[`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready;
|
||||
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
|
||||
|
||||
assign ebreak = (& per_cluster_ebreak);
|
||||
|
||||
// // DRAM Dcache Req
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
|
||||
// DRAM Dcache Req
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
|
||||
wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
|
||||
wire[`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
|
||||
wire[`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
|
||||
|
||||
wire[`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data_up;
|
||||
|
||||
wire l3_core_req_ready;
|
||||
|
||||
// // DRAM Dcache Rsp
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
|
||||
// DRAM Dcache Rsp
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
|
||||
wire[`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
|
||||
wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
|
||||
|
||||
wire[`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data_up;
|
||||
wire snp_fwd_valid;
|
||||
wire[`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready;
|
||||
|
||||
wire[`NUM_CLUSTERS-1:0][`NUM_CORES-1:0] per_cluster_io_valid;
|
||||
wire[`NUM_CLUSTERS-1:0][`NUM_CORES-1:0][31:0] per_cluster_io_data;
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_io_valid;
|
||||
wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_data;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
genvar i, j;
|
||||
for (i = 0; i < `NUM_CLUSTERS; i = i + 1) begin
|
||||
for (j = 0; j < `NUM_CORES; j = j + 1) begin
|
||||
assign io_valid[j+(i*`NUM_CORES)] = per_cluster_io_valid[i][j];
|
||||
assign io_data[j+(i*`NUM_CORES)] = per_cluster_io_data[i][j];
|
||||
end
|
||||
assign per_cluster_dram_req_data[i] = per_cluster_dram_req_data_up[i];
|
||||
assign per_cluster_dram_rsp_data_up[i] = per_cluster_dram_rsp_data[i];
|
||||
end
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
|
||||
|
||||
|
||||
assign io_valid = per_cluster_io_valid[0];
|
||||
assign io_data = per_cluster_io_data[0];
|
||||
assign ebreak = (& per_cluster_ebreak);
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < `NUM_CLUSTERS; i=i+1) begin
|
||||
Vortex_Cluster #(
|
||||
.CLUSTER_ID(i)
|
||||
|
@ -119,34 +104,35 @@ module Vortex_Socket (
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.io_valid (per_cluster_io_valid [i]),
|
||||
.io_data (per_cluster_io_data [i]),
|
||||
|
||||
.dram_req_write (per_cluster_dram_req_write [i]),
|
||||
.dram_req_read (per_cluster_dram_req_read [i]),
|
||||
.dram_req_addr (per_cluster_dram_req_addr [i]),
|
||||
.dram_req_data (per_cluster_dram_req_data_up [i]),
|
||||
.dram_req_tag (per_cluster_dram_req_tag [i]),
|
||||
.dram_req_write (per_cluster_dram_req_write [i]),
|
||||
.dram_req_read (per_cluster_dram_req_read [i]),
|
||||
.dram_req_addr (per_cluster_dram_req_addr [i]),
|
||||
.dram_req_data (per_cluster_dram_req_data [i]),
|
||||
.dram_req_tag (per_cluster_dram_req_tag [i]),
|
||||
.dram_req_ready (l3_core_req_ready),
|
||||
|
||||
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
|
||||
.dram_rsp_data (per_cluster_dram_rsp_data_up [i]),
|
||||
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
|
||||
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
|
||||
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
|
||||
.dram_rsp_data (per_cluster_dram_rsp_data [i]),
|
||||
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
|
||||
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
|
||||
|
||||
.llc_snp_req_valid (snp_fwd_valid),
|
||||
.llc_snp_req_addr (snp_fwd_addr),
|
||||
.llc_snp_req_ready (per_cluster_snp_fwd_ready [i]),
|
||||
.llc_snp_req_ready (per_cluster_snp_fwd_ready [i]),
|
||||
|
||||
.ebreak (per_cluster_ebreak [i])
|
||||
.io_valid (per_cluster_io_valid [i]),
|
||||
.io_data (per_cluster_io_data [i]),
|
||||
.io_ready (io_ready),
|
||||
|
||||
.ebreak (per_cluster_ebreak [i])
|
||||
);
|
||||
end
|
||||
|
||||
// L3 Cache ///////////////////////////////////////////////////////////
|
||||
|
||||
wire[`L3NUM_REQUESTS-1:0] l3_core_req_valid;
|
||||
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_mem_write;
|
||||
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_mem_read;
|
||||
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_read;
|
||||
wire[`L3NUM_REQUESTS-1:0][`WORD_SEL_BITS-1:0] l3_core_req_write;
|
||||
wire[`L3NUM_REQUESTS-1:0][31:0] l3_core_req_addr;
|
||||
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
|
||||
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
|
||||
|
@ -156,23 +142,17 @@ module Vortex_Socket (
|
|||
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
|
||||
wire[`L3NUM_REQUESTS-1:0] l3_core_rsp_ready;
|
||||
|
||||
wire[`L3DRAM_LINE_WIDTH-1:0] l3_dram_req_data;
|
||||
wire[`L3DRAM_LINE_WIDTH-1:0] l3_dram_rsp_data;
|
||||
|
||||
assign dram_req_data = l3_dram_req_data;
|
||||
assign l3_dram_rsp_data = dram_rsp_data;
|
||||
|
||||
for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin
|
||||
// Core Request
|
||||
assign l3_core_req_valid [i] = (per_cluster_dram_req_read[i] | per_cluster_dram_req_write[i]);
|
||||
assign l3_core_req_mem_read [i] = per_cluster_dram_req_read [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
|
||||
assign l3_core_req_mem_write [i] = per_cluster_dram_req_write[i] ? `WORD_SEL_LW : `WORD_SEL_NO;
|
||||
assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}};
|
||||
assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i];
|
||||
assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
|
||||
assign l3_core_req_valid [i] = (per_cluster_dram_req_read [i] | per_cluster_dram_req_write [i]);
|
||||
assign l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
|
||||
assign l3_core_req_write [i] = per_cluster_dram_req_write [i] ? `WORD_SEL_LW : `WORD_SEL_NO;
|
||||
assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}};
|
||||
assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i];
|
||||
assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
|
||||
|
||||
// Core can't accept Response
|
||||
assign l3_core_rsp_ready [i] = per_cluster_dram_rsp_ready[i];
|
||||
// Core Response
|
||||
assign l3_core_rsp_ready [i] = per_cluster_dram_rsp_ready[i];
|
||||
|
||||
// Cache Fill Response
|
||||
assign per_cluster_dram_rsp_valid [i] = l3_core_rsp_valid [i];
|
||||
|
@ -208,8 +188,8 @@ module Vortex_Socket (
|
|||
|
||||
// Core request
|
||||
.core_req_valid (l3_core_req_valid),
|
||||
.core_req_read (l3_core_req_mem_read),
|
||||
.core_req_write (l3_core_req_mem_write),
|
||||
.core_req_read (l3_core_req_read),
|
||||
.core_req_write (l3_core_req_write),
|
||||
.core_req_addr (l3_core_req_addr),
|
||||
.core_req_data (l3_core_req_data),
|
||||
.core_req_tag (l3_core_req_tag),
|
||||
|
@ -219,19 +199,19 @@ module Vortex_Socket (
|
|||
.core_rsp_valid (l3_core_rsp_valid),
|
||||
.core_rsp_data (l3_core_rsp_data),
|
||||
.core_rsp_tag (l3_core_rsp_tag),
|
||||
.core_rsp_ready (|l3_core_rsp_ready),
|
||||
.core_rsp_ready (& l3_core_rsp_ready),
|
||||
|
||||
// DRAM request
|
||||
.dram_req_write (dram_req_write),
|
||||
.dram_req_read (dram_req_read),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data (l3_dram_req_data),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_tag (dram_req_tag),
|
||||
.dram_req_ready (dram_req_ready),
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (dram_rsp_valid),
|
||||
.dram_rsp_data (l3_dram_rsp_data),
|
||||
.dram_rsp_data (dram_rsp_data),
|
||||
.dram_rsp_tag (dram_rsp_tag),
|
||||
.dram_rsp_ready (dram_rsp_ready),
|
||||
|
||||
|
|
2
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
2
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
|
@ -85,7 +85,7 @@ module VX_cache_core_rsp_merge #(
|
|||
&& per_bank_core_rsp_valid[i]
|
||||
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
|
||||
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
|
||||
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
|
||||
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
|
||||
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
|
||||
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];
|
||||
core_rsp_tag[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_tag[i];
|
||||
|
|
|
@ -98,19 +98,13 @@ void Simulator::dbus_driver() {
|
|||
vortex_->dram_req_ready = ~dram_stalled_;
|
||||
}
|
||||
|
||||
void Simulator::io_handler() {
|
||||
bool io_valid = false;
|
||||
for (int c = 0; c < NUM_CORES; c++) {
|
||||
if (vortex_->io_valid[c]) {
|
||||
uint32_t data_write = (uint32_t)vortex_->io_data[c];
|
||||
char c = (char)data_write;
|
||||
std::cerr << c;
|
||||
io_valid = true;
|
||||
}
|
||||
}
|
||||
if (io_valid) {
|
||||
std::cout << std::flush;
|
||||
void Simulator::io_driver() {
|
||||
if (vortex_->io_valid) {
|
||||
uint32_t data_write = (uint32_t)vortex_->io_data;
|
||||
char c = (char)data_write;
|
||||
std::cerr << c;
|
||||
}
|
||||
vortex_->io_ready = true;
|
||||
}
|
||||
|
||||
void Simulator::reset() {
|
||||
|
@ -128,7 +122,7 @@ void Simulator::step() {
|
|||
this->eval();
|
||||
|
||||
dbus_driver();
|
||||
io_handler();
|
||||
io_driver();
|
||||
}
|
||||
|
||||
void Simulator::eval() {
|
||||
|
@ -149,7 +143,9 @@ bool Simulator::is_busy() {
|
|||
return (0 == vortex_->ebreak);
|
||||
}
|
||||
|
||||
void Simulator::send_snoops(uint32_t mem_addr, uint32_t size) {
|
||||
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||
// send snoop requests to the caches
|
||||
printf("[sim] total cycles: %ld\n", time_stamp/2);
|
||||
// align address to LLC block boundaries
|
||||
auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE;
|
||||
auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
|
||||
|
@ -169,12 +165,6 @@ void Simulator::send_snoops(uint32_t mem_addr, uint32_t size) {
|
|||
vortex_->llc_snp_req_valid = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||
// send snoop requests to the caches
|
||||
printf("[sim] total cycles: %ld\n", time_stamp/2);
|
||||
this->send_snoops(mem_addr, size);
|
||||
this->wait(PIPELINE_FLUSH_LATENCY);
|
||||
}
|
||||
|
||||
|
@ -192,12 +182,12 @@ bool Simulator::run() {
|
|||
|
||||
// check riscv-tests PASSED/FAILED status
|
||||
#if (NUM_CLUSTERS == 1 && NUM_CORES == 1)
|
||||
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
|
||||
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
|
||||
#else
|
||||
#if (NUM_CLUSTERS == 1)
|
||||
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk2__DOT__genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
|
||||
int status = (int)vortex_->Vortex_Socket->genblk1__DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
|
||||
#else
|
||||
int status = (int)vortex_->Vortex_Socket->genblk2__DOT__genblk2__BRA__0__KET____DOT__Vortex_Cluster->genblk2__DOT__genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
|
||||
int status = (int)vortex_->Vortex_Socket->genblk2__DOT__genblk1__BRA__0__KET____DOT__Vortex_Cluster->genblk1__BRA__0__KET____DOT__vortex_core->back_end->writeback->last_data_wb & 0xf;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -35,18 +35,16 @@ public:
|
|||
bool is_busy();
|
||||
void reset();
|
||||
void step();
|
||||
void wait(uint32_t cycles);
|
||||
void flush_caches(uint32_t mem_addr, uint32_t size);
|
||||
bool run();
|
||||
bool run();
|
||||
void print_stats(std::ostream& out);
|
||||
|
||||
private:
|
||||
|
||||
void eval();
|
||||
void wait(uint32_t cycles);
|
||||
|
||||
void eval();
|
||||
void dbus_driver();
|
||||
void io_handler();
|
||||
void send_snoops(uint32_t mem_addr, uint32_t size);
|
||||
void io_driver();
|
||||
|
||||
bool dram_stalled_;
|
||||
std::vector<dram_req_t> dram_req_vec_;
|
||||
|
|
|
@ -15,10 +15,10 @@ CPY = /home/fares/dev/riscv-gnu-toolchain-vector/drops/bin/riscv32-unknown-elf-
|
|||
|
||||
NEWLIB = $(LIB_PATH)/newlib/newlib.c
|
||||
VX_STR = $(LIB_PATH)/startup/vx_start.S
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
|
||||
VX_API = $(LIB_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.s
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.S
|
||||
VX_VEC = vx_vec.s
|
||||
LIBS = /home/fares/dev/riscv-gnu-toolchain-vector/drops/riscv32-unknown-elf/lib/libc.a /home/fares/dev/riscv-gnu-toolchain-vector/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
||||
|
|
|
@ -11,10 +11,10 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
NEWLIB = $(LIB_PATH)/newlib/newlib.c
|
||||
VX_STR = $(LIB_PATH)/startup/vx_start.S
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.s $(LIB_PATH)/io/vx_io.c
|
||||
VX_INT = $(LIB_PATH)/intrinsics/vx_intrinsics.S
|
||||
VX_IO = $(LIB_PATH)/io/vx_io.S $(LIB_PATH)/io/vx_io.c
|
||||
VX_API = $(LIB_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.s
|
||||
VX_FIO = $(LIB_PATH)/fileio/fileio.S
|
||||
VX_VEC1 = vx_vec_vvaddint32.s
|
||||
VX_VEC2 = vx_vec_saxpy.s #float --> int
|
||||
VX_VEC3 = vx_vec_sgemm_float.s #float --> int
|
||||
|
|
|
@ -486,7 +486,7 @@ Disassembly of section .text:
|
|||
800006e0: 00112623 sw ra,12(sp)
|
||||
800006e4: 00812423 sw s0,8(sp)
|
||||
800006e8: 01010413 addi s0,sp,16
|
||||
800006ec: 0d4000ef jal ra,800007c0 <vx_threadID>
|
||||
800006ec: 0d4000ef jal ra,800007c0 <vx_thread_id>
|
||||
800006f0: 00050793 mv a5,a0
|
||||
800006f4: 00078513 mv a0,a5
|
||||
800006f8: 00c12083 lw ra,12(sp)
|
||||
|
@ -554,23 +554,23 @@ Disassembly of section .text:
|
|||
800007b0: 0000306b 0x306b
|
||||
800007b4: 00008067 ret
|
||||
|
||||
800007b8 <vx_warpID>:
|
||||
800007b8 <vx_warp_id>:
|
||||
800007b8: 02102573 csrr a0,0x21
|
||||
800007bc: 00008067 ret
|
||||
|
||||
800007c0 <vx_threadID>:
|
||||
800007c0 <vx_thread_id>:
|
||||
800007c0: 02002573 csrr a0,0x20
|
||||
800007c4: 00008067 ret
|
||||
|
||||
800007c8 <vx_getCycles>:
|
||||
800007c8 <vx_num_cycles>:
|
||||
800007c8: 02602573 csrr a0,0x26
|
||||
800007cc: 00008067 ret
|
||||
|
||||
800007d0 <vx_getInst>:
|
||||
800007d0 <vx_num_instrs>:
|
||||
800007d0: 02502573 csrr a0,0x25
|
||||
800007d4: 00008067 ret
|
||||
|
||||
800007d8 <vx_resetStack>:
|
||||
800007d8 <vx_reset_stack>:
|
||||
800007d8: 00400513 li a0,4
|
||||
800007dc: 0005006b 0x5006b
|
||||
800007e0: 021026f3 csrr a3,0x21
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
|
||||
#include "../config.h"
|
||||
|
||||
# .section .FileIO
|
||||
|
69
runtime/intrinsics/vx_intrinsics.S
Normal file
69
runtime/intrinsics/vx_intrinsics.S
Normal file
|
@ -0,0 +1,69 @@
|
|||
#include "../config.h"
|
||||
|
||||
.section .text
|
||||
|
||||
.type vx_wspawn, @function
|
||||
.global vx_wspawn
|
||||
vx_wspawn:
|
||||
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
||||
ret
|
||||
|
||||
.type vx_tmc, @function
|
||||
.global vx_tmc
|
||||
vx_tmc:
|
||||
.word 0x0005006b # tmc a0
|
||||
ret
|
||||
|
||||
.type vx_barrier, @function
|
||||
.global vx_barrier
|
||||
vx_barrier:
|
||||
.word 0x00b5406b # barrier a0(barrier id), a1(numWarps)
|
||||
ret
|
||||
|
||||
.type vx_split, @function
|
||||
.global vx_split
|
||||
vx_split:
|
||||
.word 0x0005206b # split a0
|
||||
ret
|
||||
|
||||
.type vx_join, @function
|
||||
.global vx_join
|
||||
vx_join:
|
||||
.word 0x0000306b #join
|
||||
ret
|
||||
|
||||
.type vx_warp_id, @function
|
||||
.global vx_warp_id
|
||||
vx_warp_id:
|
||||
csrr a0, CSR_LWID # read warp index
|
||||
ret
|
||||
|
||||
.type vx_warp_gid, @function
|
||||
.global vx_warp_gid
|
||||
vx_warp_gid:
|
||||
csrr a0, CSR_GWID # read warp index
|
||||
ret
|
||||
|
||||
.type vx_thread_id, @function
|
||||
.global vx_thread_id
|
||||
vx_thread_id:
|
||||
csrr a0, CSR_LTID # read thread index
|
||||
ret
|
||||
|
||||
.type vx_thread_gid, @function
|
||||
.global vx_thread_gid
|
||||
vx_thread_gid:
|
||||
csrr a0, CSR_GTID # read thread index
|
||||
ret
|
||||
|
||||
.type vx_num_cycles, @function
|
||||
.global vx_num_cycles
|
||||
vx_num_cycles:
|
||||
csrr a0, CSR_CYCLL
|
||||
ret
|
||||
|
||||
.type vx_num_instrs, @function
|
||||
.global vx_num_instrs
|
||||
vx_num_instrs:
|
||||
csrr a0, CSR_INSTL
|
||||
ret
|
|
@ -7,35 +7,38 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Spawns Warps
|
||||
// Spawn warps
|
||||
void vx_wspawn(unsigned numWarps, unsigned PC_spawn);
|
||||
|
||||
// Changes thread mask (activated/deactivates threads)
|
||||
// Set thread mask
|
||||
void vx_tmc(unsigned numThreads);
|
||||
|
||||
// Warp Barrier
|
||||
void vx_barrier(unsigned barriedID, unsigned numWarps);
|
||||
|
||||
// split on a predicate
|
||||
// Split on a predicate
|
||||
void vx_split(unsigned predicate);
|
||||
|
||||
// Join
|
||||
void vx_join(void);
|
||||
|
||||
// Get Hardware thread ID
|
||||
unsigned vx_threadID(void);
|
||||
// Return the warp thread index
|
||||
unsigned vx_thread_id(void);
|
||||
|
||||
// Get hardware warp ID
|
||||
unsigned vx_warpID(void);
|
||||
// Return the core warp index
|
||||
unsigned vx_warp_id(void);
|
||||
|
||||
// Get global warp number
|
||||
unsigned vx_warpNum(void);
|
||||
// Return processor unique thread id
|
||||
unsigned vx_thread_gid(void);
|
||||
|
||||
// Get Number cycles/Inst
|
||||
unsigned vx_getCycles(void);
|
||||
unsigned vx_getInst(void);
|
||||
// Return processor unique warp id
|
||||
unsigned vx_warp_gid(void);
|
||||
|
||||
void vx_resetStack(void);
|
||||
// Return number of cycles
|
||||
unsigned vx_num_cycles(void);
|
||||
|
||||
// Return number of instructions
|
||||
unsigned vx_num_instrs(void);
|
||||
|
||||
#define __if(b) vx_split(b); \
|
||||
if (b)
|
||||
|
|
|
@ -1,85 +0,0 @@
|
|||
.section .text
|
||||
|
||||
.type vx_wspawn, @function
|
||||
.global vx_wspawn
|
||||
vx_wspawn:
|
||||
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
||||
ret
|
||||
|
||||
.type vx_tmc, @function
|
||||
.global vx_tmc
|
||||
vx_tmc:
|
||||
.word 0x0005006b # tmc a0
|
||||
ret
|
||||
|
||||
.type vx_barrier, @function
|
||||
.global vx_barrier
|
||||
vx_barrier:
|
||||
.word 0x00b5406b # barrier a0(barrier id), a1(numWarps)
|
||||
ret
|
||||
|
||||
.type vx_split, @function
|
||||
.global vx_split
|
||||
vx_split:
|
||||
.word 0x0005206b # split a0
|
||||
ret
|
||||
|
||||
.type vx_join, @function
|
||||
.global vx_join
|
||||
vx_join:
|
||||
.word 0x0000306b #join
|
||||
ret
|
||||
|
||||
.type vx_warpID, @function
|
||||
.global vx_warpID
|
||||
vx_warpID:
|
||||
csrr a0, 0x21 # read warp IDs
|
||||
ret
|
||||
|
||||
.type vx_warpNum, @function
|
||||
.global vx_warpNum
|
||||
vx_warpNum:
|
||||
csrr a0, 0x22 # read warp IDs
|
||||
ret
|
||||
|
||||
.type vx_threadID, @function
|
||||
.global vx_threadID
|
||||
vx_threadID:
|
||||
csrr a0, 0x20 # read thread IDs
|
||||
ret
|
||||
|
||||
.type vx_getCycles, @function
|
||||
.global vx_getCycles
|
||||
vx_getCycles:
|
||||
csrr a0, 0x26 # read thread IDs
|
||||
ret
|
||||
|
||||
.type vx_getInst, @function
|
||||
.global vx_getInst
|
||||
vx_getInst:
|
||||
csrr a0, 0x25 # read thread IDs
|
||||
ret
|
||||
|
||||
.type vx_resetStack, @function
|
||||
.global vx_resetStack
|
||||
vx_resetStack:
|
||||
li a0, 4
|
||||
.word 0x0005006b # tmc 4
|
||||
|
||||
csrr a3, 0x21 # get wid
|
||||
slli a3, a3, 15 # shift by wid
|
||||
csrr a2, 0x20 # get tid
|
||||
slli a1, a2, 10 # multiply tid by 1024
|
||||
slli a2, a2, 2 # multiply tid by 4
|
||||
lui sp, 0x6ffff # load base sp
|
||||
sub sp, sp, a1 # sub sp - (1024*tid)
|
||||
sub sp, sp, a3 # shoft per warp
|
||||
add sp, sp, a2 # shift sp for better performance
|
||||
|
||||
csrr a3, 0x21 # get wid
|
||||
beqz a3, RETURN
|
||||
li a0, 0
|
||||
.word 0x0005006b # tmc 0
|
||||
RETURN:
|
||||
ret
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
|
||||
#include "../config.h"
|
||||
|
||||
.type vx_print_str, @function
|
||||
.global vx_print_str
|
||||
|
@ -29,5 +29,5 @@ vx_printc:
|
|||
|
||||
.section .data
|
||||
print_addr:
|
||||
.word 0x00010000
|
||||
.word IO_BUS_ADDR
|
||||
|
|
@ -8,6 +8,7 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
static char * hextoa[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"};
|
||||
|
||||
void vx_print_hex(unsigned);
|
||||
void vx_printf(const char *, unsigned);
|
||||
|
||||
|
|
|
@ -313,7 +313,7 @@ void _kill()
|
|||
|
||||
unsigned _getpid()
|
||||
{
|
||||
return vx_threadID();
|
||||
return vx_thread_id();
|
||||
}
|
||||
|
||||
void _unlink()
|
||||
|
|
|
@ -38,20 +38,18 @@ vx_set_sp:
|
|||
addi gp, gp, %pcrel_lo(1b)
|
||||
.option pop
|
||||
|
||||
csrr a3, 0x22 # get global warp number
|
||||
slli a3, a3, 0x1a # shift by wid
|
||||
csrr a2, 0x20 # get tid
|
||||
slli a1, a2, 10 # multiply tid by 1024
|
||||
csrr a1, CSR_GTID # get gtid
|
||||
slli a1, a1, 10 # multiply tid by 1024
|
||||
csrr a2, CSR_LTID # get tid
|
||||
slli a2, a2, 2 # multiply tid by 4
|
||||
lui sp, 0x6ffff # load base sp
|
||||
sub sp, sp, a1 # sub sp - (1024*tid)
|
||||
sub sp, sp, a3 # shoft per warp
|
||||
add sp, sp, a2 # shift sp for better performance
|
||||
lui sp, STACK_BASE_ADDR # load base sp
|
||||
sub sp, sp, a1 # sub thread block
|
||||
add sp, sp, a2 # reduce addr collision for perf
|
||||
|
||||
csrr a3, 0x21 # get wid
|
||||
csrr a3, CSR_LWID # get wid
|
||||
beqz a3, RETURN
|
||||
li a0, 0
|
||||
.word 0x0005006b # tmc 0
|
||||
.word 0x0005006b # tmc 0
|
||||
RETURN:
|
||||
ret
|
||||
|
||||
|
|
|
@ -12,8 +12,8 @@ CPY = $(TOOLPATH)/riscv32-unknown-elf-objcopy
|
|||
|
||||
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_TEST = ../common/tests.c
|
||||
|
||||
|
|
|
@ -41,8 +41,8 @@ void mat_add_kernel(void * void_arguments)
|
|||
{
|
||||
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
|
||||
|
||||
unsigned wid = vx_warpID();
|
||||
unsigned tid = vx_threadID();
|
||||
unsigned wid = vx_warp_id();
|
||||
unsigned tid = vx_thread_id();
|
||||
|
||||
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
|
||||
|
||||
|
@ -77,7 +77,7 @@ int main()
|
|||
// void * hellp = malloc(4);
|
||||
vx_print_str("Confirm Dev Main\n");
|
||||
|
||||
vx_print_str("vx_spawnWarps\n");
|
||||
vx_print_str("vx_spawn_warps\n");
|
||||
|
||||
mat_add_args_t arguments;
|
||||
arguments.x = x;
|
||||
|
@ -91,7 +91,7 @@ int main()
|
|||
int numThreads = 4;
|
||||
|
||||
// First kernel call
|
||||
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_print_mat(z, arguments.numRows, arguments.numColums);
|
||||
|
||||
|
||||
|
@ -102,7 +102,7 @@ int main()
|
|||
arguments.numRows = 4;
|
||||
|
||||
// Second Kernel Call
|
||||
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_print_mat(z, arguments.numRows, arguments.numColums);
|
||||
|
||||
|
||||
|
|
|
@ -77,23 +77,23 @@ Disassembly of section .text:
|
|||
800000cc: 0000306b 0x306b
|
||||
800000d0: 00008067 ret
|
||||
|
||||
800000d4 <vx_warpID>:
|
||||
800000d4 <vx_warp_id>:
|
||||
800000d4: 02102573 csrr a0,0x21
|
||||
800000d8: 00008067 ret
|
||||
|
||||
800000dc <vx_threadID>:
|
||||
800000dc <vx_thread_id>:
|
||||
800000dc: 02002573 csrr a0,0x20
|
||||
800000e0: 00008067 ret
|
||||
|
||||
800000e4 <vx_getCycles>:
|
||||
800000e4 <vx_num_cycles>:
|
||||
800000e4: 02602573 csrr a0,0x26
|
||||
800000e8: 00008067 ret
|
||||
|
||||
800000ec <vx_getInst>:
|
||||
800000ec <vx_num_instrs>:
|
||||
800000ec: 02502573 csrr a0,0x25
|
||||
800000f0: 00008067 ret
|
||||
|
||||
800000f4 <vx_resetStack>:
|
||||
800000f4 <vx_reset_stack>:
|
||||
800000f4: 00400513 li a0,4
|
||||
800000f8: 0005006b 0x5006b
|
||||
800000fc: 021026f3 csrr a3,0x21
|
||||
|
@ -219,7 +219,7 @@ Disassembly of section .text:
|
|||
8000029c: 3a01a783 lw a5,928(gp) # 80016ba8 <global_argument_struct>
|
||||
800002a0: 00078513 mv a0,a5
|
||||
800002a4: 000700e7 jalr a4
|
||||
800002a8: e2dff0ef jal ra,800000d4 <vx_warpID>
|
||||
800002a8: e2dff0ef jal ra,800000d4 <vx_warp_id>
|
||||
800002ac: fea42623 sw a0,-20(s0)
|
||||
800002b0: fec42783 lw a5,-20(s0)
|
||||
800002b4: 00078863 beqz a5,800002c4 <setup_call+0x48>
|
||||
|
@ -234,7 +234,7 @@ Disassembly of section .text:
|
|||
800002d8: 02010113 addi sp,sp,32
|
||||
800002dc: 00008067 ret
|
||||
|
||||
800002e0 <vx_spawnWarps>:
|
||||
800002e0 <vx_spawn_warps>:
|
||||
800002e0: fe010113 addi sp,sp,-32
|
||||
800002e4: 00112e23 sw ra,28(sp)
|
||||
800002e8: 00812c23 sw s0,24(sp)
|
||||
|
@ -269,10 +269,10 @@ Disassembly of section .text:
|
|||
80000354: 3b01a783 lw a5,944(gp) # 80016bb8 <pocl_threads>
|
||||
80000358: 00078513 mv a0,a5
|
||||
8000035c: d59ff0ef jal ra,800000b4 <vx_tmc>
|
||||
80000360: d7dff0ef jal ra,800000dc <vx_threadID>
|
||||
80000360: d7dff0ef jal ra,800000dc <vx_thread_id>
|
||||
80000364: 00050793 mv a5,a0
|
||||
80000368: fef42023 sw a5,-32(s0)
|
||||
8000036c: d69ff0ef jal ra,800000d4 <vx_warpID>
|
||||
8000036c: d69ff0ef jal ra,800000d4 <vx_warp_id>
|
||||
80000370: 00050793 mv a5,a0
|
||||
80000374: fcf42e23 sw a5,-36(s0)
|
||||
80000378: fe042623 sw zero,-20(s0)
|
||||
|
@ -389,14 +389,14 @@ Disassembly of section .text:
|
|||
8000052c: 00078593 mv a1,a5
|
||||
80000530: 00070513 mv a0,a4
|
||||
80000534: b79ff0ef jal ra,800000ac <vx_wspawn>
|
||||
80000538: badff0ef jal ra,800000e4 <vx_getCycles>
|
||||
80000538: badff0ef jal ra,800000e4 <vx_num_cycles>
|
||||
8000053c: fea42623 sw a0,-20(s0)
|
||||
80000540: badff0ef jal ra,800000ec <vx_getInst>
|
||||
80000540: badff0ef jal ra,800000ec <vx_num_instrs>
|
||||
80000544: fea42423 sw a0,-24(s0)
|
||||
80000548: dfdff0ef jal ra,80000344 <pocl_spawn_real>
|
||||
8000054c: b99ff0ef jal ra,800000e4 <vx_getCycles>
|
||||
8000054c: b99ff0ef jal ra,800000e4 <vx_num_cycles>
|
||||
80000550: fea42223 sw a0,-28(s0)
|
||||
80000554: b99ff0ef jal ra,800000ec <vx_getInst>
|
||||
80000554: b99ff0ef jal ra,800000ec <vx_num_instrs>
|
||||
80000558: fea42023 sw a0,-32(s0)
|
||||
8000055c: fe442703 lw a4,-28(s0)
|
||||
80000560: fec42783 lw a5,-20(s0)
|
||||
|
@ -424,7 +424,7 @@ Disassembly of section .text:
|
|||
800005b0: b85ff0ef jal ra,80000134 <vx_print_str>
|
||||
800005b4: 00400513 li a0,4
|
||||
800005b8: afdff0ef jal ra,800000b4 <vx_tmc>
|
||||
800005bc: b21ff0ef jal ra,800000dc <vx_threadID>
|
||||
800005bc: b21ff0ef jal ra,800000dc <vx_thread_id>
|
||||
800005c0: fea42623 sw a0,-20(s0)
|
||||
800005c4: fec42703 lw a4,-20(s0)
|
||||
800005c8: 88418693 addi a3,gp,-1916 # 8001608c <tmc_array>
|
||||
|
@ -473,7 +473,7 @@ Disassembly of section .text:
|
|||
8000066c: 00112e23 sw ra,28(sp)
|
||||
80000670: 00812c23 sw s0,24(sp)
|
||||
80000674: 02010413 addi s0,sp,32
|
||||
80000678: a65ff0ef jal ra,800000dc <vx_threadID>
|
||||
80000678: a65ff0ef jal ra,800000dc <vx_thread_id>
|
||||
8000067c: fea42623 sw a0,-20(s0)
|
||||
80000680: fec42783 lw a5,-20(s0)
|
||||
80000684: 0027b793 sltiu a5,a5,2
|
||||
|
@ -568,7 +568,7 @@ Disassembly of section .text:
|
|||
800007e0: 00112e23 sw ra,28(sp)
|
||||
800007e4: 00812c23 sw s0,24(sp)
|
||||
800007e8: 02010413 addi s0,sp,32
|
||||
800007ec: 8e9ff0ef jal ra,800000d4 <vx_warpID>
|
||||
800007ec: 8e9ff0ef jal ra,800000d4 <vx_warp_id>
|
||||
800007f0: fea42623 sw a0,-20(s0)
|
||||
800007f4: 3c418713 addi a4,gp,964 # 80016bcc <wsapwn_arr>
|
||||
800007f8: fec42783 lw a5,-20(s0)
|
||||
|
@ -664,9 +664,9 @@ Disassembly of section .text:
|
|||
80000948: fca42623 sw a0,-52(s0)
|
||||
8000094c: fcc42783 lw a5,-52(s0)
|
||||
80000950: fef42623 sw a5,-20(s0)
|
||||
80000954: f80ff0ef jal ra,800000d4 <vx_warpID>
|
||||
80000954: f80ff0ef jal ra,800000d4 <vx_warp_id>
|
||||
80000958: fea42423 sw a0,-24(s0)
|
||||
8000095c: f80ff0ef jal ra,800000dc <vx_threadID>
|
||||
8000095c: f80ff0ef jal ra,800000dc <vx_thread_id>
|
||||
80000960: fea42223 sw a0,-28(s0)
|
||||
80000964: fec42783 lw a5,-20(s0)
|
||||
80000968: 0107a783 lw a5,16(a5)
|
||||
|
@ -808,7 +808,7 @@ Disassembly of section .text:
|
|||
80000b78: 800017b7 lui a5,0x80001
|
||||
80000b7c: 93878613 addi a2,a5,-1736 # 80000938 <__BSS_END__+0xfffe9d48>
|
||||
80000b80: 00070513 mv a0,a4
|
||||
80000b84: f5cff0ef jal ra,800002e0 <vx_spawnWarps>
|
||||
80000b84: f5cff0ef jal ra,800002e0 <vx_spawn_warps>
|
||||
80000b88: fe442783 lw a5,-28(s0)
|
||||
80000b8c: 00078713 mv a4,a5
|
||||
80000b90: fe042783 lw a5,-32(s0)
|
||||
|
@ -833,7 +833,7 @@ Disassembly of section .text:
|
|||
80000bdc: 800017b7 lui a5,0x80001
|
||||
80000be0: 93878613 addi a2,a5,-1736 # 80000938 <__BSS_END__+0xfffe9d48>
|
||||
80000be4: 00070513 mv a0,a4
|
||||
80000be8: ef8ff0ef jal ra,800002e0 <vx_spawnWarps>
|
||||
80000be8: ef8ff0ef jal ra,800002e0 <vx_spawn_warps>
|
||||
80000bec: fe442783 lw a5,-28(s0)
|
||||
80000bf0: 00078713 mv a4,a5
|
||||
80000bf4: fe042783 lw a5,-32(s0)
|
||||
|
|
|
@ -9,7 +9,7 @@ CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
NEWLIB = ../../newlib/newlib.c ../../newlib/newlib_notimp.c ../../newlib/newlib.s
|
||||
VX_STR =
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO =
|
||||
VX_API =
|
||||
VX_FIO =
|
||||
|
|
|
@ -328,7 +328,7 @@ Disassembly of section .text:
|
|||
80000488: 00112623 sw ra,12(sp)
|
||||
8000048c: 00812423 sw s0,8(sp)
|
||||
80000490: 01010413 addi s0,sp,16
|
||||
80000494: 0b4000ef jal ra,80000548 <vx_threadID>
|
||||
80000494: 0b4000ef jal ra,80000548 <vx_thread_id>
|
||||
80000498: 00050793 mv a5,a0
|
||||
8000049c: 00078513 mv a0,a5
|
||||
800004a0: 00c12083 lw ra,12(sp)
|
||||
|
@ -388,15 +388,15 @@ Disassembly of section .text:
|
|||
80000538: 0000306b 0x306b
|
||||
8000053c: 00008067 ret
|
||||
|
||||
80000540 <vx_warpID>:
|
||||
80000540 <vx_warp_id>:
|
||||
80000540: 02102573 csrr a0,0x21
|
||||
80000544: 00008067 ret
|
||||
|
||||
80000548 <vx_threadID>:
|
||||
80000548 <vx_thread_id>:
|
||||
80000548: 02002573 csrr a0,0x20
|
||||
8000054c: 00008067 ret
|
||||
|
||||
80000550 <vx_resetStack>:
|
||||
80000550 <vx_reset_stack>:
|
||||
80000550: 00400513 li a0,4
|
||||
80000554: 0005006b 0x5006b
|
||||
80000558: 021026f3 csrr a3,0x21
|
||||
|
|
|
@ -13,10 +13,10 @@ CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
NEWLIB = ../../newlib/newlib.c
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
VX_FIO = ../../fileio/fileio.S
|
||||
LIBS = -Wl,--whole-archive ./libs/libvecadd.a -Wl,--no-whole-archive ./libs/libOpenCL.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
||||
VX_MAIN = vx_pocl_main
|
||||
|
|
|
@ -485,7 +485,7 @@ Disassembly of section .text:
|
|||
800006ec: 00112623 sw ra,12(sp)
|
||||
800006f0: 00812423 sw s0,8(sp)
|
||||
800006f4: 01010413 addi s0,sp,16
|
||||
800006f8: 0e8000ef jal ra,800007e0 <vx_threadID>
|
||||
800006f8: 0e8000ef jal ra,800007e0 <vx_thread_id>
|
||||
800006fc: 00050793 mv a5,a0
|
||||
80000700: 00078513 mv a0,a5
|
||||
80000704: 00c12083 lw ra,12(sp)
|
||||
|
@ -558,15 +558,15 @@ Disassembly of section .text:
|
|||
800007d0: 0000306b 0x306b
|
||||
800007d4: 00008067 ret
|
||||
|
||||
800007d8 <vx_warpID>:
|
||||
800007d8 <vx_warp_id>:
|
||||
800007d8: 02102573 csrr a0,0x21
|
||||
800007dc: 00008067 ret
|
||||
|
||||
800007e0 <vx_threadID>:
|
||||
800007e0 <vx_thread_id>:
|
||||
800007e0: 02002573 csrr a0,0x20
|
||||
800007e4: 00008067 ret
|
||||
|
||||
800007e8 <vx_resetStack>:
|
||||
800007e8 <vx_reset_stack>:
|
||||
800007e8: 00400513 li a0,4
|
||||
800007ec: 0005006b 0x5006b
|
||||
800007f0: 021026f3 csrr a3,0x21
|
||||
|
|
|
@ -9,10 +9,10 @@ CPY = /opt/riscv/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
NEWLIB = ../../newlib/newlib.c ../../newlib/newlib_notimp.c ../../newlib/newlib.s
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
VX_FIO = ../../fileio/fileio.S
|
||||
|
||||
VX_MAIN = ./vx_nl_main.c
|
||||
|
||||
|
|
|
@ -407,15 +407,15 @@ Disassembly of section .text:
|
|||
800005bc: 0000306b 0x306b
|
||||
800005c0: 00008067 ret
|
||||
|
||||
800005c4 <vx_warpID>:
|
||||
800005c4 <vx_warp_id>:
|
||||
800005c4: 02102573 csrr a0,0x21
|
||||
800005c8: 00008067 ret
|
||||
|
||||
800005cc <vx_threadID>:
|
||||
800005cc <vx_thread_id>:
|
||||
800005cc: 02002573 csrr a0,0x20
|
||||
800005d0: 00008067 ret
|
||||
|
||||
800005d4 <vx_resetStack>:
|
||||
800005d4 <vx_reset_stack>:
|
||||
800005d4: 00400513 li a0,4
|
||||
800005d8: 0005006b 0x5006b
|
||||
800005dc: 021026f3 csrr a3,0x21
|
||||
|
@ -544,7 +544,7 @@ Disassembly of section .text:
|
|||
80000788: 9947a783 lw a5,-1644(a5) # 81001994 <_PathLocale+0xffffff9c>
|
||||
8000078c: 00078513 mv a0,a5
|
||||
80000790: 000700e7 jalr a4
|
||||
80000794: e31ff0ef jal ra,800005c4 <vx_warpID>
|
||||
80000794: e31ff0ef jal ra,800005c4 <vx_warp_id>
|
||||
80000798: fea42623 sw a0,-20(s0)
|
||||
8000079c: fec42783 lw a5,-20(s0)
|
||||
800007a0: 00078863 beqz a5,800007b0 <setup_call+0x54>
|
||||
|
@ -559,7 +559,7 @@ Disassembly of section .text:
|
|||
800007c4: 02010113 addi sp,sp,32
|
||||
800007c8: 00008067 ret
|
||||
|
||||
800007cc <vx_spawnWarps>:
|
||||
800007cc <vx_spawn_warps>:
|
||||
800007cc: fe010113 addi sp,sp,-32
|
||||
800007d0: 00112e23 sw ra,28(sp)
|
||||
800007d4: 00812c23 sw s0,24(sp)
|
||||
|
@ -599,7 +599,7 @@ Disassembly of section .text:
|
|||
80000854: dc1ff0ef jal ra,80000614 <vx_print_str>
|
||||
80000858: 00400513 li a0,4
|
||||
8000085c: d49ff0ef jal ra,800005a4 <vx_tmc>
|
||||
80000860: d6dff0ef jal ra,800005cc <vx_threadID>
|
||||
80000860: d6dff0ef jal ra,800005cc <vx_thread_id>
|
||||
80000864: fea42623 sw a0,-20(s0)
|
||||
80000868: fec42703 lw a4,-20(s0)
|
||||
8000086c: 810017b7 lui a5,0x81001
|
||||
|
@ -652,7 +652,7 @@ Disassembly of section .text:
|
|||
80000920: 00112e23 sw ra,28(sp)
|
||||
80000924: 00812c23 sw s0,24(sp)
|
||||
80000928: 02010413 addi s0,sp,32
|
||||
8000092c: ca1ff0ef jal ra,800005cc <vx_threadID>
|
||||
8000092c: ca1ff0ef jal ra,800005cc <vx_thread_id>
|
||||
80000930: fea42623 sw a0,-20(s0)
|
||||
80000934: fec42783 lw a5,-20(s0)
|
||||
80000938: 0027b793 sltiu a5,a5,2
|
||||
|
@ -754,7 +754,7 @@ Disassembly of section .text:
|
|||
80000ab0: 00112e23 sw ra,28(sp)
|
||||
80000ab4: 00812c23 sw s0,24(sp)
|
||||
80000ab8: 02010413 addi s0,sp,32
|
||||
80000abc: b09ff0ef jal ra,800005c4 <vx_warpID>
|
||||
80000abc: b09ff0ef jal ra,800005c4 <vx_warp_id>
|
||||
80000ac0: fea42623 sw a0,-20(s0)
|
||||
80000ac4: 810027b7 lui a5,0x81002
|
||||
80000ac8: fec42703 lw a4,-20(s0)
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
COMP = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-g++
|
||||
#COMP = /opt/riscv-new/drops/bin/riscv32-unknown-elf-g++
|
||||
|
||||
CC_FLAGS = -march=rv32im -mabi=ilp32 -O0 -Wl,-Bstatic,-T,../../startup/vx_link.ld -ffreestanding -nostdlib
|
||||
CC_FLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,../../startup/vx_link.ld -ffreestanding -nostdlib
|
||||
|
||||
DMP = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objdump
|
||||
CPY = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
||||
|
@ -10,10 +10,10 @@ CPY = ~/dev/riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
NEWLIB = ../../newlib/newlib.c
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
VX_FIO = ../../fileio/fileio.S
|
||||
|
||||
VX_MAIN = vx_simple_main
|
||||
|
||||
|
|
|
@ -6,15 +6,19 @@
|
|||
|
||||
int tmc_array[4] = {5,5,5,5};
|
||||
|
||||
void test_tmc_impl()
|
||||
{
|
||||
unsigned tid = vx_thread_id(); // Get TID
|
||||
tmc_array[tid] = tid;
|
||||
}
|
||||
|
||||
void test_tmc()
|
||||
{
|
||||
//vx_print_str("testing_tmc\n");
|
||||
vx_print_str("testing_tmc\n");
|
||||
|
||||
vx_tmc(4);
|
||||
|
||||
unsigned tid = vx_threadID(); // Get TID
|
||||
|
||||
tmc_array[tid] = tid;
|
||||
test_tmc_impl();
|
||||
|
||||
vx_tmc(1);
|
||||
|
||||
|
@ -34,7 +38,7 @@ int div_arr[4];
|
|||
|
||||
void test_divergence()
|
||||
{
|
||||
unsigned tid = vx_threadID(); // Get TID
|
||||
unsigned tid = vx_thread_id(); // Get TID
|
||||
|
||||
bool b = tid < 2;
|
||||
__if (b)
|
||||
|
@ -73,20 +77,16 @@ void test_divergence()
|
|||
vx_print_str("\n");
|
||||
vx_print_hex(div_arr[3]);
|
||||
vx_print_str("\n");
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned wsapwn_arr[4];
|
||||
|
||||
|
||||
void simple_kernel()
|
||||
{
|
||||
unsigned wid = vx_warpID();
|
||||
unsigned wid = vx_warp_id();
|
||||
|
||||
wsapwn_arr[wid] = wid;
|
||||
|
||||
wid = vx_warpID();
|
||||
if (wid != 0)
|
||||
{
|
||||
vx_tmc(0);
|
||||
|
|
|
@ -24,7 +24,7 @@ unsigned y[] = {1, 1, 1, 1,
|
|||
1, 1, 1, 1,
|
||||
1, 1, 1, 1,
|
||||
1, 1, 1, 1};
|
||||
|
||||
F
|
||||
unsigned z[] = {0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
|
@ -34,8 +34,8 @@ void mat_add_kernel(void * void_arguments)
|
|||
{
|
||||
mat_add_args_t * arguments = (mat_add_args_t *) void_arguments;
|
||||
|
||||
unsigned wid = vx_warpID();
|
||||
unsigned tid = vx_threadID();
|
||||
unsigned wid = vx_warp_id();
|
||||
unsigned tid = vx_thread_id();
|
||||
|
||||
bool valid = (wid < arguments->numRows) && (tid < arguments->numColums);
|
||||
|
||||
|
@ -50,7 +50,7 @@ void mat_add_kernel(void * void_arguments)
|
|||
|
||||
int main()
|
||||
{
|
||||
// Main is called with all threads active of warp 0
|
||||
// ensure single thread
|
||||
vx_tmc(1);
|
||||
|
||||
vx_print_str("Let's start... (This might take a while)\n");
|
||||
|
@ -84,11 +84,9 @@ int main()
|
|||
vx_print_str("Wr->read and repeat(Wr) tests passed!\n");
|
||||
}
|
||||
|
||||
|
||||
vx_print_str("Simple Main\n");
|
||||
|
||||
|
||||
// // TMC test
|
||||
// TMC test
|
||||
test_tmc();
|
||||
|
||||
// Control Divergence Test
|
||||
|
@ -118,7 +116,7 @@ int main()
|
|||
|
||||
}
|
||||
|
||||
vx_print_str("vx_spawnWarps mat_add_kernel\n");
|
||||
vx_print_str("vx_spawn_warps mat_add_kernel\n");
|
||||
|
||||
mat_add_args_t arguments;
|
||||
arguments.x = x;
|
||||
|
@ -131,7 +129,7 @@ int main()
|
|||
int numWarps = 4;
|
||||
int numThreads = 4;
|
||||
|
||||
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
vx_spawn_warps(numWarps, numThreads, mat_add_kernel, &arguments);
|
||||
|
||||
vx_print_str("Waiting to ensure other warps are done... (Takes a while)\n");
|
||||
for (int i = 0; i < 5000; i++) {}
|
||||
|
|
File diff suppressed because it is too large
Load diff
Binary file not shown.
File diff suppressed because it is too large
Load diff
|
@ -2,7 +2,7 @@
|
|||
|
||||
|
||||
#include "io/io.h" // Printing functions
|
||||
#include "intrinsics/instrinsics.h" // vx_threadID and vx_WarpID
|
||||
#include "intrinsics/instrinsics.h" // vx_thread_id and vx_WarpID
|
||||
|
||||
struct args
|
||||
{
|
||||
|
@ -14,7 +14,7 @@ void function(void * arg)
|
|||
{
|
||||
struct args * real_arg = (struct args *) arg;
|
||||
|
||||
unsigned tid = vx_threadID();
|
||||
unsigned tid = vx_thread_id();
|
||||
unsigned wid = vx_WarpID();
|
||||
|
||||
__if(something) // Control divergent if
|
||||
|
@ -36,7 +36,7 @@ int main()
|
|||
struct args arg;
|
||||
arg.data = data;
|
||||
|
||||
vx_spawnWarps(numWarps, numThreads, function, &data);
|
||||
vx_spawn_warps(numWarps, numThreads, function, &data);
|
||||
|
||||
|
||||
}
|
|
@ -13,10 +13,10 @@ CPY = ../../../../riscv-gnu-toolchain/drops/bin/riscv32-unknown-elf-objcopy
|
|||
|
||||
NEWLIB = ../../newlib/newlib.c
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.S
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
VX_FIO = ../../fileio/fileio.S
|
||||
LIBS = -Wl,--whole-archive ./libs/libvecadd.a -Wl,--no-whole-archive ./libs/libOpenCL.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libc.a ../../../../riscv-gnu-toolchain/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
||||
VX_MAIN = vx_pocl_main
|
||||
|
|
|
@ -344,7 +344,7 @@ Disassembly of section .text:
|
|||
800004b8: 00112623 sw ra,12(sp)
|
||||
800004bc: 00812423 sw s0,8(sp)
|
||||
800004c0: 01010413 addi s0,sp,16
|
||||
800004c4: 0ac000ef jal ra,80000570 <vx_threadID>
|
||||
800004c4: 0ac000ef jal ra,80000570 <vx_thread_id>
|
||||
800004c8: 00050793 mv a5,a0
|
||||
800004cc: 00078513 mv a0,a5
|
||||
800004d0: 00c12083 lw ra,12(sp)
|
||||
|
@ -406,11 +406,11 @@ Disassembly of section .text:
|
|||
80000568: 02102573 csrr a0,0x21
|
||||
8000056c: 00008067 ret
|
||||
|
||||
80000570 <vx_threadID>:
|
||||
80000570 <vx_thread_id>:
|
||||
80000570: 02002573 csrr a0,0x20
|
||||
80000574: 00008067 ret
|
||||
|
||||
80000578 <vx_resetStack>:
|
||||
80000578 <vx_reset_stack>:
|
||||
80000578: 00400513 li a0,4
|
||||
8000057c: 0005006b 0x5006b
|
||||
80000580: 021026f3 csrr a3,0x21
|
||||
|
|
|
@ -16,9 +16,9 @@ CPY = /home/priya/dev/riscv_vec/riscv-gnu/bin/riscv32-unknown-elf-objcopy
|
|||
NEWLIB = ../../newlib/newlib.c
|
||||
VX_STR = ../../startup/vx_start.S
|
||||
VX_INT = ../../intrinsics/vx_intrinsics.s
|
||||
VX_IO = ../../io/vx_io.s ../../io/vx_io.c
|
||||
VX_IO = ../../io/vx_io.S ../../io/vx_io.c
|
||||
VX_API = ../../vx_api/vx_api.c
|
||||
VX_FIO = ../../fileio/fileio.s
|
||||
VX_FIO = ../../fileio/fileio.S
|
||||
VX_VEC = vx_vec.s
|
||||
#LIBS = /home/fares/dev/riscv-gnu-toolchain-vector/drops/riscv32-unknown-elf/lib/libc.a /home/fares/dev/riscv-gnu-toolchain-vector/drops/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
LIBS = /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libc.a /home/priya/dev/riscv_vec/riscv-gnu/riscv32-unknown-elf/lib/libstdc++.a -static-libgcc -lgcc
|
||||
|
|
|
@ -500,7 +500,7 @@ Disassembly of section .text:
|
|||
80000718: 00112623 sw ra,12(sp)
|
||||
8000071c: 00812423 sw s0,8(sp)
|
||||
80000720: 01010413 addi s0,sp,16
|
||||
80000724: 0e8000ef jal ra,8000080c <vx_threadID>
|
||||
80000724: 0e8000ef jal ra,8000080c <vx_thread_id>
|
||||
80000728: 00050793 mv a5,a0
|
||||
8000072c: 00078513 mv a0,a5
|
||||
80000730: 00c12083 lw ra,12(sp)
|
||||
|
@ -573,15 +573,15 @@ Disassembly of section .text:
|
|||
800007fc: 0000306b 0x306b
|
||||
80000800: 00008067 ret
|
||||
|
||||
80000804 <vx_warpID>:
|
||||
80000804 <vx_warp_id>:
|
||||
80000804: 02102573 csrr a0,0x21
|
||||
80000808: 00008067 ret
|
||||
|
||||
8000080c <vx_threadID>:
|
||||
8000080c <vx_thread_id>:
|
||||
8000080c: 02002573 csrr a0,0x20
|
||||
80000810: 00008067 ret
|
||||
|
||||
80000814 <vx_resetStack>:
|
||||
80000814 <vx_reset_stack>:
|
||||
80000814: 00400513 li a0,4
|
||||
80000818: 0005006b 0x5006b
|
||||
8000081c: 021026f3 csrr a3,0x21
|
||||
|
|
|
@ -19,12 +19,12 @@ void spawn_warp_runonce() {
|
|||
global_function_pointer(global_argument_struct);
|
||||
|
||||
// resume single-thread execution on exit
|
||||
unsigned wid = vx_warpID();
|
||||
unsigned wid = vx_warp_id();
|
||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||
vx_tmc(tmask);
|
||||
}
|
||||
|
||||
void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) {
|
||||
void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) {
|
||||
global_function_pointer = func_ptr;
|
||||
global_argument_struct = args;
|
||||
global_num_threads = numThreads;
|
||||
|
@ -43,15 +43,15 @@ void pocl_spawn_warp_runonce() {
|
|||
// active all threads
|
||||
vx_tmc(pocl_threads);
|
||||
|
||||
int x = vx_threadID();
|
||||
int y = vx_warpNum();
|
||||
int x = vx_thread_id();
|
||||
int y = vx_warp_gid();
|
||||
|
||||
// call kernel routine
|
||||
(pocl_pfn)(pocl_args, pocl_ctx, x, y, 0);
|
||||
|
||||
// resume single-thread execution on exit
|
||||
int wid = vx_warpID();
|
||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||
int wid = vx_warp_id();
|
||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||
vx_tmc(tmask);
|
||||
}
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ extern "C" {
|
|||
|
||||
typedef void (*func_t)(void *);
|
||||
|
||||
void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args);
|
||||
void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args);
|
||||
|
||||
struct context_t {
|
||||
uint32_t num_groups[3];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue