mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Texture Instruction - Fixed Color
This commit is contained in:
parent
b8b3267757
commit
f3f62e9e7b
49 changed files with 2636 additions and 63 deletions
12
benchmarks/opencl/guassian/.depend
Normal file
12
benchmarks/opencl/guassian/.depend
Normal file
|
@ -0,0 +1,12 @@
|
|||
main.o: main.cc gaussianElim.h clutils.h \
|
||||
/opt/pocl/runtime/include/CL/cl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_version.h \
|
||||
/opt/pocl/runtime/include/CL/cl_platform.h \
|
||||
/opt/pocl/runtime/include/CL/opencl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_gl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_gl_ext.h \
|
||||
/opt/pocl/runtime/include/CL/cl_ext.h
|
||||
clutils.o: clutils.cpp /opt/pocl/runtime/include/CL/cl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_version.h \
|
||||
/opt/pocl/runtime/include/CL/cl_platform.h clutils.h utils.h
|
||||
utils.o: utils.cpp utils.h
|
BIN
benchmarks/opencl/guassian/guassian
Executable file
BIN
benchmarks/opencl/guassian/guassian
Executable file
Binary file not shown.
11
benchmarks/opencl/nearn/.depend
Normal file
11
benchmarks/opencl/nearn/.depend
Normal file
|
@ -0,0 +1,11 @@
|
|||
main.o: main.cc nearestNeighbor.h /opt/pocl/runtime/include/CL/opencl.h \
|
||||
/opt/pocl/runtime/include/CL/cl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_version.h \
|
||||
/opt/pocl/runtime/include/CL/cl_platform.h \
|
||||
/opt/pocl/runtime/include/CL/cl_gl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_gl_ext.h \
|
||||
/opt/pocl/runtime/include/CL/cl_ext.h clutils.h
|
||||
clutils.o: clutils.cpp /opt/pocl/runtime/include/CL/cl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_version.h \
|
||||
/opt/pocl/runtime/include/CL/cl_platform.h clutils.h utils.h
|
||||
utils.o: utils.cpp utils.h
|
BIN
benchmarks/opencl/nearn/nearn
Executable file
BIN
benchmarks/opencl/nearn/nearn
Executable file
Binary file not shown.
3
benchmarks/opencl/saxpy/.depend
Normal file
3
benchmarks/opencl/saxpy/.depend
Normal file
|
@ -0,0 +1,3 @@
|
|||
main.o: main.cc /opt/pocl/runtime/include/CL/cl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_version.h \
|
||||
/opt/pocl/runtime/include/CL/cl_platform.h
|
BIN
benchmarks/opencl/saxpy/saxpy
Executable file
BIN
benchmarks/opencl/saxpy/saxpy
Executable file
Binary file not shown.
3
benchmarks/opencl/sfilter/.depend
Normal file
3
benchmarks/opencl/sfilter/.depend
Normal file
|
@ -0,0 +1,3 @@
|
|||
main.o: main.cc /opt/pocl/runtime/include/CL/cl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_version.h \
|
||||
/opt/pocl/runtime/include/CL/cl_platform.h
|
BIN
benchmarks/opencl/sfilter/sfilter
Executable file
BIN
benchmarks/opencl/sfilter/sfilter
Executable file
Binary file not shown.
7
benchmarks/opencl/sgemm/.depend
Normal file
7
benchmarks/opencl/sgemm/.depend
Normal file
|
@ -0,0 +1,7 @@
|
|||
main.o: main.cc /opt/pocl/runtime/include/CL/opencl.h \
|
||||
/opt/pocl/runtime/include/CL/cl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_version.h \
|
||||
/opt/pocl/runtime/include/CL/cl_platform.h \
|
||||
/opt/pocl/runtime/include/CL/cl_gl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_gl_ext.h \
|
||||
/opt/pocl/runtime/include/CL/cl_ext.h
|
BIN
benchmarks/opencl/sgemm/sgemm
Executable file
BIN
benchmarks/opencl/sgemm/sgemm
Executable file
Binary file not shown.
7
benchmarks/opencl/vecadd/.depend
Normal file
7
benchmarks/opencl/vecadd/.depend
Normal file
|
@ -0,0 +1,7 @@
|
|||
main.o: main.cc /opt/pocl/runtime/include/CL/opencl.h \
|
||||
/opt/pocl/runtime/include/CL/cl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_version.h \
|
||||
/opt/pocl/runtime/include/CL/cl_platform.h \
|
||||
/opt/pocl/runtime/include/CL/cl_gl.h \
|
||||
/opt/pocl/runtime/include/CL/cl_gl_ext.h \
|
||||
/opt/pocl/runtime/include/CL/cl_ext.h
|
4
driver/opae/.depend
Normal file
4
driver/opae/.depend
Normal file
|
@ -0,0 +1,4 @@
|
|||
vortex.o: vortex.cpp ../include/vortex.h ../../hw/VX_config.h \
|
||||
vortex_afu.h
|
||||
vx_utils.o: ../common/vx_utils.cpp ../include/vortex.h \
|
||||
../../hw/VX_config.h
|
BIN
driver/opae/vlsim/libopae-c-vlsim.so
Executable file
BIN
driver/opae/vlsim/libopae-c-vlsim.so
Executable file
Binary file not shown.
BIN
driver/opae/vlsim/libvortex.so
Executable file
BIN
driver/opae/vlsim/libvortex.so
Executable file
Binary file not shown.
BIN
driver/stub/libvortex.so
Executable file
BIN
driver/stub/libvortex.so
Executable file
Binary file not shown.
1
driver/tests/tex_demo/.depend
Normal file
1
driver/tests/tex_demo/.depend
Normal file
|
@ -0,0 +1 @@
|
|||
demo.o: demo.cpp ../../include/vortex.h common.h
|
67
driver/tests/tex_demo/Makefile
Normal file
67
driver/tests/tex_demo/Makefile
Normal file
|
@ -0,0 +1,67 @@
|
|||
# Build and run the tex_demo driver test.
#
# Two artefacts are produced:
#   * $(PROJECT)  - the host-side test program, built with the native $(CXX)
#   * kernel.bin  - the device kernel, cross-compiled with the RISC-V toolchain
#                   (kernel.dump is its disassembly, kept for inspection)
#
# User-tunable knobs (override on the command line or in the environment):
#   RISCV_TOOLCHAIN_PATH  root of the riscv32-unknown-elf toolchain
#   VORTEX_RT_PATH        root of the Vortex runtime (headers, linker script, libvortexrt.a)
#   OPTS                  arguments passed to the test program by the run-* targets

RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

OPTS ?= -n64

VX_CC  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy

VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw

VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a

VX_SRCS = kernel.c

# Debug-friendly host build. For an optimised build use instead:
#   -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../../include

PROJECT = demo

SRCS = demo.cpp

# A recipe that fails half-way (e.g. objdump writing through a redirect) must
# not leave a truncated file behind that looks up to date on the next run.
.DELETE_ON_ERROR:

# These names are commands, not files; without this a stray file called
# "clean" or "all" would silently disable the corresponding target.
.PHONY: all run-fpga run-asesim run-vlsim run-rtlsim run-simx clean clean-all

all: $(PROJECT) kernel.bin kernel.dump

kernel.dump: kernel.elf
	$(VX_DP) -D $< > $@

kernel.bin: kernel.elf
	$(VX_CP) -O binary $< $@

# common.h is shared between kernel and host, so both must be rebuilt when it
# changes. It is a prerequisite only; the compile command lists the sources
# explicitly so the header is not passed to the compiler as an input file.
kernel.elf: $(VX_SRCS) common.h
	$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o $@

$(PROJECT): $(SRCS) common.h
	$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -L../../stub -lvortex -o $@

# The run-* targets differ only in which driver library directory is put first
# on LD_LIBRARY_PATH when the test program is started.
run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-asesim: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-vlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > $@

clean:
	rm -rf $(PROJECT) *.o .depend

clean-all: clean
	rm -rf *.elf *.bin *.dump

# Skip the dependency file for both cleaning goals; including it would make
# GNU Make regenerate .depend just before the clean recipe deletes it.
ifeq ($(filter clean clean-all,$(MAKECMDGOALS)),)
    -include .depend
endif
|
14
driver/tests/tex_demo/common.h
Normal file
14
driver/tests/tex_demo/common.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef _COMMON_H_
#define _COMMON_H_

/* Definitions shared by the host program (demo.cpp) and the device kernel
 * (kernel.c). Both sides must agree on this layout because the host copies
 * the structure byte-for-byte into device memory. */

/* uint32_t is used below; include it here so the header is self-contained
 * and does not depend on what the includer happened to pull in first. */
#include <stdint.h>

/* Device address at which the host stores the kernel_arg_t block and from
 * which the kernel reads it. */
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000

/* Arguments handed from the host to the kernel. The *_ptr members carry
 * device addresses as 32-bit integers rather than as pointers. */
struct kernel_arg_t {
  uint32_t num_tasks; /* number of tasks the kernel spawns */
  uint32_t task_size; /* number of elements processed by each task */
  uint32_t src0_ptr;  /* device address of the first source buffer */
  uint32_t src1_ptr;  /* device address of the second source buffer */
  uint32_t dst_ptr;   /* device address of the destination buffer */
};

#endif
|
BIN
driver/tests/tex_demo/demo
Executable file
BIN
driver/tests/tex_demo/demo
Executable file
Binary file not shown.
203
driver/tests/tex_demo/demo.cpp
Normal file
203
driver/tests/tex_demo/demo.cpp
Normal file
|
@ -0,0 +1,203 @@
|
|||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <vortex.h>
|
||||
#include "common.h"
|
||||
|
||||
// Evaluates _expr exactly once and treats any non-zero result as fatal:
// the failing expression text and its value are printed, cleanup() is
// invoked, and the process terminates with exit(-1).
// The do/while(false) wrapper makes the macro behave as a single statement.
#define RT_CHECK(_expr)                                          \
  do {                                                           \
    const int _status = (_expr);                                 \
    if (_status != 0) {                                          \
      printf("Error: '%s' returned %d!\n", #_expr, _status);     \
      cleanup();                                                 \
      exit(-1);                                                  \
    }                                                            \
  } while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const char* kernel_file = "kernel.bin";
|
||||
uint32_t count = 0;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h buffer = nullptr;
|
||||
|
||||
// Prints the program banner followed by the supported command-line options.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:k:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
count = atoi(optarg);
|
||||
break;
|
||||
case 'k':
|
||||
kernel_file = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cleanup() {
|
||||
if (buffer) {
|
||||
vx_buf_release(buffer);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int run_test(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t num_points) {
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
RT_CHECK(vx_start(device));
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
{
|
||||
int errors = 0;
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
int ref = 0xFAAF;
|
||||
int cur = buf_ptr[i];
|
||||
if (cur != ref) {
|
||||
std::cout << "error at result #" << i
|
||||
<< ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
std::cout << "Found " << std::dec << errors << " errors!" << std::endl;
|
||||
std::cout << "FAILED!" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
if (count == 0) {
|
||||
count = 1;
|
||||
}
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
||||
// uint32_t num_tasks = max_cores * max_warps * max_threads;
|
||||
uint32_t num_tasks = 1;
|
||||
uint32_t num_points = count * num_tasks;
|
||||
uint32_t buf_size = num_points * sizeof(uint32_t);
|
||||
|
||||
std::cout << "number of points: " << num_points << std::endl;
|
||||
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src0_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src1_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
|
||||
kernel_arg.num_tasks = num_tasks;
|
||||
kernel_arg.task_size = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
}
|
||||
|
||||
// upload source buffer0
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = i-1;
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
|
||||
|
||||
// upload source buffer1
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = i+1;
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = 0xdeadbeef;
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
RT_CHECK(run_test(kernel_arg, buf_size, num_points));
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
cleanup();
|
||||
|
||||
std::cout << "PASSED!" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
BIN
driver/tests/tex_demo/kernel.bin
Executable file
BIN
driver/tests/tex_demo/kernel.bin
Executable file
Binary file not shown.
29
driver/tests/tex_demo/kernel.c
Normal file
29
driver/tests/tex_demo/kernel.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
#include <stdint.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <vx_tex.h>
|
||||
#include "common.h"
|
||||
|
||||
void kernel_body(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->task_size;
|
||||
// int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
// int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
|
||||
unsigned lod = 1;
|
||||
unsigned u = 1;
|
||||
unsigned v = 1;
|
||||
unsigned t = 1;
|
||||
|
||||
uint32_t offset = task_id * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
// dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i];
|
||||
dst_ptr[offset+i] = vx_tex(t, u, v, lod);
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
vx_spawn_tasks(arg->num_tasks, kernel_body, arg);
|
||||
}
|
571
driver/tests/tex_demo/kernel.dump
Normal file
571
driver/tests/tex_demo/kernel.dump
Normal file
|
@ -0,0 +1,571 @@
|
|||
|
||||
kernel.elf: file format elf32-littleriscv
|
||||
|
||||
|
||||
Disassembly of section .init:
|
||||
|
||||
80000000 <_start>:
|
||||
80000000: 00000597 auipc a1,0x0
|
||||
80000004: 0f458593 addi a1,a1,244 # 800000f4 <vx_set_sp>
|
||||
80000008: fc102573 csrr a0,0xfc1
|
||||
8000000c: 00b5106b 0xb5106b
|
||||
80000010: 0e4000ef jal ra,800000f4 <vx_set_sp>
|
||||
80000014: 00100513 li a0,1
|
||||
80000018: 0005006b 0x5006b
|
||||
8000001c: 00002517 auipc a0,0x2
|
||||
80000020: b4050513 addi a0,a0,-1216 # 80001b5c <g_wspawn_args>
|
||||
80000024: 00002617 auipc a2,0x2
|
||||
80000028: bb860613 addi a2,a2,-1096 # 80001bdc <__BSS_END__>
|
||||
8000002c: 40a60633 sub a2,a2,a0
|
||||
80000030: 00000593 li a1,0
|
||||
80000034: 458000ef jal ra,8000048c <memset>
|
||||
80000038: 00000517 auipc a0,0x0
|
||||
8000003c: 35c50513 addi a0,a0,860 # 80000394 <__libc_fini_array>
|
||||
80000040: 30c000ef jal ra,8000034c <atexit>
|
||||
80000044: 3ac000ef jal ra,800003f0 <__libc_init_array>
|
||||
80000048: 008000ef jal ra,80000050 <main>
|
||||
8000004c: 3140006f j 80000360 <exit>
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
80000050 <main>:
|
||||
80000050: 7ffff7b7 lui a5,0x7ffff
|
||||
80000054: 0007a503 lw a0,0(a5) # 7ffff000 <__stack_size+0x7fffec00>
|
||||
80000058: 800005b7 lui a1,0x80000
|
||||
8000005c: 7ffff637 lui a2,0x7ffff
|
||||
80000060: 08058593 addi a1,a1,128 # 80000080 <__stack_top+0x81000080>
|
||||
80000064: 18c0006f j 800001f0 <vx_spawn_tasks>
|
||||
|
||||
80000068 <register_fini>:
|
||||
80000068: 00000793 li a5,0
|
||||
8000006c: 00078863 beqz a5,8000007c <register_fini+0x14>
|
||||
80000070: 80000537 lui a0,0x80000
|
||||
80000074: 39450513 addi a0,a0,916 # 80000394 <__stack_top+0x81000394>
|
||||
80000078: 2d40006f j 8000034c <atexit>
|
||||
8000007c: 00008067 ret
|
||||
|
||||
80000080 <kernel_body>:
|
||||
80000080: 0045a783 lw a5,4(a1)
|
||||
80000084: 0105a703 lw a4,16(a1)
|
||||
80000088: 02f50533 mul a0,a0,a5
|
||||
8000008c: 04078e63 beqz a5,800000e8 <kernel_body+0x68>
|
||||
80000090: ff010113 addi sp,sp,-16
|
||||
80000094: 00912223 sw s1,4(sp)
|
||||
80000098: 00a784b3 add s1,a5,a0
|
||||
8000009c: 00249493 slli s1,s1,0x2
|
||||
800000a0: 00251513 slli a0,a0,0x2
|
||||
800000a4: 00812423 sw s0,8(sp)
|
||||
800000a8: 00112623 sw ra,12(sp)
|
||||
800000ac: 00e50433 add s0,a0,a4
|
||||
800000b0: 00e484b3 add s1,s1,a4
|
||||
800000b4: 00100693 li a3,1
|
||||
800000b8: 00100613 li a2,1
|
||||
800000bc: 00100593 li a1,1
|
||||
800000c0: 00100513 li a0,1
|
||||
800000c4: 274000ef jal ra,80000338 <vx_tex>
|
||||
800000c8: 00a42023 sw a0,0(s0)
|
||||
800000cc: 00440413 addi s0,s0,4
|
||||
800000d0: fe8492e3 bne s1,s0,800000b4 <kernel_body+0x34>
|
||||
800000d4: 00c12083 lw ra,12(sp)
|
||||
800000d8: 00812403 lw s0,8(sp)
|
||||
800000dc: 00412483 lw s1,4(sp)
|
||||
800000e0: 01010113 addi sp,sp,16
|
||||
800000e4: 00008067 ret
|
||||
800000e8: 00008067 ret
|
||||
|
||||
800000ec <_exit>:
|
||||
800000ec: 00000513 li a0,0
|
||||
800000f0: 0005006b 0x5006b
|
||||
|
||||
800000f4 <vx_set_sp>:
|
||||
800000f4: fc002573 csrr a0,0xfc0
|
||||
800000f8: 0005006b 0x5006b
|
||||
800000fc: 00002197 auipc gp,0x2
|
||||
80000100: e3418193 addi gp,gp,-460 # 80001f30 <__global_pointer>
|
||||
80000104: 7f000117 auipc sp,0x7f000
|
||||
80000108: efc10113 addi sp,sp,-260 # ff000000 <__stack_top>
|
||||
8000010c: 40000593 li a1,1024
|
||||
80000110: cc102673 csrr a2,0xcc1
|
||||
80000114: 02c585b3 mul a1,a1,a2
|
||||
80000118: 40b10133 sub sp,sp,a1
|
||||
8000011c: cc3026f3 csrr a3,0xcc3
|
||||
80000120: 00068663 beqz a3,8000012c <RETURN>
|
||||
80000124: 00000513 li a0,0
|
||||
80000128: 0005006b 0x5006b
|
||||
|
||||
8000012c <RETURN>:
|
||||
8000012c: 00008067 ret
|
||||
|
||||
80000130 <spawn_tasks_callback>:
|
||||
80000130: fe010113 addi sp,sp,-32
|
||||
80000134: 00112e23 sw ra,28(sp)
|
||||
80000138: 00812c23 sw s0,24(sp)
|
||||
8000013c: 00912a23 sw s1,20(sp)
|
||||
80000140: 01212823 sw s2,16(sp)
|
||||
80000144: 01312623 sw s3,12(sp)
|
||||
80000148: fc0027f3 csrr a5,0xfc0
|
||||
8000014c: 0007806b 0x7806b
|
||||
80000150: cc5026f3 csrr a3,0xcc5
|
||||
80000154: cc3029f3 csrr s3,0xcc3
|
||||
80000158: cc002773 csrr a4,0xcc0
|
||||
8000015c: fc002673 csrr a2,0xfc0
|
||||
80000160: 800027b7 lui a5,0x80002
|
||||
80000164: 00269693 slli a3,a3,0x2
|
||||
80000168: b5c78793 addi a5,a5,-1188 # 80001b5c <__stack_top+0x81001b5c>
|
||||
8000016c: 00d787b3 add a5,a5,a3
|
||||
80000170: 0007a483 lw s1,0(a5)
|
||||
80000174: 0104a403 lw s0,16(s1)
|
||||
80000178: 00c4a683 lw a3,12(s1)
|
||||
8000017c: 0089a933 slt s2,s3,s0
|
||||
80000180: 00040793 mv a5,s0
|
||||
80000184: 00d90933 add s2,s2,a3
|
||||
80000188: 03368433 mul s0,a3,s3
|
||||
8000018c: 00f9d463 bge s3,a5,80000194 <spawn_tasks_callback+0x64>
|
||||
80000190: 00098793 mv a5,s3
|
||||
80000194: 00f40433 add s0,s0,a5
|
||||
80000198: 0084a683 lw a3,8(s1)
|
||||
8000019c: 02c40433 mul s0,s0,a2
|
||||
800001a0: 02e907b3 mul a5,s2,a4
|
||||
800001a4: 00d40433 add s0,s0,a3
|
||||
800001a8: 00f40433 add s0,s0,a5
|
||||
800001ac: 00890933 add s2,s2,s0
|
||||
800001b0: 01245e63 bge s0,s2,800001cc <spawn_tasks_callback+0x9c>
|
||||
800001b4: 0004a783 lw a5,0(s1)
|
||||
800001b8: 0044a583 lw a1,4(s1)
|
||||
800001bc: 00040513 mv a0,s0
|
||||
800001c0: 00140413 addi s0,s0,1
|
||||
800001c4: 000780e7 jalr a5
|
||||
800001c8: fe8916e3 bne s2,s0,800001b4 <spawn_tasks_callback+0x84>
|
||||
800001cc: 0019b993 seqz s3,s3
|
||||
800001d0: 0009806b 0x9806b
|
||||
800001d4: 01c12083 lw ra,28(sp)
|
||||
800001d8: 01812403 lw s0,24(sp)
|
||||
800001dc: 01412483 lw s1,20(sp)
|
||||
800001e0: 01012903 lw s2,16(sp)
|
||||
800001e4: 00c12983 lw s3,12(sp)
|
||||
800001e8: 02010113 addi sp,sp,32
|
||||
800001ec: 00008067 ret
|
||||
|
||||
800001f0 <vx_spawn_tasks>:
|
||||
800001f0: fc010113 addi sp,sp,-64
|
||||
800001f4: 02112e23 sw ra,60(sp)
|
||||
800001f8: 02812c23 sw s0,56(sp)
|
||||
800001fc: 02912a23 sw s1,52(sp)
|
||||
80000200: 03212823 sw s2,48(sp)
|
||||
80000204: 03312623 sw s3,44(sp)
|
||||
80000208: fc2026f3 csrr a3,0xfc2
|
||||
8000020c: fc102873 csrr a6,0xfc1
|
||||
80000210: fc002473 csrr s0,0xfc0
|
||||
80000214: cc5027f3 csrr a5,0xcc5
|
||||
80000218: 01f00713 li a4,31
|
||||
8000021c: 0cf74463 blt a4,a5,800002e4 <vx_spawn_tasks+0xf4>
|
||||
80000220: 030408b3 mul a7,s0,a6
|
||||
80000224: 00100713 li a4,1
|
||||
80000228: 00a8d463 bge a7,a0,80000230 <vx_spawn_tasks+0x40>
|
||||
8000022c: 03154733 div a4,a0,a7
|
||||
80000230: 0ce6c863 blt a3,a4,80000300 <vx_spawn_tasks+0x110>
|
||||
80000234: 0ae7d863 bge a5,a4,800002e4 <vx_spawn_tasks+0xf4>
|
||||
80000238: fff68693 addi a3,a3,-1
|
||||
8000023c: 02e54333 div t1,a0,a4
|
||||
80000240: 00030893 mv a7,t1
|
||||
80000244: 00f69663 bne a3,a5,80000250 <vx_spawn_tasks+0x60>
|
||||
80000248: 02e56533 rem a0,a0,a4
|
||||
8000024c: 006508b3 add a7,a0,t1
|
||||
80000250: 0288c4b3 div s1,a7,s0
|
||||
80000254: 0288e933 rem s2,a7,s0
|
||||
80000258: 0b04ca63 blt s1,a6,8000030c <vx_spawn_tasks+0x11c>
|
||||
8000025c: 00100693 li a3,1
|
||||
80000260: 0304c733 div a4,s1,a6
|
||||
80000264: 00070663 beqz a4,80000270 <vx_spawn_tasks+0x80>
|
||||
80000268: 00070693 mv a3,a4
|
||||
8000026c: 0304e733 rem a4,s1,a6
|
||||
80000270: 800029b7 lui s3,0x80002
|
||||
80000274: b5c98993 addi s3,s3,-1188 # 80001b5c <__stack_top+0x81001b5c>
|
||||
80000278: 00e12e23 sw a4,28(sp)
|
||||
8000027c: 00c10713 addi a4,sp,12
|
||||
80000280: 00b12623 sw a1,12(sp)
|
||||
80000284: 00c12823 sw a2,16(sp)
|
||||
80000288: 00d12c23 sw a3,24(sp)
|
||||
8000028c: 02f30333 mul t1,t1,a5
|
||||
80000290: 00279793 slli a5,a5,0x2
|
||||
80000294: 00f987b3 add a5,s3,a5
|
||||
80000298: 00e7a023 sw a4,0(a5)
|
||||
8000029c: 00612a23 sw t1,20(sp)
|
||||
800002a0: 06904c63 bgtz s1,80000318 <vx_spawn_tasks+0x128>
|
||||
800002a4: 04090063 beqz s2,800002e4 <vx_spawn_tasks+0xf4>
|
||||
800002a8: 02848433 mul s0,s1,s0
|
||||
800002ac: 00812a23 sw s0,20(sp)
|
||||
800002b0: 0009006b 0x9006b
|
||||
800002b4: cc5027f3 csrr a5,0xcc5
|
||||
800002b8: cc202573 csrr a0,0xcc2
|
||||
800002bc: 00279793 slli a5,a5,0x2
|
||||
800002c0: 00f989b3 add s3,s3,a5
|
||||
800002c4: 0009a783 lw a5,0(s3)
|
||||
800002c8: 0087a683 lw a3,8(a5)
|
||||
800002cc: 0007a703 lw a4,0(a5)
|
||||
800002d0: 0047a583 lw a1,4(a5)
|
||||
800002d4: 00d50533 add a0,a0,a3
|
||||
800002d8: 000700e7 jalr a4
|
||||
800002dc: 00100793 li a5,1
|
||||
800002e0: 0007806b 0x7806b
|
||||
800002e4: 03c12083 lw ra,60(sp)
|
||||
800002e8: 03812403 lw s0,56(sp)
|
||||
800002ec: 03412483 lw s1,52(sp)
|
||||
800002f0: 03012903 lw s2,48(sp)
|
||||
800002f4: 02c12983 lw s3,44(sp)
|
||||
800002f8: 04010113 addi sp,sp,64
|
||||
800002fc: 00008067 ret
|
||||
80000300: 00068713 mv a4,a3
|
||||
80000304: f2e7cae3 blt a5,a4,80000238 <vx_spawn_tasks+0x48>
|
||||
80000308: fddff06f j 800002e4 <vx_spawn_tasks+0xf4>
|
||||
8000030c: 00000713 li a4,0
|
||||
80000310: 00100693 li a3,1
|
||||
80000314: f5dff06f j 80000270 <vx_spawn_tasks+0x80>
|
||||
80000318: 00048713 mv a4,s1
|
||||
8000031c: 00985463 bge a6,s1,80000324 <vx_spawn_tasks+0x134>
|
||||
80000320: 00080713 mv a4,a6
|
||||
80000324: 800007b7 lui a5,0x80000
|
||||
80000328: 13078793 addi a5,a5,304 # 80000130 <__stack_top+0x81000130>
|
||||
8000032c: 00f7106b 0xf7106b
|
||||
80000330: e01ff0ef jal ra,80000130 <spawn_tasks_callback>
|
||||
80000334: f71ff06f j 800002a4 <vx_spawn_tasks+0xb4>
|
||||
|
||||
80000338 <vx_tex>:
|
||||
80000338: 00869693 slli a3,a3,0x8
|
||||
8000033c: 00a6e6b3 or a3,a3,a0
|
||||
80000340: 00000513 li a0,0
|
||||
80000344: 6ac5d56b 0x6ac5d56b
|
||||
80000348: 00008067 ret
|
||||
|
||||
8000034c <atexit>:
|
||||
8000034c: 00050593 mv a1,a0
|
||||
80000350: 00000693 li a3,0
|
||||
80000354: 00000613 li a2,0
|
||||
80000358: 00000513 li a0,0
|
||||
8000035c: 20c0006f j 80000568 <__register_exitproc>
|
||||
|
||||
80000360 <exit>:
|
||||
80000360: ff010113 addi sp,sp,-16
|
||||
80000364: 00000593 li a1,0
|
||||
80000368: 00812423 sw s0,8(sp)
|
||||
8000036c: 00112623 sw ra,12(sp)
|
||||
80000370: 00050413 mv s0,a0
|
||||
80000374: 290000ef jal ra,80000604 <__call_exitprocs>
|
||||
80000378: 800027b7 lui a5,0x80002
|
||||
8000037c: b587a503 lw a0,-1192(a5) # 80001b58 <__stack_top+0x81001b58>
|
||||
80000380: 03c52783 lw a5,60(a0)
|
||||
80000384: 00078463 beqz a5,8000038c <exit+0x2c>
|
||||
80000388: 000780e7 jalr a5
|
||||
8000038c: 00040513 mv a0,s0
|
||||
80000390: d5dff0ef jal ra,800000ec <_exit>
|
||||
|
||||
80000394 <__libc_fini_array>:
|
||||
80000394: ff010113 addi sp,sp,-16
|
||||
80000398: 00812423 sw s0,8(sp)
|
||||
8000039c: 800017b7 lui a5,0x80001
|
||||
800003a0: 80001437 lui s0,0x80001
|
||||
800003a4: 72c40413 addi s0,s0,1836 # 8000172c <__stack_top+0x8100172c>
|
||||
800003a8: 72c78793 addi a5,a5,1836 # 8000172c <__stack_top+0x8100172c>
|
||||
800003ac: 408787b3 sub a5,a5,s0
|
||||
800003b0: 00912223 sw s1,4(sp)
|
||||
800003b4: 00112623 sw ra,12(sp)
|
||||
800003b8: 4027d493 srai s1,a5,0x2
|
||||
800003bc: 02048063 beqz s1,800003dc <__libc_fini_array+0x48>
|
||||
800003c0: ffc78793 addi a5,a5,-4
|
||||
800003c4: 00878433 add s0,a5,s0
|
||||
800003c8: 00042783 lw a5,0(s0)
|
||||
800003cc: fff48493 addi s1,s1,-1
|
||||
800003d0: ffc40413 addi s0,s0,-4
|
||||
800003d4: 000780e7 jalr a5
|
||||
800003d8: fe0498e3 bnez s1,800003c8 <__libc_fini_array+0x34>
|
||||
800003dc: 00c12083 lw ra,12(sp)
|
||||
800003e0: 00812403 lw s0,8(sp)
|
||||
800003e4: 00412483 lw s1,4(sp)
|
||||
800003e8: 01010113 addi sp,sp,16
|
||||
800003ec: 00008067 ret
|
||||
|
||||
800003f0 <__libc_init_array>:
|
||||
800003f0: ff010113 addi sp,sp,-16
|
||||
800003f4: 00812423 sw s0,8(sp)
|
||||
800003f8: 01212023 sw s2,0(sp)
|
||||
800003fc: 80001437 lui s0,0x80001
|
||||
80000400: 80001937 lui s2,0x80001
|
||||
80000404: 72840793 addi a5,s0,1832 # 80001728 <__stack_top+0x81001728>
|
||||
80000408: 72890913 addi s2,s2,1832 # 80001728 <__stack_top+0x81001728>
|
||||
8000040c: 40f90933 sub s2,s2,a5
|
||||
80000410: 00112623 sw ra,12(sp)
|
||||
80000414: 00912223 sw s1,4(sp)
|
||||
80000418: 40295913 srai s2,s2,0x2
|
||||
8000041c: 02090063 beqz s2,8000043c <__libc_init_array+0x4c>
|
||||
80000420: 72840413 addi s0,s0,1832
|
||||
80000424: 00000493 li s1,0
|
||||
80000428: 00042783 lw a5,0(s0)
|
||||
8000042c: 00148493 addi s1,s1,1
|
||||
80000430: 00440413 addi s0,s0,4
|
||||
80000434: 000780e7 jalr a5
|
||||
80000438: fe9918e3 bne s2,s1,80000428 <__libc_init_array+0x38>
|
||||
8000043c: 80001437 lui s0,0x80001
|
||||
80000440: 80001937 lui s2,0x80001
|
||||
80000444: 72840793 addi a5,s0,1832 # 80001728 <__stack_top+0x81001728>
|
||||
80000448: 72c90913 addi s2,s2,1836 # 8000172c <__stack_top+0x8100172c>
|
||||
8000044c: 40f90933 sub s2,s2,a5
|
||||
80000450: 40295913 srai s2,s2,0x2
|
||||
80000454: 02090063 beqz s2,80000474 <__libc_init_array+0x84>
|
||||
80000458: 72840413 addi s0,s0,1832
|
||||
8000045c: 00000493 li s1,0
|
||||
80000460: 00042783 lw a5,0(s0)
|
||||
80000464: 00148493 addi s1,s1,1
|
||||
80000468: 00440413 addi s0,s0,4
|
||||
8000046c: 000780e7 jalr a5
|
||||
80000470: fe9918e3 bne s2,s1,80000460 <__libc_init_array+0x70>
|
||||
80000474: 00c12083 lw ra,12(sp)
|
||||
80000478: 00812403 lw s0,8(sp)
|
||||
8000047c: 00412483 lw s1,4(sp)
|
||||
80000480: 00012903 lw s2,0(sp)
|
||||
80000484: 01010113 addi sp,sp,16
|
||||
80000488: 00008067 ret
|
||||
|
||||
8000048c <memset>:
|
||||
8000048c: 00f00313 li t1,15
|
||||
80000490: 00050713 mv a4,a0
|
||||
80000494: 02c37e63 bgeu t1,a2,800004d0 <memset+0x44>
|
||||
80000498: 00f77793 andi a5,a4,15
|
||||
8000049c: 0a079063 bnez a5,8000053c <memset+0xb0>
|
||||
800004a0: 08059263 bnez a1,80000524 <memset+0x98>
|
||||
800004a4: ff067693 andi a3,a2,-16
|
||||
800004a8: 00f67613 andi a2,a2,15
|
||||
800004ac: 00e686b3 add a3,a3,a4
|
||||
800004b0: 00b72023 sw a1,0(a4)
|
||||
800004b4: 00b72223 sw a1,4(a4)
|
||||
800004b8: 00b72423 sw a1,8(a4)
|
||||
800004bc: 00b72623 sw a1,12(a4)
|
||||
800004c0: 01070713 addi a4,a4,16
|
||||
800004c4: fed766e3 bltu a4,a3,800004b0 <memset+0x24>
|
||||
800004c8: 00061463 bnez a2,800004d0 <memset+0x44>
|
||||
800004cc: 00008067 ret
|
||||
800004d0: 40c306b3 sub a3,t1,a2
|
||||
800004d4: 00269693 slli a3,a3,0x2
|
||||
800004d8: 00000297 auipc t0,0x0
|
||||
800004dc: 005686b3 add a3,a3,t0
|
||||
800004e0: 00c68067 jr 12(a3)
|
||||
800004e4: 00b70723 sb a1,14(a4)
|
||||
800004e8: 00b706a3 sb a1,13(a4)
|
||||
800004ec: 00b70623 sb a1,12(a4)
|
||||
800004f0: 00b705a3 sb a1,11(a4)
|
||||
800004f4: 00b70523 sb a1,10(a4)
|
||||
800004f8: 00b704a3 sb a1,9(a4)
|
||||
800004fc: 00b70423 sb a1,8(a4)
|
||||
80000500: 00b703a3 sb a1,7(a4)
|
||||
80000504: 00b70323 sb a1,6(a4)
|
||||
80000508: 00b702a3 sb a1,5(a4)
|
||||
8000050c: 00b70223 sb a1,4(a4)
|
||||
80000510: 00b701a3 sb a1,3(a4)
|
||||
80000514: 00b70123 sb a1,2(a4)
|
||||
80000518: 00b700a3 sb a1,1(a4)
|
||||
8000051c: 00b70023 sb a1,0(a4)
|
||||
80000520: 00008067 ret
|
||||
80000524: 0ff5f593 andi a1,a1,255
|
||||
80000528: 00859693 slli a3,a1,0x8
|
||||
8000052c: 00d5e5b3 or a1,a1,a3
|
||||
80000530: 01059693 slli a3,a1,0x10
|
||||
80000534: 00d5e5b3 or a1,a1,a3
|
||||
80000538: f6dff06f j 800004a4 <memset+0x18>
|
||||
8000053c: 00279693 slli a3,a5,0x2
|
||||
80000540: 00000297 auipc t0,0x0
|
||||
80000544: 005686b3 add a3,a3,t0
|
||||
80000548: 00008293 mv t0,ra
|
||||
8000054c: fa0680e7 jalr -96(a3)
|
||||
80000550: 00028093 mv ra,t0
|
||||
80000554: ff078793 addi a5,a5,-16
|
||||
80000558: 40f70733 sub a4,a4,a5
|
||||
8000055c: 00f60633 add a2,a2,a5
|
||||
80000560: f6c378e3 bgeu t1,a2,800004d0 <memset+0x44>
|
||||
80000564: f3dff06f j 800004a0 <memset+0x14>
|
||||
|
||||
80000568 <__register_exitproc>:
|
||||
80000568: 800027b7 lui a5,0x80002
|
||||
8000056c: b587a703 lw a4,-1192(a5) # 80001b58 <__stack_top+0x81001b58>
|
||||
80000570: 14872783 lw a5,328(a4)
|
||||
80000574: 04078c63 beqz a5,800005cc <__register_exitproc+0x64>
|
||||
80000578: 0047a703 lw a4,4(a5)
|
||||
8000057c: 01f00813 li a6,31
|
||||
80000580: 06e84e63 blt a6,a4,800005fc <__register_exitproc+0x94>
|
||||
80000584: 00271813 slli a6,a4,0x2
|
||||
80000588: 02050663 beqz a0,800005b4 <__register_exitproc+0x4c>
|
||||
8000058c: 01078333 add t1,a5,a6
|
||||
80000590: 08c32423 sw a2,136(t1)
|
||||
80000594: 1887a883 lw a7,392(a5)
|
||||
80000598: 00100613 li a2,1
|
||||
8000059c: 00e61633 sll a2,a2,a4
|
||||
800005a0: 00c8e8b3 or a7,a7,a2
|
||||
800005a4: 1917a423 sw a7,392(a5)
|
||||
800005a8: 10d32423 sw a3,264(t1)
|
||||
800005ac: 00200693 li a3,2
|
||||
800005b0: 02d50463 beq a0,a3,800005d8 <__register_exitproc+0x70>
|
||||
800005b4: 00170713 addi a4,a4,1
|
||||
800005b8: 00e7a223 sw a4,4(a5)
|
||||
800005bc: 010787b3 add a5,a5,a6
|
||||
800005c0: 00b7a423 sw a1,8(a5)
|
||||
800005c4: 00000513 li a0,0
|
||||
800005c8: 00008067 ret
|
||||
800005cc: 14c70793 addi a5,a4,332
|
||||
800005d0: 14f72423 sw a5,328(a4)
|
||||
800005d4: fa5ff06f j 80000578 <__register_exitproc+0x10>
|
||||
800005d8: 18c7a683 lw a3,396(a5)
|
||||
800005dc: 00170713 addi a4,a4,1
|
||||
800005e0: 00e7a223 sw a4,4(a5)
|
||||
800005e4: 00c6e633 or a2,a3,a2
|
||||
800005e8: 18c7a623 sw a2,396(a5)
|
||||
800005ec: 010787b3 add a5,a5,a6
|
||||
800005f0: 00b7a423 sw a1,8(a5)
|
||||
800005f4: 00000513 li a0,0
|
||||
800005f8: 00008067 ret
|
||||
800005fc: fff00513 li a0,-1
|
||||
80000600: 00008067 ret
|
||||
|
||||
80000604 <__call_exitprocs>:
|
||||
80000604: fd010113 addi sp,sp,-48
|
||||
80000608: 800027b7 lui a5,0x80002
|
||||
8000060c: 01412c23 sw s4,24(sp)
|
||||
80000610: b587aa03 lw s4,-1192(a5) # 80001b58 <__stack_top+0x81001b58>
|
||||
80000614: 03212023 sw s2,32(sp)
|
||||
80000618: 02112623 sw ra,44(sp)
|
||||
8000061c: 148a2903 lw s2,328(s4)
|
||||
80000620: 02812423 sw s0,40(sp)
|
||||
80000624: 02912223 sw s1,36(sp)
|
||||
80000628: 01312e23 sw s3,28(sp)
|
||||
8000062c: 01512a23 sw s5,20(sp)
|
||||
80000630: 01612823 sw s6,16(sp)
|
||||
80000634: 01712623 sw s7,12(sp)
|
||||
80000638: 01812423 sw s8,8(sp)
|
||||
8000063c: 04090063 beqz s2,8000067c <__call_exitprocs+0x78>
|
||||
80000640: 00050b13 mv s6,a0
|
||||
80000644: 00058b93 mv s7,a1
|
||||
80000648: 00100a93 li s5,1
|
||||
8000064c: fff00993 li s3,-1
|
||||
80000650: 00492483 lw s1,4(s2)
|
||||
80000654: fff48413 addi s0,s1,-1
|
||||
80000658: 02044263 bltz s0,8000067c <__call_exitprocs+0x78>
|
||||
8000065c: 00249493 slli s1,s1,0x2
|
||||
80000660: 009904b3 add s1,s2,s1
|
||||
80000664: 040b8463 beqz s7,800006ac <__call_exitprocs+0xa8>
|
||||
80000668: 1044a783 lw a5,260(s1)
|
||||
8000066c: 05778063 beq a5,s7,800006ac <__call_exitprocs+0xa8>
|
||||
80000670: fff40413 addi s0,s0,-1
|
||||
80000674: ffc48493 addi s1,s1,-4
|
||||
80000678: ff3416e3 bne s0,s3,80000664 <__call_exitprocs+0x60>
|
||||
8000067c: 02c12083 lw ra,44(sp)
|
||||
80000680: 02812403 lw s0,40(sp)
|
||||
80000684: 02412483 lw s1,36(sp)
|
||||
80000688: 02012903 lw s2,32(sp)
|
||||
8000068c: 01c12983 lw s3,28(sp)
|
||||
80000690: 01812a03 lw s4,24(sp)
|
||||
80000694: 01412a83 lw s5,20(sp)
|
||||
80000698: 01012b03 lw s6,16(sp)
|
||||
8000069c: 00c12b83 lw s7,12(sp)
|
||||
800006a0: 00812c03 lw s8,8(sp)
|
||||
800006a4: 03010113 addi sp,sp,48
|
||||
800006a8: 00008067 ret
|
||||
800006ac: 00492783 lw a5,4(s2)
|
||||
800006b0: 0044a683 lw a3,4(s1)
|
||||
800006b4: fff78793 addi a5,a5,-1
|
||||
800006b8: 04878e63 beq a5,s0,80000714 <__call_exitprocs+0x110>
|
||||
800006bc: 0004a223 sw zero,4(s1)
|
||||
800006c0: fa0688e3 beqz a3,80000670 <__call_exitprocs+0x6c>
|
||||
800006c4: 18892783 lw a5,392(s2)
|
||||
800006c8: 008a9733 sll a4,s5,s0
|
||||
800006cc: 00492c03 lw s8,4(s2)
|
||||
800006d0: 00f777b3 and a5,a4,a5
|
||||
800006d4: 02079263 bnez a5,800006f8 <__call_exitprocs+0xf4>
|
||||
800006d8: 000680e7 jalr a3
|
||||
800006dc: 00492703 lw a4,4(s2)
|
||||
800006e0: 148a2783 lw a5,328(s4)
|
||||
800006e4: 01871463 bne a4,s8,800006ec <__call_exitprocs+0xe8>
|
||||
800006e8: f8f904e3 beq s2,a5,80000670 <__call_exitprocs+0x6c>
|
||||
800006ec: f80788e3 beqz a5,8000067c <__call_exitprocs+0x78>
|
||||
800006f0: 00078913 mv s2,a5
|
||||
800006f4: f5dff06f j 80000650 <__call_exitprocs+0x4c>
|
||||
800006f8: 18c92783 lw a5,396(s2)
|
||||
800006fc: 0844a583 lw a1,132(s1)
|
||||
80000700: 00f77733 and a4,a4,a5
|
||||
80000704: 00071c63 bnez a4,8000071c <__call_exitprocs+0x118>
|
||||
80000708: 000b0513 mv a0,s6
|
||||
8000070c: 000680e7 jalr a3
|
||||
80000710: fcdff06f j 800006dc <__call_exitprocs+0xd8>
|
||||
80000714: 00892223 sw s0,4(s2)
|
||||
80000718: fa9ff06f j 800006c0 <__call_exitprocs+0xbc>
|
||||
8000071c: 00058513 mv a0,a1
|
||||
80000720: 000680e7 jalr a3
|
||||
80000724: fb9ff06f j 800006dc <__call_exitprocs+0xd8>
|
||||
|
||||
Disassembly of section .init_array:
|
||||
|
||||
80001728 <__init_array_start>:
|
||||
80001728: 0068 addi a0,sp,12
|
||||
8000172a: 8000 0x8000
|
||||
|
||||
Disassembly of section .data:
|
||||
|
||||
80001730 <impure_data>:
|
||||
80001730: 0000 unimp
|
||||
80001732: 0000 unimp
|
||||
80001734: 1a1c addi a5,sp,304
|
||||
80001736: 8000 0x8000
|
||||
80001738: 1a84 addi s1,sp,368
|
||||
8000173a: 8000 0x8000
|
||||
8000173c: 1aec addi a1,sp,380
|
||||
8000173e: 8000 0x8000
|
||||
...
|
||||
800017d8: 0001 nop
|
||||
800017da: 0000 unimp
|
||||
800017dc: 0000 unimp
|
||||
800017de: 0000 unimp
|
||||
800017e0: 330e fld ft6,224(sp)
|
||||
800017e2: abcd j 80001dd4 <__BSS_END__+0x1f8>
|
||||
800017e4: 1234 addi a3,sp,296
|
||||
800017e6: e66d bnez a2,800018d0 <impure_data+0x1a0>
|
||||
800017e8: deec sw a1,124(a3)
|
||||
800017ea: 0005 c.nop 1
|
||||
800017ec: 0000000b 0xb
|
||||
...
|
||||
|
||||
Disassembly of section .sdata:
|
||||
|
||||
80001b58 <_global_impure_ptr>:
|
||||
80001b58: 1730 addi a2,sp,936
|
||||
80001b5a: 8000 0x8000
|
||||
|
||||
Disassembly of section .bss:
|
||||
|
||||
80001b5c <g_wspawn_args>:
|
||||
...
|
||||
|
||||
Disassembly of section .comment:
|
||||
|
||||
00000000 <.comment>:
|
||||
0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
|
||||
4: 2820 fld fs0,80(s0)
|
||||
6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
|
||||
a: 3920 fld fs0,112(a0)
|
||||
c: 322e fld ft4,232(sp)
|
||||
e: 302e fld ft0,232(sp)
|
||||
...
|
||||
|
||||
Disassembly of section .riscv.attributes:
|
||||
|
||||
00000000 <.riscv.attributes>:
|
||||
0: 2541 jal 680 <__stack_size+0x280>
|
||||
2: 0000 unimp
|
||||
4: 7200 flw fs0,32(a2)
|
||||
6: 7369 lui t1,0xffffa
|
||||
8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14>
|
||||
c: 0000001b 0x1b
|
||||
10: 1004 addi s1,sp,32
|
||||
12: 7205 lui tp,0xfffe1
|
||||
14: 3376 fld ft6,376(sp)
|
||||
16: 6932 flw fs2,12(sp)
|
||||
18: 7032 flw ft0,44(sp)
|
||||
1a: 5f30 lw a2,120(a4)
|
||||
1c: 326d jal fffff9c6 <__stack_top+0xfff9c6>
|
||||
1e: 3070 fld fa2,224(s0)
|
||||
20: 665f 7032 0030 0x307032665f
|
BIN
driver/tests/tex_demo/kernel.elf
Executable file
BIN
driver/tests/tex_demo/kernel.elf
Executable file
Binary file not shown.
412
hw/VX_config.h
Normal file
412
hw/VX_config.h
Normal file
|
@ -0,0 +1,412 @@
|
|||
// auto-generated by gen_config.py. DO NOT EDIT
|
||||
// Generated at 2021-03-12 17:51:37.263369
|
||||
|
||||
#ifndef VX_USER_CONFIG
|
||||
#define VX_USER_CONFIG
|
||||
|
||||
|
||||
#endif
|
||||
// auto-generated by gen_config.py. DO NOT EDIT
|
||||
// Generated at 2021-03-12 17:51:37.265050
|
||||
|
||||
// Translated from VX_config.vh:
|
||||
|
||||
#ifndef VX_CONFIG
|
||||
#define VX_CONFIG
|
||||
|
||||
|
||||
|
||||
#ifndef NUM_CLUSTERS
|
||||
#define NUM_CLUSTERS 1
|
||||
#endif
|
||||
|
||||
#ifndef NUM_CORES
|
||||
#define NUM_CORES 1
|
||||
#endif
|
||||
|
||||
#ifndef NUM_WARPS
|
||||
#define NUM_WARPS 4
|
||||
#endif
|
||||
|
||||
#ifndef NUM_THREADS
|
||||
#define NUM_THREADS 4
|
||||
#endif
|
||||
|
||||
#ifndef NUM_BARRIERS
|
||||
#define NUM_BARRIERS 4
|
||||
#endif
|
||||
|
||||
#ifndef L2_ENABLE
|
||||
#define L2_ENABLE 0
|
||||
#endif
|
||||
|
||||
#ifndef L3_ENABLE
|
||||
#define L3_ENABLE 0
|
||||
#endif
|
||||
|
||||
#ifndef SM_ENABLE
|
||||
#define SM_ENABLE 1
|
||||
#endif
|
||||
|
||||
#ifndef GLOBAL_BLOCK_SIZE
|
||||
#define GLOBAL_BLOCK_SIZE 64
|
||||
#endif
|
||||
|
||||
#ifndef L1_BLOCK_SIZE
|
||||
#define L1_BLOCK_SIZE (NUM_THREADS * 4)
|
||||
#endif
|
||||
|
||||
#ifndef STARTUP_ADDR
|
||||
#define STARTUP_ADDR 0x80000000
|
||||
#endif
|
||||
|
||||
#ifndef IO_BUS_BASE_ADDR
|
||||
#define IO_BUS_BASE_ADDR 0xFF000000
|
||||
#endif
|
||||
|
||||
#ifndef SHARED_MEM_BASE_ADDR
|
||||
#define SHARED_MEM_BASE_ADDR IO_BUS_BASE_ADDR
|
||||
#endif
|
||||
|
||||
#ifndef SHARED_MEM_BASE_ADDR_ALIGN
|
||||
#define SHARED_MEM_BASE_ADDR_ALIGN 64
|
||||
#endif
|
||||
|
||||
#ifndef IO_BUS_ADDR_COUT
|
||||
#define IO_BUS_ADDR_COUT 0xFFFFFFFC
|
||||
#endif
|
||||
|
||||
#ifndef FRAME_BUFFER_BASE_ADDR
|
||||
#define FRAME_BUFFER_BASE_ADDR 0xFF000000
|
||||
#endif
|
||||
|
||||
#ifndef FRAME_BUFFER_WIDTH
|
||||
#define FRAME_BUFFER_WIDTH 1920
|
||||
#endif
|
||||
|
||||
#ifndef FRAME_BUFFER_HEIGHT
|
||||
#define FRAME_BUFFER_HEIGHT 1080
|
||||
#endif
|
||||
|
||||
#define FRAME_BUFFER_SIZE (FRAME_BUFFER_WIDTH * FRAME_BUFFER_HEIGHT)
|
||||
|
||||
#ifndef EXT_M_DISABLE
|
||||
#define EXT_M_ENABLE
|
||||
#endif
|
||||
|
||||
#ifndef EXT_F_DISABLE
|
||||
#define EXT_F_ENABLE
|
||||
#endif
|
||||
|
||||
// Device identification
|
||||
#define VENDOR_ID 0
|
||||
#define ARCHITECTURE_ID 0
|
||||
#define IMPLEMENTATION_ID 0
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef LATENCY_IMUL
|
||||
#define LATENCY_IMUL 3
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FNCP
|
||||
#define LATENCY_FNCP 2
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FMA
|
||||
#define LATENCY_FMA 4
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FDIV
|
||||
#ifdef ALTERA_S10
|
||||
#define LATENCY_FDIV 34
|
||||
#else
|
||||
#define LATENCY_FDIV 15
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FSQRT
|
||||
#ifdef ALTERA_S10
|
||||
#define LATENCY_FSQRT 25
|
||||
#else
|
||||
#define LATENCY_FSQRT 10
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FDIVSQRT
|
||||
#define LATENCY_FDIVSQRT 32
|
||||
#endif
|
||||
|
||||
#ifndef LATENCY_FCVT
|
||||
#define LATENCY_FCVT 4
|
||||
#endif
|
||||
|
||||
// CSR Addresses //////////////////////////////////////////////////////////////
|
||||
|
||||
// User Floating-Point CSRs
|
||||
#define CSR_FFLAGS 0x001
|
||||
#define CSR_FRM 0x002
|
||||
#define CSR_FCSR 0x003
|
||||
|
||||
#define CSR_SATP 0x180
|
||||
|
||||
#define CSR_PMPCFG0 0x3A0
|
||||
#define CSR_PMPADDR0 0x3B0
|
||||
|
||||
#define CSR_MSTATUS 0x300
|
||||
#define CSR_MISA 0x301
|
||||
#define CSR_MEDELEG 0x302
|
||||
#define CSR_MIDELEG 0x303
|
||||
#define CSR_MIE 0x304
|
||||
#define CSR_MTVEC 0x305
|
||||
|
||||
#define CSR_MEPC 0x341
|
||||
|
||||
// Machine Counter/Timers
|
||||
#define CSR_CYCLE 0xC00
|
||||
#define CSR_CYCLE_H 0xC80
|
||||
#define CSR_INSTRET 0xC02
|
||||
#define CSR_INSTRET_H 0xC82
|
||||
|
||||
// Machine Performance-monitoring counters
|
||||
// PERF: pipeline
|
||||
#define CSR_MPM_IBUF_ST 0xB03
|
||||
#define CSR_MPM_IBUF_ST_H 0xB83
|
||||
#define CSR_MPM_SCRB_ST 0xB04
|
||||
#define CSR_MPM_SCRB_ST_H 0xB84
|
||||
#define CSR_MPM_ALU_ST 0xB05
|
||||
#define CSR_MPM_ALU_ST_H 0xB85
|
||||
#define CSR_MPM_LSU_ST 0xB06
|
||||
#define CSR_MPM_LSU_ST_H 0xB86
|
||||
#define CSR_MPM_CSR_ST 0xB07
|
||||
#define CSR_MPM_CSR_ST_H 0xB87
|
||||
#define CSR_MPM_FPU_ST 0xB08
|
||||
#define CSR_MPM_FPU_ST_H 0xB88
|
||||
#define CSR_MPM_GPU_ST 0xB09
|
||||
#define CSR_MPM_GPU_ST_H 0xB89
|
||||
// PERF: icache
|
||||
#define CSR_MPM_ICACHE_READS 0xB0A // total reads
|
||||
#define CSR_MPM_ICACHE_READS_H 0xB8A
|
||||
#define CSR_MPM_ICACHE_MISS_R 0xB0B // total misses
|
||||
#define CSR_MPM_ICACHE_MISS_R_H 0xB8B
|
||||
#define CSR_MPM_ICACHE_PIPE_ST 0xB0C // pipeline stalls
|
||||
#define CSR_MPM_ICACHE_PIPE_ST_H 0xB8C
|
||||
#define CSR_MPM_ICACHE_CRSP_ST 0xB0D // core response stalls
|
||||
#define CSR_MPM_ICACHE_CRSP_ST_H 0xB8D
|
||||
// PERF: dcache
|
||||
#define CSR_MPM_DCACHE_READS 0xB0E // total reads
|
||||
#define CSR_MPM_DCACHE_READS_H 0xB8E
|
||||
#define CSR_MPM_DCACHE_WRITES 0xB0F // total writes
|
||||
#define CSR_MPM_DCACHE_WRITES_H 0xB8F
|
||||
#define CSR_MPM_DCACHE_MISS_R 0xB10 // read misses
|
||||
#define CSR_MPM_DCACHE_MISS_R_H 0xB90
|
||||
#define CSR_MPM_DCACHE_MISS_W 0xB11 // write misses
|
||||
#define CSR_MPM_DCACHE_MISS_W_H 0xB91
|
||||
#define CSR_MPM_DCACHE_BANK_ST 0xB12 // bank conflicts stalls
|
||||
#define CSR_MPM_DCACHE_BANK_ST_H 0xB92
|
||||
#define CSR_MPM_DCACHE_MSHR_ST 0xB13 // MSHR stalls
|
||||
#define CSR_MPM_DCACHE_MSHR_ST_H 0xB93
|
||||
#define CSR_MPM_DCACHE_PIPE_ST 0xB14 // pipeline stalls
|
||||
#define CSR_MPM_DCACHE_PIPE_ST_H 0xB94
|
||||
#define CSR_MPM_DCACHE_CRSP_ST 0xB15 // core response stalls
|
||||
#define CSR_MPM_DCACHE_CRSP_ST_H 0xB95
|
||||
// PERF: smem
|
||||
#define CSR_MPM_SMEM_READS 0xB16 // total reads
|
||||
#define CSR_MPM_SMEM_READS_H 0xB96
|
||||
#define CSR_MPM_SMEM_WRITES 0xB17 // total writes
|
||||
#define CSR_MPM_SMEM_WRITES_H 0xB97
|
||||
#define CSR_MPM_SMEM_BANK_ST 0xB18 // bank conflicts stalls
|
||||
#define CSR_MPM_SMEM_BANK_ST_H 0xB98
|
||||
// PERF: memory
|
||||
#define CSR_MPM_DRAM_READS 0xB19 // dram reads
|
||||
#define CSR_MPM_DRAM_READS_H 0xB99
|
||||
#define CSR_MPM_DRAM_WRITES 0xB1A // dram writes
|
||||
#define CSR_MPM_DRAM_WRITES_H 0xB9A
|
||||
#define CSR_MPM_DRAM_ST 0xB1B // dram request stalls
|
||||
#define CSR_MPM_DRAM_ST_H 0xB9B
|
||||
#define CSR_MPM_DRAM_LAT 0xB1C // dram latency (total)
|
||||
#define CSR_MPM_DRAM_LAT_H 0xB9C
|
||||
|
||||
// Machine Information Registers
|
||||
#define CSR_MVENDORID 0xF11
|
||||
#define CSR_MARCHID 0xF12
|
||||
#define CSR_MIMPID 0xF13
|
||||
#define CSR_MHARTID 0xF14
|
||||
|
||||
// User SIMT CSRs
|
||||
#define CSR_WTID 0xCC0
|
||||
#define CSR_LTID 0xCC1
|
||||
#define CSR_GTID 0xCC2
|
||||
#define CSR_LWID 0xCC3
|
||||
#define CSR_GWID CSR_MHARTID
|
||||
#define CSR_GCID 0xCC5
|
||||
|
||||
// Machine SIMT CSRs
|
||||
#define CSR_NT 0xFC0
|
||||
#define CSR_NW 0xFC1
|
||||
#define CSR_NC 0xFC2
|
||||
|
||||
// Pipeline Queues ////////////////////////////////////////////////////////////
|
||||
|
||||
// Size of LSU Request Queue
|
||||
#ifndef LSUQ_SIZE
|
||||
#define LSUQ_SIZE 8
|
||||
#endif
|
||||
|
||||
// Size of FPU Request Queue
|
||||
#ifndef FPUQ_SIZE
|
||||
#define FPUQ_SIZE 8
|
||||
#endif
|
||||
|
||||
// Icache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
#ifndef ICACHE_SIZE
|
||||
#define ICACHE_SIZE 16384
|
||||
#endif
|
||||
|
||||
// Core Request Queue Size
|
||||
#ifndef ICREQ_SIZE
|
||||
#define ICREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
#ifndef IMSHR_SIZE
|
||||
#define IMSHR_SIZE NUM_WARPS
|
||||
#endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
#ifndef IDREQ_SIZE
|
||||
#define IDREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
#ifndef IDRSQ_SIZE
|
||||
#define IDRSQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Dcache Configurable Knobs //////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
#ifndef DCACHE_SIZE
|
||||
#define DCACHE_SIZE 16384
|
||||
#endif
|
||||
|
||||
// Number of banks
|
||||
#ifndef DNUM_BANKS
|
||||
#define DNUM_BANKS NUM_THREADS
|
||||
#endif
|
||||
|
||||
// Number of bank ports
|
||||
#ifndef DNUM_PORTS
|
||||
#define DNUM_PORTS 1
|
||||
#endif
|
||||
|
||||
// Core Request Queue Size
|
||||
#ifndef DCREQ_SIZE
|
||||
#define DCREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
#ifndef DMSHR_SIZE
|
||||
#define DMSHR_SIZE LSUQ_SIZE
|
||||
#endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
#ifndef DDREQ_SIZE
|
||||
#define DDREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
#ifndef DDRSQ_SIZE
|
||||
#define DDRSQ_SIZE MAX(4, (DNUM_BANKS * 2))
|
||||
#endif
|
||||
|
||||
// SM Configurable Knobs //////////////////////////////////////////////////////
|
||||
|
||||
// per thread stack size
|
||||
#ifndef STACK_SIZE
|
||||
#define STACK_SIZE 1024
|
||||
#endif
|
||||
|
||||
// Size of cache in bytes
|
||||
#ifndef SMEM_SIZE
|
||||
#define SMEM_SIZE (STACK_SIZE * NUM_WARPS * NUM_THREADS)
|
||||
#endif
|
||||
|
||||
// Number of banks
|
||||
#ifndef SNUM_BANKS
|
||||
#define SNUM_BANKS NUM_THREADS
|
||||
#endif
|
||||
|
||||
// Core Request Queue Size
|
||||
#ifndef SCREQ_SIZE
|
||||
#define SCREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// L2cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
#ifndef L2CACHE_SIZE
|
||||
#define L2CACHE_SIZE 65536
|
||||
#endif
|
||||
|
||||
// Number of banks
|
||||
#ifndef L2NUM_BANKS
|
||||
#define L2NUM_BANKS MIN(NUM_CORES, 4)
|
||||
#endif
|
||||
|
||||
// Core Request Queue Size
|
||||
#ifndef L2CREQ_SIZE
|
||||
#define L2CREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
#ifndef L2MSHR_SIZE
|
||||
#define L2MSHR_SIZE 16
|
||||
#endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
#ifndef L2DREQ_SIZE
|
||||
#define L2DREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
#ifndef L2DRSQ_SIZE
|
||||
#define L2DRSQ_SIZE MAX(4, (L2NUM_BANKS * 2))
|
||||
#endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Size of cache in bytes
|
||||
#ifndef L3CACHE_SIZE
|
||||
#define L3CACHE_SIZE 131072
|
||||
#endif
|
||||
|
||||
// Number of banks
|
||||
#ifndef L3NUM_BANKS
|
||||
#define L3NUM_BANKS MIN(NUM_CLUSTERS, 4)
|
||||
#endif
|
||||
|
||||
// Core Request Queue Size
|
||||
#ifndef L3CREQ_SIZE
|
||||
#define L3CREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// Miss Handling Register Size
|
||||
#ifndef L3MSHR_SIZE
|
||||
#define L3MSHR_SIZE 16
|
||||
#endif
|
||||
|
||||
// DRAM Request Queue Size
|
||||
#ifndef L3DREQ_SIZE
|
||||
#define L3DREQ_SIZE 4
|
||||
#endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
#ifndef L3DRSQ_SIZE
|
||||
#define L3DRSQ_SIZE MAX(4, (L3NUM_BANKS * 2))
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -73,13 +73,14 @@ module VX_commit #(
|
|||
.ld_commit_if (ld_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.writeback_if (writeback_if)
|
||||
);
|
||||
|
||||
// store and gpu commits don't writeback
|
||||
// store doesn't writeback
|
||||
assign st_commit_if.ready = 1'b1;
|
||||
assign gpu_commit_if.ready = 1'b1;
|
||||
// assign gpu_commit_if.ready = 1'b1;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -357,6 +357,13 @@ module VX_decode #(
|
|||
use_rs2 = 1;
|
||||
is_wstall = 1;
|
||||
end
|
||||
3'h5: begin
|
||||
op_type = `OP_BITS'(`GPU_TEX);
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_rs3 = 1;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -52,6 +52,8 @@
|
|||
|
||||
`define INST_GPU 7'b1101011
|
||||
|
||||
`define INST_TEX 7'b0101011
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define FRM_RNE 3'b000 // round to nearest even
|
||||
|
@ -182,6 +184,7 @@
|
|||
`define GPU_SPLIT 3'h2
|
||||
`define GPU_JOIN 3'h3
|
||||
`define GPU_BAR 3'h4
|
||||
`define GPU_TEX 3'h5
|
||||
`define GPU_OTHER 3'h7
|
||||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
|
@ -381,6 +384,17 @@
|
|||
|
||||
`define XDRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH+`CLOG2(2))
|
||||
|
||||
////////////////////////// Texture Unit Configurable Knobs //////////////////////////////
|
||||
`define MADDRW 8
|
||||
`define MAXWTW 8
|
||||
`define MAXHTW 8
|
||||
`define MAXFTW 8
|
||||
`define MAXFMW 8
|
||||
`define MAXAMW 8
|
||||
`define TAGW 8
|
||||
`define DATAW 32
|
||||
////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`include "VX_types.vh"
|
||||
|
||||
`endif
|
||||
|
|
|
@ -23,10 +23,14 @@ module VX_gpu_unit #(
|
|||
gpu_barrier_t barrier;
|
||||
gpu_split_t split;
|
||||
|
||||
VX_tex_req_if tex_req_if;
|
||||
VX_tex_rsp_if tex_rsp_if;
|
||||
|
||||
wire is_wspawn = (gpu_req_if.op_type == `GPU_WSPAWN);
|
||||
wire is_tmc = (gpu_req_if.op_type == `GPU_TMC);
|
||||
wire is_split = (gpu_req_if.op_type == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op_type == `GPU_BAR);
|
||||
wire is_tex = (gpu_req_if.op_type == `GPU_TEX);
|
||||
|
||||
// tmc
|
||||
|
||||
|
@ -39,7 +43,7 @@ module VX_gpu_unit #(
|
|||
|
||||
// wspawn
|
||||
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data[0];
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
|
@ -69,21 +73,48 @@ module VX_gpu_unit #(
|
|||
|
||||
assign barrier.valid = is_bar;
|
||||
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data - 1);
|
||||
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data[0] - 1);
|
||||
|
||||
// texture
|
||||
assign tex_req_if.valid = is_tex;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tex_req_if.u[i] = gpu_req_if.rs1_data[i];
|
||||
assign tex_req_if.v[i] = gpu_req_if.rs2_data[i];
|
||||
assign tex_req_if.lod_t[i] = gpu_req_if.rs3_data[i];
|
||||
end
|
||||
|
||||
`UNUSED_VAR (tex_req_if.u)
|
||||
`UNUSED_VAR (tex_req_if.v)
|
||||
`UNUSED_VAR (tex_req_if.valid)
|
||||
`UNUSED_VAR (tex_req_if.lod_t)
|
||||
|
||||
|
||||
VX_tex_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) texture_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.tex_req_if (tex_req_if),
|
||||
.tex_rsp_if (tex_rsp_if)
|
||||
);
|
||||
|
||||
assign gpu_req_if.valid = is_tex;
|
||||
assign gpu_req_if.wb = tex_rsp_if.ready;
|
||||
|
||||
// output
|
||||
|
||||
wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE),
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall),
|
||||
.data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
|
||||
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
.data_in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, tex_rsp_if.data, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
|
||||
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.data, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
);
|
||||
|
||||
assign gpu_commit_if.eop = 1'b1;
|
||||
|
@ -99,7 +130,7 @@ module VX_gpu_unit #(
|
|||
`SCOPE_ASSIGN (gpu_req_tmask, gpu_req_if.tmask);
|
||||
`SCOPE_ASSIGN (gpu_req_op_type, gpu_req_if.op_type);
|
||||
`SCOPE_ASSIGN (gpu_req_rs1, gpu_req_if.rs1_data[0]);
|
||||
`SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data);
|
||||
`SCOPE_ASSIGN (gpu_req_rs2, gpu_req_if.rs2_data[0]);
|
||||
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_wid, warp_ctl_if.wid);
|
||||
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc);
|
||||
|
|
|
@ -111,14 +111,14 @@ module VX_instr_demux (
|
|||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32))
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) //update number of bits
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
|
||||
.valid_out (gpu_req_if.valid),
|
||||
.ready_out (gpu_req_if.ready)
|
||||
);
|
||||
|
|
|
@ -11,6 +11,7 @@ module VX_writeback #(
|
|||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_writeback_if writeback_if
|
||||
|
@ -19,6 +20,7 @@ module VX_writeback #(
|
|||
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
|
||||
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
|
||||
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
|
||||
wire gpu_valid = gpu_commit_if.valid && gpu_commit_if.wb;
|
||||
|
||||
wire wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_wid;
|
||||
|
@ -31,37 +33,44 @@ module VX_writeback #(
|
|||
assign wb_valid = ld_valid |
|
||||
fpu_valid |
|
||||
csr_valid |
|
||||
alu_valid;
|
||||
alu_valid |
|
||||
gpu_valid;
|
||||
|
||||
assign wb_wid = ld_valid ? ld_commit_if.wid :
|
||||
fpu_valid ? fpu_commit_if.wid :
|
||||
csr_valid ? csr_commit_if.wid :
|
||||
/*alu_valid ?*/ alu_commit_if.wid;
|
||||
alu_valid ? alu_commit_if.wid :
|
||||
/*gpu_valid*/ gpu_commit_if.wid;
|
||||
|
||||
assign wb_PC = ld_valid ? ld_commit_if.PC :
|
||||
fpu_valid ? fpu_commit_if.PC :
|
||||
csr_valid ? csr_commit_if.PC :
|
||||
/*alu_valid ?*/ alu_commit_if.PC;
|
||||
|
||||
alu_valid ? alu_commit_if.PC :
|
||||
/*gpu_valid*/ gpu_commit_if.PC;
|
||||
|
||||
assign wb_tmask = ld_valid ? ld_commit_if.tmask :
|
||||
fpu_valid ? fpu_commit_if.tmask :
|
||||
csr_valid ? csr_commit_if.tmask :
|
||||
/*alu_valid ?*/ alu_commit_if.tmask;
|
||||
alu_valid ? alu_commit_if.tmask :
|
||||
/*gpu_valid*/ gpu_commit_if.tmask;
|
||||
|
||||
assign wb_rd = ld_valid ? ld_commit_if.rd :
|
||||
fpu_valid ? fpu_commit_if.rd :
|
||||
csr_valid ? csr_commit_if.rd :
|
||||
/*alu_valid ?*/ alu_commit_if.rd;
|
||||
alu_valid ? alu_commit_if.rd :
|
||||
/*gpu_valid*/ gpu_commit_if.rd;
|
||||
|
||||
assign wb_data = ld_valid ? ld_commit_if.data :
|
||||
fpu_valid ? fpu_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
/*alu_valid ?*/ alu_commit_if.data;
|
||||
alu_valid ? alu_commit_if.data :
|
||||
/*gpu_valid*/ gpu_commit_if.data;
|
||||
|
||||
assign wb_eop = ld_valid ? ld_commit_if.eop :
|
||||
fpu_valid ? fpu_commit_if.eop :
|
||||
csr_valid ? csr_commit_if.eop :
|
||||
/*alu_valid ?*/ alu_commit_if.eop;
|
||||
alu_valid ? alu_commit_if.eop :
|
||||
/*gpu_valid*/ gpu_commit_if.eop;
|
||||
|
||||
wire stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
|
@ -79,7 +88,9 @@ module VX_writeback #(
|
|||
assign ld_commit_if.ready = !stall;
|
||||
assign fpu_commit_if.ready = !stall && !ld_valid;
|
||||
assign csr_commit_if.ready = !stall && !ld_valid && !fpu_valid;
|
||||
assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid;
|
||||
assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid;
|
||||
// if not TEX instruction, no writeback and commit is ready
|
||||
assign gpu_commit_if.ready = (!stall && !ld_valid && !fpu_valid && !csr_valid && !alu_valid) || !gpu_commit_if.wb ;
|
||||
|
||||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||
|
|
|
@ -13,7 +13,8 @@ interface VX_gpu_req_if();
|
|||
wire [31:0] next_PC;
|
||||
wire [`GPU_BITS-1:0] op_type;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
|
|
24
hw/rtl/interfaces/VX_tex_req_if.v
Normal file
24
hw/rtl/interfaces/VX_tex_req_if.v
Normal file
|
@ -0,0 +1,24 @@
|
|||
`ifndef VX_TEX_REQ_IF
|
||||
`define VX_TEX_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Texture request bundle: a single valid flag plus three per-thread
// 32-bit operand vectors (u, v, lod_t), each `NUM_THREADS entries wide.
// NOTE(review): lod_t presumably carries the packed LOD / texture id built
// by the runtime's vx_tex_ld helper - confirm against the texture unit.
interface VX_tex_req_if ();
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0][31:0] u;
|
||||
wire [`NUM_THREADS-1:0][31:0] v;
|
||||
wire [`NUM_THREADS-1:0][31:0] lod_t;
|
||||
// The signals below are commented out and therefore not part of the
// interface as declared here.
// wire [`MADDRW-1:0] addr;
|
||||
// wire [`MAXWTW-1:0] width;
|
||||
// wire [`MAXHTW-1:0] height;
|
||||
// wire [`MAXFTW-1:0] format;
|
||||
// wire [`MAXFMW-1:0] filter;
|
||||
// wire [`MAXAMW-1:0] clamp;
|
||||
// wire [`TAGW-1:0] tag;
|
||||
// wire ready;
|
||||
|
||||
endinterface
|
||||
`endif
|
||||
|
||||
|
||||
|
14
hw/rtl/interfaces/VX_tex_rsp_if.v
Normal file
14
hw/rtl/interfaces/VX_tex_rsp_if.v
Normal file
|
@ -0,0 +1,14 @@
|
|||
`ifndef VX_TEX_RSP_IF
|
||||
`define VX_TEX_RSP_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Texture response bundle: one 32-bit data word per thread
// (`NUM_THREADS entries) plus a single-bit ready signal.
// The valid and tag signals are commented out, so no valid/tag
// handshake is declared on this interface.
interface VX_tex_rsp_if ();
|
||||
// wire valid;
|
||||
// wire [`TAGW-1:0] tag;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire ready;
|
||||
endinterface
|
||||
`endif
|
||||
|
||||
|
|
@ -1,50 +1,55 @@
|
|||
`include "VX_platform.vh"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_tex_unit #(
|
||||
parameter TADDRW = 32,
|
||||
parameter MADDRW = 32,
|
||||
parameter DATAW = 32,
|
||||
parameter MAXWTW = 8,
|
||||
parameter MAXHTW = 8,
|
||||
parameter MAXFTW = 2,
|
||||
parameter MAXFMW = 1,
|
||||
parameter MAXAMW = 2,
|
||||
parameter TAGW = 16,
|
||||
|
||||
parameter NUMCRQS = 32
|
||||
module VX_tex_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// Inputs
|
||||
VX_tex_req_if tex_req_if,
|
||||
|
||||
// Texture Request
|
||||
input wire tex_req_valid,
|
||||
input wire [TADDRW-1:0] tex_req_u,
|
||||
input wire [TADDRW-1:0] tex_req_v,
|
||||
input wire [MADDRW-1:0] tex_req_addr,
|
||||
input wire [MAXWTW-1:0] tex_req_width,
|
||||
input wire [MAXHTW-1:0] tex_req_height,
|
||||
input wire [MAXFTW-1:0] tex_req_format,
|
||||
input wire [MAXFMW-1:0] tex_req_filter,
|
||||
input wire [MAXAMW-1:0] tex_req_clamp,
|
||||
input wire [TAGW-1:0] tex_req_tag,
|
||||
output wire tex_req_ready,
|
||||
// Outputs
|
||||
VX_tex_rsp_if tex_rsp_if
|
||||
// VX_commit_if gpu_commit_if
|
||||
// // Texture Request
|
||||
// input wire tex_req_valid,
|
||||
// input wire [`TADDRW-1:0] tex_req_u,
|
||||
// input wire [`TADDRW-1:0] tex_req_v,
|
||||
// input wire [`MADDRW-1:0] tex_req_addr,
|
||||
// input wire [`MAXWTW-1:0] tex_req_width,
|
||||
// input wire [`MAXHTW-1:0] tex_req_height,
|
||||
// input wire [`MAXFTW-1:0] tex_req_format,
|
||||
// input wire [`MAXFMW-1:0] tex_req_filter,
|
||||
// input wire [`MAXAMW-1:0] tex_req_clamp,
|
||||
// input wire [`TAGW-1:0] tex_req_tag,
|
||||
// output wire tex_req_ready,
|
||||
|
||||
// Texture Response
|
||||
output wire tex_rsp_valid,
|
||||
output wire [TAGW-1:0] tex_rsp_tag,
|
||||
input wire [DATAW-1:0] tex_rsp_data,
|
||||
input wire tex_rsp_ready,
|
||||
// // Texture Response
|
||||
// output wire tex_rsp_valid,
|
||||
// output wire [`TAGW-1:0] tex_rsp_tag,
|
||||
// input wire [`DATAW-1:0] tex_rsp_data,
|
||||
// input wire tex_rsp_ready,
|
||||
|
||||
// Cache Request
|
||||
output wire [NUMCRQS-1:0] cache_req_valids,
|
||||
output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs,
|
||||
input wire cache_req_ready,
|
||||
// output wire [NUMCRQS-1:0] cache_req_valids,
|
||||
// output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs,
|
||||
// input wire cache_req_ready,
|
||||
|
||||
// Cache Response
|
||||
input wire cache_rsp_valid,
|
||||
input wire [MADDRW-1:0] cache_rsp_addr,
|
||||
input wire [DATAW-1:0] cache_rsp_data,
|
||||
output wire cache_rsp_ready
|
||||
// input wire cache_rsp_valid,
|
||||
// input wire [MADDRW-1:0] cache_rsp_addr,
|
||||
// input wire [DATAW-1:0] cache_rsp_data,
|
||||
// output wire cache_rsp_ready
|
||||
);
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tex_rsp_if.data[i] = 32'hFAAF;
|
||||
end
|
||||
|
||||
assign tex_rsp_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
|
@ -36,8 +36,9 @@ RTL_DIR=../rtl
|
|||
DPI_DIR=../dpi
|
||||
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(DPI_DIR) -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include -I$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src -I$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(RTL_DIR)/fp_cores/fpnew/src
|
||||
RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE)
|
||||
|
||||
TEX_INCLUDE = -I$(RTL_DIR)/tex_unit
|
||||
RTL_INCLUDE = -I$(RTL_DIR)/ -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache -I$(RTL_DIR)/simulate $(FPU_INCLUDE) $(TEX_INCLUDE)
|
||||
|
||||
SRCS = simulator.cpp testbench.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ CFLAGS += -I./include -I../hw
|
|||
|
||||
PROJECT = libvortexrt
|
||||
|
||||
SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c
|
||||
SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c ./src/vx_tex.c
|
||||
|
||||
OBJS := $(addsuffix .o, $(notdir $(SRCS)))
|
||||
|
||||
|
|
|
@ -109,6 +109,14 @@ inline int vx_num_instrs() {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Texture load instruction
|
||||
inline int vx_tex_ld(unsigned t, unsigned u, unsigned v, unsigned lod_t) {
|
||||
lod_t = (lod_t << 8) | t;
|
||||
int result;
|
||||
asm volatile (".insn r4 0x6b, 5, 1, %0, %1, %2, %3" :: "r"(result), "r"(u), "r"(v), "r"(lod_t));
|
||||
return result;
|
||||
}
|
||||
|
||||
#define __if(b) vx_split(b); \
|
||||
if (b)
|
||||
|
||||
|
|
17
runtime/include/vx_tex.h
Normal file
17
runtime/include/vx_tex.h
Normal file
|
@ -0,0 +1,17 @@
|
|||
// Public declaration of the texture sampling runtime call.
//
// The include guard is named after this file. The previous guard (VX_API_H)
// did not match the header and would cause this file to be skipped whenever
// another header using the same guard name was included first.
#ifndef VX_TEX_H
#define VX_TEX_H

#include <stdint.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

// Performs a texture load and returns the resulting value.
// The implementation in vx_tex.c forwards (t, u, v, lod) unchanged to the
// vx_tex_ld intrinsic.
int vx_tex(unsigned t, unsigned u, unsigned v, unsigned lod);

#ifdef __cplusplus
}
#endif

#endif // VX_TEX_H
|
BIN
runtime/libvortexrt.a
Normal file
BIN
runtime/libvortexrt.a
Normal file
Binary file not shown.
1073
runtime/libvortexrt.dump
Normal file
1073
runtime/libvortexrt.dump
Normal file
File diff suppressed because it is too large
Load diff
9
runtime/src/vx_tex.S
Normal file
9
runtime/src/vx_tex.S
Normal file
|
@ -0,0 +1,9 @@
|
|||
#include <VX_config.h>

// Hand-written variant of vx_tex_ld, intentionally disabled.
// The runtime Makefile builds ./src/vx_tex.c instead of this file.
//
// The lines below were previously disabled with '@', which is not a comment
// character for the RISC-V GNU assembler; C++-style comments are used instead
// because this .S file is run through the C preprocessor, which removes them.
//
// NOTE(review): this sequence shifts and ORs into a1, whereas the C intrinsic
// packs t into the lod argument - reconcile the two before re-enabling.
//
// .type vx_tex_ld, @function
// .global vx_tex_ld
// vx_tex_ld:
//     slli a1,a1,0x8
//     or a1,a1,a0
//     .word 0x5ae7952b
//     ret
|
13
runtime/src/vx_tex.c
Normal file
13
runtime/src/vx_tex.c
Normal file
|
@ -0,0 +1,13 @@
|
|||
#include <vx_tex.h>
#include <vx_intrinsics.h>
#include <inttypes.h>

#ifdef __cplusplus
extern "C" {
#endif

// NOTE(review): not referenced anywhere in this file; kept as-is in case it
// is expected by other code - confirm and remove if unused.
#define NUM_CORES_MAX 32

// Texture load entry point exposed by the runtime library.
// Forwards all four arguments unchanged to the vx_tex_ld intrinsic and
// returns its result.
int vx_tex(unsigned t, unsigned u, unsigned v, unsigned lod){
    return vx_tex_ld(t, u, v, lod);
}

// Close the linkage block opened above; it was previously left unterminated,
// which is a syntax error when this file is compiled as C++.
#ifdef __cplusplus
}
#endif
|
BIN
runtime/vx_print.S.o
Normal file
BIN
runtime/vx_print.S.o
Normal file
Binary file not shown.
BIN
runtime/vx_print.c.o
Normal file
BIN
runtime/vx_print.c.o
Normal file
Binary file not shown.
BIN
runtime/vx_spawn.c.o
Normal file
BIN
runtime/vx_spawn.c.o
Normal file
Binary file not shown.
BIN
runtime/vx_start.S.o
Normal file
BIN
runtime/vx_start.S.o
Normal file
Binary file not shown.
BIN
runtime/vx_tex.c.o
Normal file
BIN
runtime/vx_tex.c.o
Normal file
Binary file not shown.
BIN
simX/simX
Executable file
BIN
simX/simX
Executable file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue