mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' of https://github.gatech.edu/casl/Vortex
This commit is contained in:
commit
5a55dcb95c
52 changed files with 32656 additions and 482 deletions
|
@ -13,7 +13,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
|
||||
#DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
|
||||
OPTS ?= -n256
|
||||
|
||||
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
|
@ -38,16 +40,16 @@ $(PROJECT): $(SRCS)
|
|||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
|
||||
|
||||
run-fpga: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256
|
||||
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-ase: $(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-rtlsim: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-simx: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256
|
||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
exit(-1); \
|
||||
} while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const char* kernel_file = "kernel.bin";
|
||||
int test = -1;
|
||||
uint32_t count = 0;
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
|
||||
OPTS ?= -n64
|
||||
|
||||
VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
|
@ -36,16 +38,16 @@ $(PROJECT): $(SRCS)
|
|||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
|
||||
|
||||
run-fpga: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64
|
||||
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-ase: $(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-rtlsim: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-simx: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64
|
||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
exit(-1); \
|
||||
} while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const char* kernel_file = "kernel.bin";
|
||||
uint32_t count = 0;
|
||||
|
||||
|
|
64
driver/tests/dogfood/Makefile
Normal file
64
driver/tests/dogfood/Makefile
Normal file
|
@ -0,0 +1,64 @@
|
|||
# Build script for the "dogfood" driver test.
#
# Builds two artefacts:
#   - $(PROJECT): the host-side test program (dogfood.cpp), linked against
#     the stub vortex library;
#   - kernel.elf / kernel.bin / kernel.dump: the RISC-V device kernel
#     (kernel.c), its raw binary image and its disassembly.
# The run-* targets launch the host program against a specific driver
# back-end by pointing LD_LIBRARY_PATH at it.

RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

# Command-line arguments handed to the host program by the run-* targets.
OPTS ?= -n64

VX_CC  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc
VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP  = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy

VX_CFLAGS += -march=rv32imf -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include

VX_LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a
VX_LDFLAGS += -lm

VX_SRCS = kernel.c

CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors

CXXFLAGS += -I../../include

PROJECT = dogfood

SRCS = dogfood.cpp

# None of these names is a file; without this a stray file called "clean"
# or "all" would silently disable the target.
.PHONY: all run-fpga run-ase run-rtlsim run-simx clean clean-all

# Remove a half-written target (e.g. kernel.dump or .depend produced through
# a shell redirect) when its recipe fails, so it is not mistaken for
# up to date on the next run.
.DELETE_ON_ERROR:

all: $(PROJECT) kernel.bin kernel.dump

kernel.dump: kernel.elf
	$(VX_DP) -D $< > $@

kernel.bin: kernel.elf
	$(VX_CP) -O binary $< $@

# kernel.c includes common.h, so the kernel must be rebuilt when the
# host/device argument layout changes. Only $(VX_SRCS) is compiled.
kernel.elf: $(VX_SRCS) common.h
	$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o $@

$(PROJECT): $(SRCS)
	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@

run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-ase: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)

# Header dependencies of the host sources, generated by the compiler.
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > $@

clean:
	rm -rf $(PROJECT) *.o .depend

clean-all:
	rm -rf $(PROJECT) *.o *.elf *.bin *.dump .depend

# Do not (re)generate .depend just to delete it again: skip the include for
# both cleaning goals, not only for "clean".
ifeq ($(filter clean clean-all,$(MAKECMDGOALS)),)
-include .depend
endif
|
14
driver/tests/dogfood/common.h
Normal file
14
driver/tests/dogfood/common.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#ifndef _COMMON_H_
#define _COMMON_H_

// Needed for uint32_t: this header is included by both the host program and
// the device kernel and must not rely on the includer's include order.
#include <stdint.h>

// Fixed device address of the kernel argument block. The host copies a
// kernel_arg_t there before starting the device, and the kernel's main()
// reads it back from the same address.
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000

// Argument block shared between host and device. All fields are 32-bit so
// the layout is identical on both sides.
struct kernel_arg_t {
  uint32_t testid;   // index of the test to run (selects the kernel entry)
  uint32_t count;    // number of elements each thread processes
  uint32_t src0_ptr; // device address of the first source buffer
  uint32_t src1_ptr; // device address of the second source buffer
  uint32_t dst_ptr;  // device address of the destination buffer
};

#endif
|
273
driver/tests/dogfood/dogfood.cpp
Normal file
273
driver/tests/dogfood/dogfood.cpp
Normal file
|
@ -0,0 +1,273 @@
|
|||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <vortex.h>
|
||||
#include "testcases.h"
|
||||
#include "common.h"
|
||||
|
||||
// Evaluates a runtime call once. A zero result means success; any other
// result is reported together with the failing expression text, after which
// cleanup() is invoked and the process exits with -1.
#define RT_CHECK(_expr)                                         \
  do {                                                          \
    int _ret = _expr;                                           \
    if (0 != _ret) {                                            \
      printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);  \
      cleanup();                                                \
      exit(-1);                                                 \
    }                                                           \
  } while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class TestMngr {
|
||||
public:
|
||||
TestMngr() {
|
||||
this->add_test("iadd", new Test_IADD());
|
||||
this->add_test("imul", new Test_IMUL());
|
||||
this->add_test("idiv", new Test_IDIV());
|
||||
this->add_test("idiv-mul", new Test_IDIV_MUL());
|
||||
this->add_test("fadd", new Test_FADD());
|
||||
this->add_test("fsub", new Test_FSUB());
|
||||
this->add_test("fmul", new Test_FMUL());
|
||||
this->add_test("fmadd", new Test_FMADD());
|
||||
this->add_test("fmsub", new Test_FMSUB());
|
||||
this->add_test("fnmadd", new Test_FNMADD());
|
||||
this->add_test("fnmsub", new Test_FNMSUB());
|
||||
this->add_test("fnmadd-madd", new Test_FNMADD_MADD());
|
||||
this->add_test("fdiv", new Test_FDIV());
|
||||
this->add_test("fdiv2", new Test_FDIV2());
|
||||
this->add_test("fsqrt", new Test_FSQRT());
|
||||
this->add_test("ftoi", new Test_FTOI());
|
||||
this->add_test("ftou", new Test_FTOU());
|
||||
this->add_test("tof", new Test_ITOF());
|
||||
this->add_test("utof", new Test_UTOF());
|
||||
}
|
||||
|
||||
~TestMngr() {
|
||||
for (size_t i = 0; i < _tests.size(); ++i) {
|
||||
delete _tests[i];
|
||||
}
|
||||
}
|
||||
|
||||
const std::string& get_name(int testid) const {
|
||||
return _names.at(testid);
|
||||
}
|
||||
|
||||
ITestCase* get_test(int testid) const {
|
||||
return _tests.at(testid);
|
||||
}
|
||||
|
||||
void add_test(const char* name, ITestCase* test) {
|
||||
_names.push_back(name);
|
||||
_tests.push_back(test);
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return _tests.size();
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::string> _names;
|
||||
std::vector<ITestCase*> _tests;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Program-wide state. parse_args() fills in the options, main() creates the
// device objects and cleanup() releases them.

// Must be defined before testid_e, whose initializer reads testMngr.size().
TestMngr testMngr;
// Kernel image uploaded to the device (-k).
const char* kernel_file = "kernel.bin";
// Per-thread element count (-n); main() replaces 0 with 1.
int count = 0;
// First test id to run (-s), inclusive.
int testid_s = 0;
// Last test id to run (-e), inclusive; defaults to the last registered test.
int testid_e = (testMngr.size() - 1);
// When true the program exits on the first failing test; -c clears it.
bool stop_on_error = true;

// Device handle and host-side staging buffers; nullptr until main() creates them.
vx_device_h device = nullptr;
// Staging buffer for the kernel_arg_t block.
vx_buffer_h arg_buf = nullptr;
// Staging buffer for the first source operand (uploaded to kernel_arg.src0_ptr).
vx_buffer_h src1_buf = nullptr;
// Staging buffer for the second source operand (uploaded to kernel_arg.src1_ptr).
vx_buffer_h src2_buf = nullptr;
// Staging buffer for the results (downloaded from kernel_arg.dst_ptr).
vx_buffer_h dst_buf = nullptr;
|
||||
|
||||
// Prints the program banner and the list of command-line options to stdout.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-s:testid] [-e:testid] [-k: kernel] [-n words] [-c] [-h: help]" << std::endl;
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:s:e:k:ch?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
count = atoi(optarg);
|
||||
break;
|
||||
case 's':
|
||||
testid_s = atoi(optarg);
|
||||
break;
|
||||
case 'e':
|
||||
testid_e = atoi(optarg);
|
||||
break;
|
||||
case 'k':
|
||||
kernel_file = optarg;
|
||||
break;
|
||||
case 'c':
|
||||
stop_on_error = false;
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cleanup() {
|
||||
if (arg_buf) {
|
||||
vx_buf_release(arg_buf);
|
||||
}
|
||||
if (src1_buf) {
|
||||
vx_buf_release(src1_buf);
|
||||
}
|
||||
if (src2_buf) {
|
||||
vx_buf_release(src2_buf);
|
||||
}
|
||||
if (dst_buf) {
|
||||
vx_buf_release(dst_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int exitcode = 0;
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
if (count == 0) {
|
||||
count = 1;
|
||||
}
|
||||
|
||||
std::cout << std::dec;
|
||||
|
||||
std::cout << "test ids: " << testid_s << " - " << testid_e << std::endl;
|
||||
std::cout << "workitem size: " << count << std::endl;
|
||||
std::cout << "using kernel: " << kernel_file << std::endl;
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
||||
int num_points = count * max_cores * max_warps * max_threads;
|
||||
size_t buf_size = num_points * sizeof(uint32_t);
|
||||
|
||||
std::cout << "number of points: " << num_points << std::endl;
|
||||
std::cout << "buffer size: " << std::hex << buf_size << std::dec << " bytes" << std::endl;
|
||||
|
||||
// upload program
|
||||
std::cout << "upload kernel" << std::endl;
|
||||
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src0_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src1_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
|
||||
kernel_arg.count = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::dec << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::dec << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::dec << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
RT_CHECK(vx_alloc_shared_mem(device, sizeof(kernel_arg_t), &arg_buf));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src1_buf));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src2_buf));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, buf_size, &dst_buf));
|
||||
|
||||
for (int t = testid_s; t <= testid_e; ++t) {
|
||||
auto name = testMngr.get_name(t);
|
||||
auto test = testMngr.get_test(t);
|
||||
|
||||
std::cout << "Test" << t << ": " << name << std::endl;
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
kernel_arg.testid = t;
|
||||
memcpy((void*)vx_host_ptr(arg_buf), &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(arg_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
|
||||
// get test arguments
|
||||
std::cout << "get test arguments" << std::endl;
|
||||
test->setup(num_points, (void*)vx_host_ptr(src1_buf), (void*)vx_host_ptr(src2_buf));
|
||||
|
||||
// upload source buffer0
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_ptr, buf_size, 0));
|
||||
|
||||
// upload source buffer1
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_ptr, buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
for (int i = 0; i < num_points; ++i) {
|
||||
((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef;
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
RT_CHECK(vx_start(device));
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
|
||||
// flush the destination buffer caches
|
||||
std::cout << "flush the destination buffer caches" << std::endl;
|
||||
RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
// verify destination
|
||||
std::cout << "verify test result" << std::endl;
|
||||
int errors = test->verify(num_points,
|
||||
(void*)vx_host_ptr(dst_buf),
|
||||
(void*)vx_host_ptr(src1_buf),
|
||||
(void*)vx_host_ptr(src2_buf));
|
||||
if (errors != 0) {
|
||||
std::cout << "found " << errors << " errors!" << std::endl;
|
||||
std::cout << "FAILED!" << std::endl << std::flush;
|
||||
if (stop_on_error) {
|
||||
cleanup();
|
||||
exit(1);
|
||||
}
|
||||
exitcode = 1;
|
||||
} else {
|
||||
std::cout << "PASSED!" << std::endl << std::flush;
|
||||
}
|
||||
}
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
cleanup();
|
||||
|
||||
return exitcode;
|
||||
}
|
BIN
driver/tests/dogfood/kernel.bin
Executable file
BIN
driver/tests/dogfood/kernel.bin
Executable file
Binary file not shown.
350
driver/tests/dogfood/kernel.c
Normal file
350
driver/tests/dogfood/kernel.c
Normal file
|
@ -0,0 +1,350 @@
|
|||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <vx_spawn.h>
|
||||
#include "common.h"
|
||||
|
||||
// Signature shared by all kernel_* entry points in this file; each of them
// casts `arg` to struct kernel_arg_t*.
typedef void (*PFN_Kernel)(void* arg);
|
||||
|
||||
void kernel_iadd(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
int32_t b = src1_ptr[offset+i];
|
||||
int32_t c = a + b;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_imul(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
int32_t b = src1_ptr[offset+i];
|
||||
int32_t c = a * b;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_idiv(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
int32_t b = src1_ptr[offset+i];
|
||||
int32_t c = a / b;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_idiv_mul(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
int32_t b = src1_ptr[offset+i];
|
||||
int32_t c = a / b;
|
||||
int32_t d = a * b;
|
||||
int32_t e = c + d;
|
||||
dst_ptr[offset+i] = e;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fadd(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = a + b;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fsub(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = a - b;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fmul(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = a * b;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fmadd(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = a * b + 0.5f;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fmsub(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = a * b - 0.5f;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fnmadd(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = -a * b - 0.5f;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fnmsub(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = -a * b + 0.5f;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fnmadd_madd(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c =-a * b - 0.5f;
|
||||
float d = a * b + 0.5f;
|
||||
float e = c + d;
|
||||
dst_ptr[offset+i] = e;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fdiv(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = a / b;
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fdiv2(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = a / b;
|
||||
float d = b / a;
|
||||
float e = c + d;
|
||||
dst_ptr[offset+i] = e;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_fsqrt(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = sqrt(a * b);
|
||||
dst_ptr[offset+i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_ftoi(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = a + b;
|
||||
int32_t d = (int32_t)c;
|
||||
dst_ptr[offset+i] = d;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_ftou(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
float* src0_ptr = (float*)_arg->src0_ptr;
|
||||
float* src1_ptr = (float*)_arg->src1_ptr;
|
||||
uint32_t* dst_ptr = (uint32_t*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
float a = src0_ptr[offset+i];
|
||||
float b = src1_ptr[offset+i];
|
||||
float c = a + b;
|
||||
uint32_t d = (uint32_t)c;
|
||||
dst_ptr[offset+i] = d;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_itof(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
int32_t b = src1_ptr[offset+i];
|
||||
int32_t c = a + b;
|
||||
float d = (float)c;
|
||||
dst_ptr[offset+i] = d;
|
||||
}
|
||||
}
|
||||
|
||||
void kernel_utof(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t count = _arg->count;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
float* dst_ptr = (float*)_arg->dst_ptr;
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
int32_t a = src0_ptr[offset+i];
|
||||
int32_t b = src1_ptr[offset+i];
|
||||
uint32_t c = a + b;
|
||||
float d = (float)c;
|
||||
dst_ptr[offset+i] = d;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel entry points indexed by kernel_arg_t::testid (see main() below).
// The host assigns test ids by registration order in dogfood.cpp's TestMngr,
// so the order here must stay in step with that list.
static const PFN_Kernel sc_tests[] = {
  kernel_iadd,
  kernel_imul,
  kernel_idiv,
  kernel_idiv_mul,
  kernel_fadd,
  kernel_fsub,
  kernel_fmul,
  kernel_fmadd,
  kernel_fmsub,
  kernel_fnmadd,
  kernel_fnmsub,
  kernel_fnmadd_madd,
  kernel_fdiv,
  kernel_fdiv2,
  kernel_fsqrt,
  kernel_ftoi,
  kernel_ftou,
  kernel_itof,
  kernel_utof,
};
|
||||
|
||||
void main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
int num_warps = vx_num_warps();
|
||||
int num_threads = vx_num_threads();
|
||||
vx_spawn_warps(num_warps, num_threads, sc_tests[arg->testid], arg);
|
||||
}
|
30752
driver/tests/dogfood/kernel.dump
Normal file
30752
driver/tests/dogfood/kernel.dump
Normal file
File diff suppressed because it is too large
Load diff
BIN
driver/tests/dogfood/kernel.elf
Executable file
BIN
driver/tests/dogfood/kernel.elf
Executable file
Binary file not shown.
581
driver/tests/dogfood/testcases.h
Normal file
581
driver/tests/dogfood/testcases.h
Normal file
|
@ -0,0 +1,581 @@
|
|||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
#include <limits>
|
||||
|
||||
// View of a 32-bit float as its raw bit pattern / IEEE-754 fields.
union Float_t {
  float f;
  int32_t i;
  struct {
    uint32_t mantissa : 23;
    uint32_t exponent : 8;
    uint32_t sign : 1;
  } parts;
};

// Relative comparison: true when |a - b| is at most `eps` times the larger
// magnitude of the two values.
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
  auto tolerance = std::max(std::fabs(a), std::fabs(b)) * eps;
  return std::fabs(a - b) <= tolerance;
}

// ULP comparison: true when a and b are at most `ulp` representable floats
// apart.
//
// Floats are sign-magnitude, so the raw bit patterns are first mapped onto a
// single monotonic integer line (negative values become the negated
// magnitude; +0 and -0 both map to 0). The distance is computed in 64 bits.
// The previous version subtracted the raw int32 patterns directly, which
// overflows for operands of opposite sign and could report e.g. 1.0f and
// -1.0f as equal. Results for same-sign operands are unchanged.
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
  Float_t fa{a}, fb{b};
  const int64_t mag_a = static_cast<int64_t>(fa.i & 0x7fffffff);
  const int64_t mag_b = static_cast<int64_t>(fb.i & 0x7fffffff);
  const int64_t key_a = (fa.i < 0) ? -mag_a : mag_a;
  const int64_t key_b = (fb.i < 0) ? -mag_b : mag_b;
  const int64_t distance = (key_a > key_b) ? (key_a - key_b) : (key_b - key_a);
  return distance <= static_cast<int64_t>(ulp);
}

// Default float comparison used by the test cases (ULP based).
inline bool almost_equal(float a, float b) {
  return almost_equal_ulp(a, b);
}
|
||||
|
||||
// Interface implemented by every host-side test case.
class ITestCase {
public:
  ITestCase() = default;
  virtual ~ITestCase() = default;

  // Fills the two source buffers, each holding `n` 32-bit elements.
  virtual void setup(int n, void* src1, void* src2) = 0;

  // Compares the `n` results in `dst` against the values expected for the
  // given sources and returns the number of mismatching elements.
  virtual int verify(int n, void* dst, const void* src1, const void* src2) = 0;
};
|
||||
|
||||
class Test_IADD : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (int32_t*)src1;
|
||||
auto b = (int32_t*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = n/2 - i;
|
||||
b[i] = n/2 + i;
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (int32_t*)src1;
|
||||
auto b = (int32_t*)src2;
|
||||
auto c = (int32_t*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] + b[i];
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_IMUL : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (int32_t*)src1;
|
||||
auto b = (int32_t*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = n/2 - i;
|
||||
b[i] = n/2 + i;
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (int32_t*)src1;
|
||||
auto b = (int32_t*)src2;
|
||||
auto c = (int32_t*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i];
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_IDIV : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (int32_t*)src1;
|
||||
auto b = (int32_t*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = n/2 - i;
|
||||
b[i] = n/2 + i;
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (int32_t*)src1;
|
||||
auto b = (int32_t*)src2;
|
||||
auto c = (int32_t*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] / b[i];
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_IDIV_MUL : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (int32_t*)src1;
|
||||
auto b = (int32_t*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = n/2 - i;
|
||||
b[i] = n/2 + i;
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (int32_t*)src1;
|
||||
auto b = (int32_t*)src2;
|
||||
auto c = (int32_t*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto x = a[i] / b[i];
|
||||
auto y = a[i] * b[i];
|
||||
auto ref = x + y;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FADD : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] + b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FSUB : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] - b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FMUL : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FMADD : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i] + 0.5f;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FMSUB : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i] - 0.5f;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FNMADD : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = -a[i] * b[i] - 0.5f;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FNMSUB : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = -a[i] * b[i] + 0.5f;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FNMADD_MADD : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto x = -a[i] * b[i] - 0.5f;
|
||||
auto y = a[i] * b[i] + 0.5f;
|
||||
auto ref = x + y;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FDIV : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] / b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FDIV2 : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto x = a[i] / b[i];
|
||||
auto y = b[i] / a[i];
|
||||
auto ref = x + y;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FSQRT : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
int q = 1.0f + (i % 64);
|
||||
a[i] = q;
|
||||
b[i] = q;
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = sqrt(a[i] * b[i]);
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FTOI : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n/2 - i) * (1.0f/n);
|
||||
b[i] = (n/2 - i) * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (int32_t*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto x = a[i] + b[i];
|
||||
auto ref = (int32_t)x;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_FTOU : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = i * (1.0f/n);
|
||||
b[i] = i * (1.0f/n);
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
auto c = (uint32_t*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto x = a[i] + b[i];
|
||||
auto ref = (uint32_t)x;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_ITOF : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (int32_t*)src1;
|
||||
auto b = (int32_t*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = n/2 - i;
|
||||
b[i] = n/2 - i;
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (int32_t*)src1;
|
||||
auto b = (int32_t*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto x = a[i] + b[i];
|
||||
auto ref = (float)x;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
||||
|
||||
class Test_UTOF : public ITestCase {
|
||||
public:
|
||||
|
||||
void setup(int n, void* src1, void* src2) override {
|
||||
auto a = (uint32_t*)src1;
|
||||
auto b = (uint32_t*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = i;
|
||||
b[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
int verify(int n, void* dst, const void* src1, const void* src2) override {
|
||||
int errors = 0;
|
||||
auto a = (uint32_t*)src1;
|
||||
auto b = (uint32_t*)src2;
|
||||
auto c = (float*)dst;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto x = a[i] + b[i];
|
||||
auto ref = (float)x;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
};
|
|
@ -988,32 +988,32 @@ Vortex #() vortex (
|
|||
localparam SCOPE_DATAW = $bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST});
|
||||
localparam SCOPE_SR_DEPTH = 2;
|
||||
|
||||
`SCOPE_ASSIGN(scope_dram_req_valid, vx_dram_req_valid);
|
||||
`SCOPE_ASSIGN(scope_dram_req_addr, {vx_dram_req_addr, 4'b0});
|
||||
`SCOPE_ASSIGN(scope_dram_req_rw, vx_dram_req_rw);
|
||||
`SCOPE_ASSIGN(scope_dram_req_byteen,vx_dram_req_byteen);
|
||||
`SCOPE_ASSIGN(scope_dram_req_data, vx_dram_req_data);
|
||||
`SCOPE_ASSIGN(scope_dram_req_tag, vx_dram_req_tag);
|
||||
`SCOPE_ASSIGN(scope_dram_req_ready, vx_dram_req_ready);
|
||||
`SCOPE_ASSIGN (scope_dram_req_valid, vx_dram_req_valid);
|
||||
`SCOPE_ASSIGN (scope_dram_req_addr, {vx_dram_req_addr, 4'b0});
|
||||
`SCOPE_ASSIGN (scope_dram_req_rw, vx_dram_req_rw);
|
||||
`SCOPE_ASSIGN (scope_dram_req_byteen,vx_dram_req_byteen);
|
||||
`SCOPE_ASSIGN (scope_dram_req_data, vx_dram_req_data);
|
||||
`SCOPE_ASSIGN (scope_dram_req_tag, vx_dram_req_tag);
|
||||
`SCOPE_ASSIGN (scope_dram_req_ready, vx_dram_req_ready);
|
||||
|
||||
`SCOPE_ASSIGN(scope_dram_rsp_valid, vx_dram_rsp_valid);
|
||||
`SCOPE_ASSIGN(scope_dram_rsp_data, vx_dram_rsp_data);
|
||||
`SCOPE_ASSIGN(scope_dram_rsp_tag, vx_dram_rsp_tag);
|
||||
`SCOPE_ASSIGN(scope_dram_rsp_ready, vx_dram_rsp_ready);
|
||||
`SCOPE_ASSIGN (scope_dram_rsp_valid, vx_dram_rsp_valid);
|
||||
`SCOPE_ASSIGN (scope_dram_rsp_data, vx_dram_rsp_data);
|
||||
`SCOPE_ASSIGN (scope_dram_rsp_tag, vx_dram_rsp_tag);
|
||||
`SCOPE_ASSIGN (scope_dram_rsp_ready, vx_dram_rsp_ready);
|
||||
|
||||
`SCOPE_ASSIGN(scope_snp_req_valid, vx_snp_req_valid);
|
||||
`SCOPE_ASSIGN(scope_snp_req_addr, {vx_snp_req_addr, 4'b0});
|
||||
`SCOPE_ASSIGN(scope_snp_req_invalidate, vx_snp_req_invalidate);
|
||||
`SCOPE_ASSIGN(scope_snp_req_tag, vx_snp_req_tag);
|
||||
`SCOPE_ASSIGN(scope_snp_req_ready, vx_snp_req_ready);
|
||||
`SCOPE_ASSIGN (scope_snp_req_valid, vx_snp_req_valid);
|
||||
`SCOPE_ASSIGN (scope_snp_req_addr, {vx_snp_req_addr, 4'b0});
|
||||
`SCOPE_ASSIGN (scope_snp_req_invalidate, vx_snp_req_invalidate);
|
||||
`SCOPE_ASSIGN (scope_snp_req_tag, vx_snp_req_tag);
|
||||
`SCOPE_ASSIGN (scope_snp_req_ready, vx_snp_req_ready);
|
||||
|
||||
`SCOPE_ASSIGN(scope_snp_rsp_valid, vx_snp_rsp_valid);
|
||||
`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag);
|
||||
`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready);
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_valid, vx_snp_rsp_valid);
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_tag, vx_snp_rsp_tag);
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_ready, vx_snp_rsp_ready);
|
||||
|
||||
`SCOPE_ASSIGN(scope_snp_rsp_valid, vx_snp_rsp_valid);
|
||||
`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag);
|
||||
`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready);
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_valid, vx_snp_rsp_valid);
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_tag, vx_snp_rsp_tag);
|
||||
`SCOPE_ASSIGN (scope_snp_rsp_ready, vx_snp_rsp_ready);
|
||||
|
||||
wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready)
|
||||
|| (scope_icache_rsp_valid && scope_icache_rsp_ready)
|
||||
|
|
|
@ -15,7 +15,7 @@ module VX_decode #(
|
|||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if
|
||||
);
|
||||
wire in_valid = ifetch_rsp_if.valid;
|
||||
wire valid_in = ifetch_rsp_if.valid;
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
|
||||
reg [`ALU_BITS-1:0] alu_op;
|
||||
|
@ -352,10 +352,10 @@ module VX_decode #(
|
|||
|
||||
assign decode_tmp_if.frm = func3;
|
||||
|
||||
assign join_if.is_join = in_valid && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.is_join = valid_in && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
|
||||
assign wstall_if.wstall = in_valid && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.wstall = valid_in && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
|
||||
wire stall = ~decode_if.ready && decode_if.valid;
|
||||
|
|
|
@ -77,6 +77,8 @@
|
|||
|
||||
`define INST_GPU 7'b1101011
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define BYTEEN_SB 3'h0
|
||||
`define BYTEEN_SH 3'h1
|
||||
`define BYTEEN_SW 3'h2
|
||||
|
@ -85,6 +87,8 @@
|
|||
`define BYTEEN_BITS 3
|
||||
`define BYTEEN_TYPE(x) x[1:0]
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define BR_EQ 4'h0
|
||||
`define BR_NE 4'h1
|
||||
`define BR_LT 4'h2
|
||||
|
@ -101,6 +105,22 @@
|
|||
`define BR_NO 4'hF
|
||||
`define BR_BITS 4
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_MUL 3'h4
|
||||
`define EX_FPU 3'h5
|
||||
`define EX_GPU 3'h6
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 6
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define OP_BITS 5
|
||||
|
||||
`define ALU_ADD 5'h00
|
||||
|
@ -210,18 +230,6 @@
|
|||
`define GPU_BITS 3
|
||||
`define GPU_OP(x) x[`GPU_BITS-1:0]
|
||||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_MUL 3'h4
|
||||
`define EX_FPU 3'h5
|
||||
`define EX_GPU 3'h6
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 6
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
|
|
@ -123,25 +123,25 @@ module VX_execute #(
|
|||
|
||||
assign ebreak = alu_req_if.valid && (alu_req_if.alu_op == `ALU_EBREAK || alu_req_if.alu_op == `ALU_ECALL);
|
||||
|
||||
`SCOPE_ASSIGN(scope_decode_valid, decode_if.valid);
|
||||
`SCOPE_ASSIGN(scope_decode_warp_num, decode_if.warp_num);
|
||||
`SCOPE_ASSIGN(scope_decode_curr_PC, decode_if.curr_PC);
|
||||
`SCOPE_ASSIGN(scope_decode_is_jal, decode_if.is_jal);
|
||||
`SCOPE_ASSIGN(scope_decode_rs1, decode_if.rs1);
|
||||
`SCOPE_ASSIGN(scope_decode_rs2, decode_if.rs2);
|
||||
`SCOPE_ASSIGN (scope_decode_valid, decode_if.valid);
|
||||
`SCOPE_ASSIGN (scope_decode_warp_num, decode_if.warp_num);
|
||||
`SCOPE_ASSIGN (scope_decode_curr_PC, decode_if.curr_PC);
|
||||
`SCOPE_ASSIGN (scope_decode_is_jal, decode_if.is_jal);
|
||||
`SCOPE_ASSIGN (scope_decode_rs1, decode_if.rs1);
|
||||
`SCOPE_ASSIGN (scope_decode_rs2, decode_if.rs2);
|
||||
|
||||
`SCOPE_ASSIGN(scope_execute_valid, alu_req_if.valid);
|
||||
`SCOPE_ASSIGN(scope_execute_warp_num, alu_req_if.warp_num);
|
||||
`SCOPE_ASSIGN(scope_execute_curr_PC, alu_req_if.curr_PC);
|
||||
`SCOPE_ASSIGN(scope_execute_rd, alu_req_if.rd);
|
||||
`SCOPE_ASSIGN(scope_execute_a, alu_req_if.rs1_data);
|
||||
`SCOPE_ASSIGN(scope_execute_b, alu_req_if.rs2_data);
|
||||
`SCOPE_ASSIGN (scope_execute_valid, alu_req_if.valid);
|
||||
`SCOPE_ASSIGN (scope_execute_warp_num, alu_req_if.warp_num);
|
||||
`SCOPE_ASSIGN (scope_execute_curr_PC, alu_req_if.curr_PC);
|
||||
`SCOPE_ASSIGN (scope_execute_rd, alu_req_if.rd);
|
||||
`SCOPE_ASSIGN (scope_execute_a, alu_req_if.rs1_data);
|
||||
`SCOPE_ASSIGN (scope_execute_b, alu_req_if.rs2_data);
|
||||
|
||||
`SCOPE_ASSIGN(scope_writeback_valid, writeback_if.valid);
|
||||
`SCOPE_ASSIGN(scope_writeback_warp_num, writeback_if.warp_num);
|
||||
`SCOPE_ASSIGN(scope_writeback_curr_PC, writeback_if.curr_PC);
|
||||
`SCOPE_ASSIGN(scope_writeback_wb, writeback_if.wb);
|
||||
`SCOPE_ASSIGN(scope_writeback_rd, writeback_if.rd);
|
||||
`SCOPE_ASSIGN(scope_writeback_data, writeback_if.data);
|
||||
`SCOPE_ASSIGN (scope_writeback_valid, writeback_if.valid);
|
||||
`SCOPE_ASSIGN (scope_writeback_warp_num, writeback_if.warp_num);
|
||||
`SCOPE_ASSIGN (scope_writeback_curr_PC, writeback_if.curr_PC);
|
||||
`SCOPE_ASSIGN (scope_writeback_wb, writeback_if.wb);
|
||||
`SCOPE_ASSIGN (scope_writeback_rd, writeback_if.rd);
|
||||
`SCOPE_ASSIGN (scope_writeback_data, writeback_if.data);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -24,10 +24,10 @@ module VX_fpu_unit #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.in_valid (fpu_req_if.valid),
|
||||
.in_ready (fpu_req_if.ready),
|
||||
.valid_in (fpu_req_if.valid),
|
||||
.ready_in (fpu_req_if.ready),
|
||||
|
||||
.in_tag (fpu_req_if.issue_tag),
|
||||
.tag_in (fpu_req_if.issue_tag),
|
||||
|
||||
.op (fpu_req_if.fpu_op),
|
||||
.frm (frm),
|
||||
|
@ -40,10 +40,10 @@ module VX_fpu_unit #(
|
|||
.has_fflags (fpu_commit_if.has_fflags),
|
||||
.fflags (fpu_commit_if.fflags),
|
||||
|
||||
.out_tag (fpu_commit_if.issue_tag),
|
||||
.tag_out (fpu_commit_if.issue_tag),
|
||||
|
||||
.out_ready (fpu_commit_if.ready),
|
||||
.out_valid (fpu_commit_if.valid)
|
||||
.ready_out (fpu_commit_if.ready),
|
||||
.valid_out (fpu_commit_if.valid)
|
||||
);
|
||||
|
||||
`else
|
||||
|
@ -57,10 +57,10 @@ module VX_fpu_unit #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.in_valid (fpu_req_if.valid),
|
||||
.in_ready (fpu_req_if.ready),
|
||||
.valid_in (fpu_req_if.valid),
|
||||
.ready_in (fpu_req_if.ready),
|
||||
|
||||
.in_tag (fpu_req_if.issue_tag),
|
||||
.tag_in (fpu_req_if.issue_tag),
|
||||
|
||||
.op (fpu_req_if.fpu_op),
|
||||
.frm (frm),
|
||||
|
@ -73,10 +73,10 @@ module VX_fpu_unit #(
|
|||
.has_fflags (fpu_commit_if.has_fflags),
|
||||
.fflags (fpu_commit_if.fflags),
|
||||
|
||||
.out_tag (fpu_commit_if.issue_tag),
|
||||
.tag_out (fpu_commit_if.issue_tag),
|
||||
|
||||
.out_ready (fpu_commit_if.ready),
|
||||
.out_valid (fpu_commit_if.valid)
|
||||
.ready_out (fpu_commit_if.ready),
|
||||
.valid_out (fpu_commit_if.valid)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
|
|
@ -18,14 +18,14 @@ module VX_gpr_fp_ctrl (
|
|||
reg [`NUM_THREADS-1:0][31:0] tmp_rs1_data;
|
||||
reg read_rs3;
|
||||
|
||||
wire delay = gpr_read_if.valid && gpr_read_if.use_rs3 && ~read_rs3;
|
||||
wire rs3_delay = gpr_read_if.valid && gpr_read_if.use_rs3 && ~read_rs3;
|
||||
|
||||
wire read_fire = gpr_read_if.valid && gpr_read_if.out_ready;
|
||||
wire read_fire = gpr_read_if.valid && read_rs3;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
read_rs3 <= 0;
|
||||
end else if (delay) begin
|
||||
end else if (rs3_delay) begin
|
||||
read_rs3 <= 1;
|
||||
end else if (read_fire) begin
|
||||
read_rs3 <= 0;
|
||||
|
@ -34,14 +34,14 @@ module VX_gpr_fp_ctrl (
|
|||
|
||||
// backup original rs1 data
|
||||
always @(posedge clk) begin
|
||||
if (delay) begin
|
||||
if (rs3_delay) begin
|
||||
tmp_rs1_data <= rs1_data;
|
||||
end
|
||||
end
|
||||
|
||||
// outputs
|
||||
assign raddr1 = {gpr_read_if.warp_num, (read_rs3 ? gpr_read_if.rs3 : gpr_read_if.rs1)};
|
||||
assign gpr_read_if.in_ready = ~delay;
|
||||
assign gpr_read_if.ready = ~rs3_delay;
|
||||
assign gpr_read_if.rs1_data = gpr_read_if.use_rs3 ? tmp_rs1_data : rs1_data;
|
||||
assign gpr_read_if.rs2_data = rs2_data;
|
||||
assign gpr_read_if.rs3_data = rs1_data;
|
||||
|
|
|
@ -14,22 +14,24 @@ module VX_gpr_ram (
|
|||
|
||||
reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
|
||||
integer i, j;
|
||||
|
||||
initial begin
|
||||
// initialize r0 to 0
|
||||
for (j = 0; j < `NUM_WARPS; j++) begin
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
ram[j * `NUM_REGS][i][0] = 8'h0;
|
||||
ram[j * `NUM_REGS][i][1] = 8'h0;
|
||||
ram[j * `NUM_REGS][i][2] = 8'h0;
|
||||
ram[j * `NUM_REGS][i][3] = 8'h0;
|
||||
// initialize ram
|
||||
for (integer j = 0; j < `NUM_WARPS; j++) begin
|
||||
for (integer i = 0; i < `NUM_REGS; i++) begin
|
||||
if (i == 0) begin
|
||||
ram[j * `NUM_REGS + i] = {`NUM_THREADS{32'h00000000}}; // set r0 = 0
|
||||
end
|
||||
`ifndef SYNTHESIS
|
||||
else begin
|
||||
ram[j * `NUM_REGS + i] = {`NUM_THREADS{32'hdeadbeef}};
|
||||
end
|
||||
`endif
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (we[i]) begin
|
||||
ram[waddr][i][0] <= wdata[i][07:00];
|
||||
ram[waddr][i][1] <= wdata[i][15:08];
|
||||
|
|
|
@ -43,18 +43,16 @@ module VX_gpr_stage #(
|
|||
assign gpr_read_if.rs1_data = rs1_data;
|
||||
assign gpr_read_if.rs2_data = rs2_data;
|
||||
assign gpr_read_if.rs3_data = 0;
|
||||
assign gpr_read_if.in_ready = 1;
|
||||
assign gpr_read_if.ready = 1;
|
||||
|
||||
wire valid = gpr_read_if.valid;
|
||||
wire out_ready = gpr_read_if.out_ready;
|
||||
wire use_rs3 = gpr_read_if.use_rs3;
|
||||
wire [`NR_BITS-1:0] rs3 = gpr_read_if.rs3;
|
||||
`UNUSED_VAR (valid);
|
||||
`UNUSED_VAR (out_ready);
|
||||
`UNUSED_VAR (use_rs3);
|
||||
`UNUSED_VAR (rs3);
|
||||
`endif
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
assign writeback_if.ready = 1'b1; // writes are stall-free
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -60,16 +60,16 @@ module VX_icache_stage #(
|
|||
// Can accept new response?
|
||||
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
|
||||
|
||||
`SCOPE_ASSIGN(scope_icache_req_valid, icache_req_if.valid);
|
||||
`SCOPE_ASSIGN(scope_icache_req_warp_num, ifetch_req_if.warp_num);
|
||||
`SCOPE_ASSIGN(scope_icache_req_addr, {icache_req_if.addr, 2'b0});
|
||||
`SCOPE_ASSIGN(scope_icache_req_tag, icache_req_if.tag);
|
||||
`SCOPE_ASSIGN(scope_icache_req_ready, icache_req_if.ready);
|
||||
`SCOPE_ASSIGN (scope_icache_req_valid, icache_req_if.valid);
|
||||
`SCOPE_ASSIGN (scope_icache_req_warp_num, ifetch_req_if.warp_num);
|
||||
`SCOPE_ASSIGN (scope_icache_req_addr, {icache_req_if.addr, 2'b0});
|
||||
`SCOPE_ASSIGN (scope_icache_req_tag, icache_req_if.tag);
|
||||
`SCOPE_ASSIGN (scope_icache_req_ready, icache_req_if.ready);
|
||||
|
||||
`SCOPE_ASSIGN(scope_icache_rsp_valid, icache_rsp_if.valid);
|
||||
`SCOPE_ASSIGN(scope_icache_rsp_data, icache_rsp_if.data);
|
||||
`SCOPE_ASSIGN(scope_icache_rsp_tag, icache_rsp_if.tag);
|
||||
`SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_if.ready);
|
||||
`SCOPE_ASSIGN (scope_icache_rsp_valid, icache_rsp_if.valid);
|
||||
`SCOPE_ASSIGN (scope_icache_rsp_data, icache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (scope_icache_rsp_tag, icache_rsp_if.tag);
|
||||
`SCOPE_ASSIGN (scope_icache_rsp_ready, icache_rsp_if.ready);
|
||||
|
||||
`ifdef DBG_PRINT_CORE_ICACHE
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -17,26 +17,27 @@ module VX_issue #(
|
|||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire schedule_delay;
|
||||
|
||||
VX_gpr_read_if gpr_read_if();
|
||||
assign gpr_read_if.valid = decode_if.valid;
|
||||
assign gpr_read_if.valid = decode_if.valid && ~schedule_delay;
|
||||
assign gpr_read_if.warp_num = decode_if.warp_num;
|
||||
assign gpr_read_if.rs1 = decode_if.rs1;
|
||||
assign gpr_read_if.rs2 = decode_if.rs2;
|
||||
assign gpr_read_if.rs3 = decode_if.rs3;
|
||||
assign gpr_read_if.use_rs3 = decode_if.use_rs3;
|
||||
assign gpr_read_if.out_ready = decode_if.ready;
|
||||
|
||||
wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag;
|
||||
|
||||
wire schedule_delay;
|
||||
|
||||
wire gpr_busy = ~gpr_read_if.in_ready;
|
||||
|
||||
wire ex_busy = (~alu_req_if.ready && (decode_if.ex_type == `EX_ALU))
|
||||
|| (~lsu_req_if.ready && (decode_if.ex_type == `EX_LSU))
|
||||
|| (~csr_req_if.ready && (decode_if.ex_type == `EX_CSR))
|
||||
`ifdef EXT_M_ENABLE
|
||||
|| (~mul_req_if.ready && (decode_if.ex_type == `EX_MUL))
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
|| (~fpu_req_if.ready && (decode_if.ex_type == `EX_FPU))
|
||||
`endif
|
||||
|| (~gpu_req_if.ready && (decode_if.ex_type == `EX_GPU));
|
||||
|
||||
VX_scheduler #(
|
||||
|
@ -47,8 +48,7 @@ module VX_issue #(
|
|||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.cmt_to_issue_if(cmt_to_issue_if),
|
||||
.ex_busy (ex_busy),
|
||||
.gpr_busy (gpr_busy),
|
||||
.ex_busy (ex_busy),
|
||||
.issue_tag (issue_tag),
|
||||
.schedule_delay (schedule_delay)
|
||||
);
|
||||
|
@ -62,56 +62,117 @@ module VX_issue #(
|
|||
.gpr_read_if (gpr_read_if)
|
||||
);
|
||||
|
||||
VX_decode_if decode_tmp_if();
|
||||
VX_gpr_read_if gpr_read_tmp_if();
|
||||
|
||||
wire stall = schedule_delay;
|
||||
wire flush = schedule_delay && ~ex_busy;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) issue_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (flush),
|
||||
.in ({decode_if.valid, issue_tag, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, gpr_read_if.rs1_data, gpr_read_if.rs2_data, gpr_read_if.rs3_data}),
|
||||
.out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, gpr_read_tmp_if.rs1_data, gpr_read_tmp_if.rs2_data, gpr_read_tmp_if.rs3_data})
|
||||
);
|
||||
|
||||
assign decode_if.ready = ~stall;
|
||||
VX_alu_req_if alu_req_tmp_if();
|
||||
VX_lsu_req_if lsu_req_tmp_if();
|
||||
VX_csr_req_if csr_req_tmp_if();
|
||||
VX_mul_req_if mul_req_tmp_if();
|
||||
VX_fpu_req_if fpu_req_tmp_if();
|
||||
VX_gpu_req_if gpu_req_tmp_if();
|
||||
|
||||
VX_issue_demux issue_demux (
|
||||
.decode_if (decode_tmp_if),
|
||||
.gpr_read_if(gpr_read_tmp_if),
|
||||
.issue_tag (issue_tmp_tag),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
.decode_if (decode_if),
|
||||
.gpr_read_if(gpr_read_if),
|
||||
.issue_tag (issue_tag),
|
||||
.alu_req_if (alu_req_tmp_if),
|
||||
.lsu_req_if (lsu_req_tmp_if),
|
||||
.csr_req_if (csr_req_tmp_if),
|
||||
.mul_req_if (mul_req_tmp_if),
|
||||
.fpu_req_if (fpu_req_tmp_if),
|
||||
.gpu_req_if (gpu_req_tmp_if)
|
||||
);
|
||||
|
||||
wire stall = schedule_delay || ~gpr_read_if.ready;
|
||||
assign decode_if.ready = ~stall;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `ALU_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~alu_req_if.ready),
|
||||
.flush (stall && alu_req_if.ready),
|
||||
.in ({alu_req_tmp_if.valid, alu_req_tmp_if.issue_tag, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.thread_mask, alu_req_tmp_if.alu_op, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC}),
|
||||
.out ({alu_req_if.valid, alu_req_if.issue_tag, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.thread_mask, alu_req_if.alu_op, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + 1 + `BYTEEN_BITS + (`NUM_THREADS * 32) + 32 + (`NUM_THREADS * 32) + `NR_BITS + 1)
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~lsu_req_if.ready),
|
||||
.flush (stall && lsu_req_if.ready),
|
||||
.in ({lsu_req_tmp_if.valid, lsu_req_tmp_if.issue_tag, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.thread_mask, lsu_req_tmp_if.rw, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset, lsu_req_tmp_if.store_data, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb}),
|
||||
.out ({lsu_req_if.valid, lsu_req_if.issue_tag, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data, lsu_req_if.rd, lsu_req_if.wb})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `CSR_BITS + `CSR_ADDR_BITS + 32 + 1)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~csr_req_if.ready),
|
||||
.flush (stall && csr_req_if.ready),
|
||||
.in ({csr_req_tmp_if.valid, csr_req_tmp_if.issue_tag, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.thread_mask, csr_req_tmp_if.csr_op, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask, csr_req_tmp_if.is_io}),
|
||||
.out ({csr_req_if.valid, csr_req_if.issue_tag, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.thread_mask, csr_req_if.csr_op, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
|
||||
);
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `MUL_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~mul_req_if.ready),
|
||||
.flush (stall && mul_req_if.ready),
|
||||
.in ({mul_req_tmp_if.valid, mul_req_tmp_if.issue_tag, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.thread_mask, mul_req_tmp_if.mul_op, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data}),
|
||||
.out ({mul_req_if.valid, mul_req_if.issue_tag, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.thread_mask, mul_req_if.mul_op, mul_req_if.rs1_data, mul_req_if.rs2_data})
|
||||
);
|
||||
`endif
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `FPU_BITS + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) fpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~fpu_req_if.ready),
|
||||
.flush (stall && fpu_req_if.ready),
|
||||
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.issue_tag, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.thread_mask, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.frm, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data}),
|
||||
.out ({fpu_req_if.valid, fpu_req_if.issue_tag, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.thread_mask, fpu_req_if.fpu_op, fpu_req_if.frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data})
|
||||
);
|
||||
`endif
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) gpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (~gpu_req_if.ready),
|
||||
.flush (stall && gpu_req_if.ready),
|
||||
.in ({gpu_req_tmp_if.valid, gpu_req_tmp_if.issue_tag, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.thread_mask, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data, gpu_req_tmp_if.next_PC}),
|
||||
.out ({gpu_req_if.valid, gpu_req_if.issue_tag, gpu_req_if.warp_num, gpu_req_if.curr_PC, gpu_req_if.thread_mask, gpu_req_if.gpu_op, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.next_PC})
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.issue_tag, alu_req_if.thread_mask, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
|
||||
end
|
||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.issue_tag, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||
end
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.issue_tag, csr_req_if.thread_mask, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||
end
|
||||
if (mul_req_if.valid && mul_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.issue_tag, mul_req_if.thread_mask, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
end
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%d, rd=%0d, frm=%0h, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, fpu_req_if.frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.issue_tag, fpu_req_if.thread_mask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
end
|
||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.warp_num, gpu_req_if.curr_PC, gpu_req_if.issue_tag, gpu_req_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -32,11 +32,11 @@ module VX_issue_demux (
|
|||
assign lsu_req_if.issue_tag = issue_tag;
|
||||
assign lsu_req_if.warp_num = decode_if.warp_num;
|
||||
assign lsu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign lsu_req_if.rw = `LSU_RW(decode_if.ex_op);
|
||||
assign lsu_req_if.byteen = `LSU_BE(decode_if.ex_op);
|
||||
assign lsu_req_if.base_addr = gpr_read_if.rs1_data;
|
||||
assign lsu_req_if.store_data = gpr_read_if.rs2_data;
|
||||
assign lsu_req_if.offset = decode_if.imm;
|
||||
assign lsu_req_if.rw = `LSU_RW(decode_if.ex_op);
|
||||
assign lsu_req_if.byteen = `LSU_BE(decode_if.ex_op);
|
||||
assign lsu_req_if.rd = decode_if.rd;
|
||||
assign lsu_req_if.wb = decode_if.wb;
|
||||
|
||||
|
@ -54,6 +54,8 @@ module VX_issue_demux (
|
|||
`ifdef EXT_M_ENABLE
|
||||
assign mul_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_MUL);
|
||||
assign mul_req_if.issue_tag = issue_tag;
|
||||
assign mul_req_if.warp_num = decode_if.warp_num;
|
||||
assign mul_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign mul_req_if.mul_op = `MUL_OP(decode_if.ex_op);
|
||||
assign mul_req_if.rs1_data = gpr_read_if.rs1_data;
|
||||
assign mul_req_if.rs2_data = gpr_read_if.rs2_data;
|
||||
|
@ -64,11 +66,12 @@ module VX_issue_demux (
|
|||
assign fpu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_FPU);
|
||||
assign fpu_req_if.issue_tag = issue_tag;
|
||||
assign fpu_req_if.warp_num = decode_if.warp_num;
|
||||
assign fpu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign fpu_req_if.fpu_op = `FPU_OP(decode_if.ex_op);
|
||||
assign fpu_req_if.frm = decode_if.frm;
|
||||
assign fpu_req_if.rs1_data = gpr_read_if.rs1_data;
|
||||
assign fpu_req_if.rs2_data = gpr_read_if.rs2_data;
|
||||
assign fpu_req_if.rs3_data = gpr_read_if.rs3_data;
|
||||
assign fpu_req_if.frm = decode_if.frm;
|
||||
assign fpu_req_if.rs3_data = gpr_read_if.rs3_data;
|
||||
`endif
|
||||
|
||||
// GPU unit
|
||||
|
@ -76,6 +79,7 @@ module VX_issue_demux (
|
|||
assign gpu_req_if.thread_mask = decode_if.thread_mask;
|
||||
assign gpu_req_if.issue_tag = issue_tag;
|
||||
assign gpu_req_if.warp_num = decode_if.warp_num;
|
||||
assign gpu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign gpu_req_if.gpu_op = `GPU_OP(decode_if.ex_op);
|
||||
assign gpu_req_if.rs1_data = gpr_read_if.rs1_data;
|
||||
assign gpu_req_if.rs2_data = gpr_read_if.rs2_data[0];
|
||||
|
|
|
@ -68,12 +68,12 @@ module VX_lsu_unit #(
|
|||
assign mem_req_offset[i] = full_address[i][1:0];
|
||||
assign mem_req_byteen[i] = wmask << full_address[i][1:0];
|
||||
assign mem_req_data[i] = lsu_req_if.store_data[i] << {mem_req_offset[i], 3'b0};
|
||||
end
|
||||
end
|
||||
|
||||
wire stall_in = ~dcache_req_if.ready;
|
||||
wire stall_in = ~dcache_req_if.ready && use_valid;
|
||||
|
||||
// Can accept new request?
|
||||
assign lsu_req_if.ready = ~stall_in;
|
||||
assign lsu_req_if.ready = ~stall_in;
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [`NUM_THREADS-1:0][31:0] use_address;
|
||||
|
@ -167,20 +167,20 @@ module VX_lsu_unit #(
|
|||
assign dcache_rsp_if.ready = lsu_commit_if.ready && ~is_store_rsp; // STORE has priority
|
||||
|
||||
// scope registration
|
||||
`SCOPE_ASSIGN(scope_dcache_req_valid, dcache_req_if.valid);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_addr, use_address);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_rw, dcache_req_if.rw );
|
||||
`SCOPE_ASSIGN(scope_dcache_req_byteen,dcache_req_if.byteen);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_data, dcache_req_if.data);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_tag, dcache_req_if.tag);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_ready, dcache_req_if.ready);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_warp_num, use_warp_num);
|
||||
`SCOPE_ASSIGN(scope_dcache_req_curr_PC, use_pc);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_valid, dcache_req_if.valid);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_addr, use_address);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_rw, dcache_req_if.rw );
|
||||
`SCOPE_ASSIGN (scope_dcache_req_byteen,dcache_req_if.byteen);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_data, dcache_req_if.data);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_tag, dcache_req_if.tag);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_ready, dcache_req_if.ready);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_warp_num, use_warp_num);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_curr_PC, use_pc);
|
||||
|
||||
`SCOPE_ASSIGN(scope_dcache_rsp_valid, dcache_rsp_if.valid);
|
||||
`SCOPE_ASSIGN(scope_dcache_rsp_data, dcache_rsp_if.data);
|
||||
`SCOPE_ASSIGN(scope_dcache_rsp_tag, dcache_rsp_if.tag);
|
||||
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.ready);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_valid, dcache_rsp_if.valid);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_data, dcache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_tag, dcache_rsp_if.tag);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_ready, dcache_rsp_if.ready);
|
||||
|
||||
`UNUSED_VAR (mem_rsp_warp_num)
|
||||
`UNUSED_VAR (mem_rsp_curr_PC)
|
||||
|
|
|
@ -21,13 +21,13 @@ module VX_mul_unit #(
|
|||
|
||||
wire stall_mul, stall_div;
|
||||
|
||||
wire is_mul_op = (alu_op == `MUL_MUL);
|
||||
wire is_div_op = (alu_op == `MUL_DIV || alu_op == `MUL_DIVU);
|
||||
|
||||
reg [`NUM_THREADS-1:0] is_div_op_in;
|
||||
wire [`NUM_THREADS-1:0] is_div_op_out;
|
||||
wire is_mul_op_out;
|
||||
wire is_mul_mul = (alu_op == `MUL_MUL);
|
||||
wire is_mul_mul_out;
|
||||
|
||||
wire is_div_divu = (alu_op == `MUL_DIV || alu_op == `MUL_DIVU);
|
||||
reg [`NUM_THREADS-1:0] is_div_divu_qual;
|
||||
wire [`NUM_THREADS-1:0] is_div_divu_out;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
@ -39,16 +39,16 @@ module VX_mul_unit #(
|
|||
|
||||
// handle divide by zero
|
||||
always @(*) begin
|
||||
is_div_op_in[i] = is_div_op;
|
||||
is_div_divu_qual[i] = is_div_divu;
|
||||
div_in1 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in1[i][31], alu_in1[i]};
|
||||
div_in2 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in2[i][31], alu_in2[i]};
|
||||
|
||||
if (0 == alu_in2[i]) begin
|
||||
if (is_div_op) begin
|
||||
if (is_div_divu) begin
|
||||
div_in1 = {1'b0, 32'hFFFFFFFF}; // quotient = (0xFFFFFFFF / 1)
|
||||
div_in2 = 1;
|
||||
end else begin
|
||||
is_div_op_in[i] = 1; // remainder = (in1 / 1)
|
||||
is_div_divu_qual[i] = 1; // remainder = (in1 / 1)
|
||||
div_in2 = 1;
|
||||
end
|
||||
end
|
||||
|
@ -91,10 +91,13 @@ module VX_mul_unit #(
|
|||
.remainder(rem_result_tmp)
|
||||
);
|
||||
|
||||
assign mul_result[i] = is_mul_op_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32];
|
||||
assign div_result[i] = is_div_op_out[i] ? div_result_tmp : rem_result_tmp;
|
||||
assign mul_result[i] = is_mul_mul_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32];
|
||||
assign div_result[i] = is_div_divu_out[i] ? div_result_tmp : rem_result_tmp;
|
||||
end
|
||||
|
||||
wire is_mul_fire = alu_req_if.valid && alu_req_if.ready && ~`IS_DIV_OP(alu_op);
|
||||
wire is_div_fire = alu_req_if.valid && alu_req_if.ready && `IS_DIV_OP(alu_op);
|
||||
|
||||
wire mul_valid_out;
|
||||
wire div_valid_out;
|
||||
|
||||
|
@ -108,8 +111,8 @@ module VX_mul_unit #(
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall_mul),
|
||||
.in({alu_req_if.valid && ~`IS_DIV_OP(alu_op), alu_req_if.issue_tag, is_mul_op}),
|
||||
.out({mul_valid_out, mul_issue_tag, is_mul_op_out})
|
||||
.in({is_mul_fire, alu_req_if.issue_tag, is_mul_mul}),
|
||||
.out({mul_valid_out, mul_issue_tag, is_mul_mul_out})
|
||||
);
|
||||
|
||||
VX_shift_register #(
|
||||
|
@ -119,8 +122,8 @@ module VX_mul_unit #(
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall_div),
|
||||
.in({alu_req_if.valid && `IS_DIV_OP(alu_op), alu_req_if.issue_tag, is_div_op_in}),
|
||||
.out({div_valid_out, div_issue_tag, is_div_op_out})
|
||||
.in({is_div_fire, alu_req_if.issue_tag, is_div_divu_qual}),
|
||||
.out({div_valid_out, div_issue_tag, is_div_divu_out})
|
||||
);
|
||||
|
||||
wire stall_out = (~alu_commit_if.ready && alu_commit_if.valid);
|
||||
|
|
|
@ -245,10 +245,10 @@ module VX_pipeline #(
|
|||
assign core_icache_rsp_if.tag = icache_rsp_tag;
|
||||
assign icache_rsp_ready = core_icache_rsp_if.ready;
|
||||
|
||||
`SCOPE_ASSIGN(scope_busy, busy);
|
||||
`SCOPE_ASSIGN(scope_schedule_delay, schedule_delay);
|
||||
`SCOPE_ASSIGN(scope_mem_delay, mem_delay);
|
||||
`SCOPE_ASSIGN(scope_exec_delay, exec_delay);
|
||||
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_delay);
|
||||
`SCOPE_ASSIGN (scope_busy, busy);
|
||||
`SCOPE_ASSIGN (scope_schedule_delay, schedule_delay);
|
||||
`SCOPE_ASSIGN (scope_mem_delay, mem_delay);
|
||||
`SCOPE_ASSIGN (scope_exec_delay, exec_delay);
|
||||
`SCOPE_ASSIGN (scope_gpr_stage_delay, gpr_delay);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -10,11 +10,10 @@ module VX_scheduler #(
|
|||
VX_wb_if writeback_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
input wire ex_busy,
|
||||
input wire gpr_busy,
|
||||
output wire [`ISTAG_BITS-1:0] issue_tag,
|
||||
output wire schedule_delay
|
||||
);
|
||||
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
||||
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
||||
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
||||
|
||||
|
@ -23,13 +22,13 @@ module VX_scheduler #(
|
|||
|
||||
wire issue_buf_full;
|
||||
|
||||
wire stall = gpr_busy || ex_busy || inuse_hazard || issue_buf_full;
|
||||
assign schedule_delay = ex_busy || inuse_hazard || issue_buf_full;
|
||||
|
||||
wire issue_fire = decode_if.valid && ~stall;
|
||||
wire issue_fire = decode_if.valid && decode_if.ready;
|
||||
|
||||
wire writeback_fire = writeback_if.valid && writeback_if.ready;
|
||||
|
||||
wire acquire_rd = issue_fire && (decode_if.wb != 0);
|
||||
|
||||
wire release_rd = writeback_if.valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.warp_num, writeback_if.rd}] & ~writeback_if.thread_mask;
|
||||
|
||||
|
@ -46,7 +45,7 @@ module VX_scheduler #(
|
|||
inuse_registers[{decode_if.warp_num, decode_if.rd}] <= decode_if.thread_mask;
|
||||
inuse_reg_mask[decode_if.warp_num][decode_if.rd] <= 1;
|
||||
end
|
||||
if (release_rd) begin
|
||||
if (writeback_fire) begin
|
||||
assert(inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] != 0);
|
||||
inuse_registers[{writeback_if.warp_num, writeback_if.rd}] <= inuse_registers_n;
|
||||
inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] <= (| inuse_registers_n);
|
||||
|
@ -59,25 +58,23 @@ module VX_scheduler #(
|
|||
.SIZE (`ISSUEQ_SIZE),
|
||||
.RPORTS (`NUM_EXS)
|
||||
) issue_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.wb}),
|
||||
.write_addr (issue_tag),
|
||||
.acquire_slot (issue_fire),
|
||||
.release_slot ({cmt_to_issue_if.alu_valid, cmt_to_issue_if.lsu_valid, cmt_to_issue_if.csr_valid, cmt_to_issue_if.mul_valid, cmt_to_issue_if.fpu_valid, cmt_to_issue_if.gpu_valid}),
|
||||
.read_addr ({cmt_to_issue_if.alu_tag, cmt_to_issue_if.lsu_tag, cmt_to_issue_if.csr_tag, cmt_to_issue_if.mul_tag, cmt_to_issue_if.fpu_tag, cmt_to_issue_if.gpu_tag}),
|
||||
.read_data ({cmt_to_issue_if.alu_data, cmt_to_issue_if.lsu_data, cmt_to_issue_if.csr_data, cmt_to_issue_if.mul_data, cmt_to_issue_if.fpu_data, cmt_to_issue_if.gpu_data}),
|
||||
.full (issue_buf_full)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.wb}),
|
||||
.write_addr (issue_tag),
|
||||
.acquire_slot (issue_fire),
|
||||
.release_slot ({cmt_to_issue_if.alu_valid, cmt_to_issue_if.lsu_valid, cmt_to_issue_if.csr_valid, cmt_to_issue_if.mul_valid, cmt_to_issue_if.fpu_valid, cmt_to_issue_if.gpu_valid}),
|
||||
.read_addr ({cmt_to_issue_if.alu_tag, cmt_to_issue_if.lsu_tag, cmt_to_issue_if.csr_tag, cmt_to_issue_if.mul_tag, cmt_to_issue_if.fpu_tag, cmt_to_issue_if.gpu_tag}),
|
||||
.read_data ({cmt_to_issue_if.alu_data, cmt_to_issue_if.lsu_data, cmt_to_issue_if.csr_data, cmt_to_issue_if.mul_data, cmt_to_issue_if.fpu_data, cmt_to_issue_if.gpu_data}),
|
||||
.full (issue_buf_full)
|
||||
);
|
||||
|
||||
assign schedule_delay = stall;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && stall) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b, gpr_busy=%b",
|
||||
if (decode_if.valid && ~decode_if.ready) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b",
|
||||
$time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full,
|
||||
inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy, gpr_busy);
|
||||
inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -92,6 +92,15 @@ module VX_writeback #(
|
|||
wb_warp_num_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.warp_num;
|
||||
wb_curr_PC_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.curr_PC;
|
||||
wb_rd_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.rd;
|
||||
end
|
||||
|
||||
if (gpu_commit_if.valid) begin
|
||||
wb_valid_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.wb;
|
||||
wb_thread_mask_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.thread_mask;
|
||||
wb_data_table_n [gpu_commit_if.issue_tag] = gpu_commit_if.data;
|
||||
wb_warp_num_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.warp_num;
|
||||
wb_curr_PC_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.curr_PC;
|
||||
wb_rd_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.rd;
|
||||
end
|
||||
end
|
||||
|
||||
|
|
22
hw/rtl/cache/VX_bank.v
vendored
22
hw/rtl/cache/VX_bank.v
vendored
|
@ -763,18 +763,18 @@ module VX_bank #(
|
|||
end
|
||||
`endif
|
||||
|
||||
`SCOPE_ASSIGN(scope_bank_valid_st0, qual_valid_st0);
|
||||
`SCOPE_ASSIGN(scope_bank_valid_st1, valid_st1e);
|
||||
`SCOPE_ASSIGN(scope_bank_valid_st2, valid_st2);
|
||||
`SCOPE_ASSIGN (scope_bank_valid_st0, qual_valid_st0);
|
||||
`SCOPE_ASSIGN (scope_bank_valid_st1, valid_st1e);
|
||||
`SCOPE_ASSIGN (scope_bank_valid_st2, valid_st2);
|
||||
|
||||
`SCOPE_ASSIGN(scope_bank_is_mrvq_st1, is_mrvq_st1e);
|
||||
`SCOPE_ASSIGN(scope_bank_miss_st1, miss_st1e);
|
||||
`SCOPE_ASSIGN(scope_bank_dirty_st1, dirty_st1e);
|
||||
`SCOPE_ASSIGN(scope_bank_force_miss_st1, force_request_miss_st1e);
|
||||
`SCOPE_ASSIGN(scope_bank_stall_pipe, stall_bank_pipe);
|
||||
`SCOPE_ASSIGN (scope_bank_is_mrvq_st1, is_mrvq_st1e);
|
||||
`SCOPE_ASSIGN (scope_bank_miss_st1, miss_st1e);
|
||||
`SCOPE_ASSIGN (scope_bank_dirty_st1, dirty_st1e);
|
||||
`SCOPE_ASSIGN (scope_bank_force_miss_st1, force_request_miss_st1e);
|
||||
`SCOPE_ASSIGN (scope_bank_stall_pipe, stall_bank_pipe);
|
||||
|
||||
`SCOPE_ASSIGN(scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
|
||||
`SCOPE_ASSIGN(scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1e, BANK_ID));
|
||||
`SCOPE_ASSIGN(scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
|
||||
`SCOPE_ASSIGN (scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
|
||||
`SCOPE_ASSIGN (scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1e, BANK_ID));
|
||||
`SCOPE_ASSIGN (scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
|
||||
|
||||
endmodule
|
||||
|
|
11
hw/rtl/cache/VX_tag_data_store.v
vendored
11
hw/rtl/cache/VX_tag_data_store.v
vendored
|
@ -44,10 +44,9 @@ module VX_tag_data_store #(
|
|||
|
||||
wire do_write = (| write_enable);
|
||||
|
||||
integer i, j;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (i = 0; i < `BANK_LINE_COUNT; i++) begin
|
||||
for (integer i = 0; i < `BANK_LINE_COUNT; i++) begin
|
||||
valid[i] <= 0;
|
||||
dirty[i] <= 0;
|
||||
end
|
||||
|
@ -71,10 +70,10 @@ module VX_tag_data_store #(
|
|||
valid[write_addr] <= 0;
|
||||
end
|
||||
|
||||
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
|
||||
for (j = 0; j < WORD_SIZE; j++) begin
|
||||
if (write_enable[i][j]) begin
|
||||
data[write_addr][i][j] <= write_data[i * `WORD_WIDTH + j * 8 +: 8];
|
||||
for (integer j = 0; j < `BANK_LINE_WORDS; j++) begin
|
||||
for (integer i = 0; i < WORD_SIZE; i++) begin
|
||||
if (write_enable[j][i]) begin
|
||||
data[write_addr][j][i] <= write_data[j * `WORD_WIDTH + i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,10 +5,10 @@ module VX_fp_fpga (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire in_valid,
|
||||
output wire in_ready,
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
|
@ -21,21 +21,22 @@ module VX_fp_fpga (
|
|||
output wire has_fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
localparam NUM_FPC = 12;
|
||||
localparam FPC_BITS = `LOG2UP(NUM_FPC);
|
||||
|
||||
wire [NUM_FPC-1:0] core_in_ready;
|
||||
wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] core_result;
|
||||
wire [NUM_FPC-1:0] per_core_ready_in;
|
||||
wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] per_core_result;
|
||||
wire [NUM_FPC-1:0][`ISTAG_BITS-1:0] per_core_tag_out;
|
||||
wire [NUM_FPC-1:0] per_core_ready_out;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
|
||||
wire fpnew_has_fflags;
|
||||
fflags_t fpnew_fflags;
|
||||
wire [NUM_FPC-1:0][`ISTAG_BITS-1:0] core_out_tag;
|
||||
wire [NUM_FPC-1:0] core_out_ready;
|
||||
wire [NUM_FPC-1:0] core_out_valid;
|
||||
|
||||
reg [FPC_BITS-1:0] core_select;
|
||||
reg fmadd_negate;
|
||||
|
@ -66,172 +67,172 @@ module VX_fp_fpga (
|
|||
VX_fp_noncomp fp_noncomp (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 0)),
|
||||
.in_ready (core_in_ready[0]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 0)),
|
||||
.ready_in (per_core_ready_in[0]),
|
||||
.tag_in (tag_in),
|
||||
.op (op),
|
||||
.frm (frm),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.result (core_result[0]),
|
||||
.result (per_core_result[0]),
|
||||
.has_fflags (fpnew_has_fflags),
|
||||
.fflags (fpnew_fflags),
|
||||
.out_tag (core_out_tag[0]),
|
||||
.out_ready (core_out_ready[0]),
|
||||
.out_valid (core_out_valid[0])
|
||||
.tag_out (per_core_tag_out[0]),
|
||||
.ready_out (per_core_ready_out[0]),
|
||||
.valid_out (per_core_valid_out[0])
|
||||
);
|
||||
|
||||
VX_fp_add fp_add (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 1)),
|
||||
.in_ready (core_in_ready[1]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 1)),
|
||||
.ready_in (per_core_ready_in[1]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.result (core_result[1]),
|
||||
.out_tag (core_out_tag[1]),
|
||||
.out_ready (core_out_ready[1]),
|
||||
.out_valid (core_out_valid[1])
|
||||
.result (per_core_result[1]),
|
||||
.tag_out (per_core_tag_out[1]),
|
||||
.ready_out (per_core_ready_out[1]),
|
||||
.valid_out (per_core_valid_out[1])
|
||||
);
|
||||
|
||||
VX_fp_sub fp_sub (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 2)),
|
||||
.in_ready (core_in_ready[2]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 2)),
|
||||
.ready_in (per_core_ready_in[2]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.result (core_result[2]),
|
||||
.out_tag (core_out_tag[2]),
|
||||
.out_ready (core_out_ready[2]),
|
||||
.out_valid (core_out_valid[2])
|
||||
.result (per_core_result[2]),
|
||||
.tag_out (per_core_tag_out[2]),
|
||||
.ready_out (per_core_ready_out[2]),
|
||||
.valid_out (per_core_valid_out[2])
|
||||
);
|
||||
|
||||
VX_fp_mul fp_mul (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 3)),
|
||||
.in_ready (core_in_ready[3]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 3)),
|
||||
.ready_in (per_core_ready_in[3]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.result (core_result[3]),
|
||||
.out_tag (core_out_tag[3]),
|
||||
.out_ready (core_out_ready[3]),
|
||||
.out_valid (core_out_valid[3])
|
||||
.result (per_core_result[3]),
|
||||
.tag_out (per_core_tag_out[3]),
|
||||
.ready_out (per_core_ready_out[3]),
|
||||
.valid_out (per_core_valid_out[3])
|
||||
);
|
||||
|
||||
VX_fp_madd fp_madd (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 4)),
|
||||
.in_ready (core_in_ready[4]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 4)),
|
||||
.ready_in (per_core_ready_in[4]),
|
||||
.tag_in (tag_in),
|
||||
.negate (fmadd_negate),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.datac (datac),
|
||||
.result (core_result[4]),
|
||||
.out_tag (core_out_tag[4]),
|
||||
.out_ready (core_out_ready[4]),
|
||||
.out_valid (core_out_valid[4])
|
||||
.result (per_core_result[4]),
|
||||
.tag_out (per_core_tag_out[4]),
|
||||
.ready_out (per_core_ready_out[4]),
|
||||
.valid_out (per_core_valid_out[4])
|
||||
);
|
||||
|
||||
VX_fp_msub fp_msub (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 5)),
|
||||
.in_ready (core_in_ready[5]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 5)),
|
||||
.ready_in (per_core_ready_in[5]),
|
||||
.tag_in (tag_in),
|
||||
.negate (fmadd_negate),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.datac (datac),
|
||||
.result (core_result[5]),
|
||||
.out_tag (core_out_tag[5]),
|
||||
.out_ready (core_out_ready[5]),
|
||||
.out_valid (core_out_valid[5])
|
||||
.result (per_core_result[5]),
|
||||
.tag_out (per_core_tag_out[5]),
|
||||
.ready_out (per_core_ready_out[5]),
|
||||
.valid_out (per_core_valid_out[5])
|
||||
);
|
||||
|
||||
VX_fp_div fp_div (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 6)),
|
||||
.in_ready (core_in_ready[6]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 6)),
|
||||
.ready_in (per_core_ready_in[6]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.result (core_result[6]),
|
||||
.out_tag (core_out_tag[6]),
|
||||
.out_ready (core_out_ready[6]),
|
||||
.out_valid (core_out_valid[6])
|
||||
.result (per_core_result[6]),
|
||||
.tag_out (per_core_tag_out[6]),
|
||||
.ready_out (per_core_ready_out[6]),
|
||||
.valid_out (per_core_valid_out[6])
|
||||
);
|
||||
|
||||
VX_fp_sqrt fp_sqrt (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 7)),
|
||||
.in_ready (core_in_ready[7]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 7)),
|
||||
.ready_in (per_core_ready_in[7]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.result (core_result[7]),
|
||||
.out_tag (core_out_tag[7]),
|
||||
.out_ready (core_out_ready[7]),
|
||||
.out_valid (core_out_valid[7])
|
||||
.result (per_core_result[7]),
|
||||
.tag_out (per_core_tag_out[7]),
|
||||
.ready_out (per_core_ready_out[7]),
|
||||
.valid_out (per_core_valid_out[7])
|
||||
);
|
||||
|
||||
VX_fp_ftoi fp_ftoi (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 8)),
|
||||
.in_ready (core_in_ready[8]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 8)),
|
||||
.ready_in (per_core_ready_in[8]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.result (core_result[8]),
|
||||
.out_tag (core_out_tag[8]),
|
||||
.out_ready (core_out_ready[8]),
|
||||
.out_valid (core_out_valid[8])
|
||||
.result (per_core_result[8]),
|
||||
.tag_out (per_core_tag_out[8]),
|
||||
.ready_out (per_core_ready_out[8]),
|
||||
.valid_out (per_core_valid_out[8])
|
||||
);
|
||||
|
||||
VX_fp_ftou fp_ftou (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 9)),
|
||||
.in_ready (core_in_ready[9]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 9)),
|
||||
.ready_in (per_core_ready_in[9]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.result (core_result[9]),
|
||||
.out_tag (core_out_tag[9]),
|
||||
.out_ready (core_out_ready[9]),
|
||||
.out_valid (core_out_valid[9])
|
||||
.result (per_core_result[9]),
|
||||
.tag_out (per_core_tag_out[9]),
|
||||
.ready_out (per_core_ready_out[9]),
|
||||
.valid_out (per_core_valid_out[9])
|
||||
);
|
||||
|
||||
VX_fp_itof fp_itof (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 10)),
|
||||
.in_ready (core_in_ready[10]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 10)),
|
||||
.ready_in (per_core_ready_in[10]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.result (core_result[10]),
|
||||
.out_tag (core_out_tag[10]),
|
||||
.out_ready (core_out_ready[10]),
|
||||
.out_valid (core_out_valid[10])
|
||||
.result (per_core_result[10]),
|
||||
.tag_out (per_core_tag_out[10]),
|
||||
.ready_out (per_core_ready_out[10]),
|
||||
.valid_out (per_core_valid_out[10])
|
||||
);
|
||||
|
||||
VX_fp_utof fp_utof (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_valid (in_valid && (core_select == 11)),
|
||||
.in_ready (core_in_ready[11]),
|
||||
.in_tag (in_tag),
|
||||
.valid_in (valid_in && (core_select == 11)),
|
||||
.ready_in (per_core_ready_in[11]),
|
||||
.tag_in (tag_in),
|
||||
.dataa (dataa),
|
||||
.result (core_result[11]),
|
||||
.out_tag (core_out_tag[11]),
|
||||
.out_ready (core_out_ready[11]),
|
||||
.out_valid (core_out_valid[11])
|
||||
.result (per_core_result[11]),
|
||||
.tag_out (per_core_tag_out[11]),
|
||||
.ready_out (per_core_ready_out[11]),
|
||||
.valid_out (per_core_valid_out[11])
|
||||
);
|
||||
|
||||
wire [FPC_BITS-1:0] fp_index;
|
||||
|
@ -240,18 +241,18 @@ module VX_fp_fpga (
|
|||
VX_priority_encoder #(
|
||||
.N(NUM_FPC)
|
||||
) wb_select (
|
||||
.data_in (core_out_valid),
|
||||
.data_in (per_core_valid_out),
|
||||
.data_out (fp_index),
|
||||
.valid_out (fp_valid)
|
||||
);
|
||||
|
||||
for (i = 0; i < NUM_FPC; i++) begin
|
||||
assign core_out_ready[i] = out_ready && (i == fp_index);
|
||||
assign per_core_ready_out[i] = ready_out && (i == fp_index);
|
||||
end
|
||||
|
||||
wire tmp_valid = fp_valid;
|
||||
wire [`ISTAG_BITS-1:0] tmp_tag = core_out_tag[fp_index];
|
||||
wire [`NUM_THREADS-1:0][31:0] tmp_result = core_result[fp_index];
|
||||
wire [`ISTAG_BITS-1:0] tmp_tag = per_core_tag_out[fp_index];
|
||||
wire [`NUM_THREADS-1:0][31:0] tmp_result = per_core_result[fp_index];
|
||||
wire tmp_has_fflags = fpnew_has_fflags && (fp_index == 0);
|
||||
fflags_t [`NUM_THREADS-1:0] tmp_flags = fpnew_fflags;
|
||||
|
||||
|
@ -263,7 +264,7 @@ module VX_fp_fpga (
|
|||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({tmp_valid, tmp_tag, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||
.out ({out_valid, out_tag, result, has_fflags, fflags})
|
||||
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -4,10 +4,10 @@ module VX_fp_noncomp (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
|
@ -19,10 +19,10 @@ module VX_fp_noncomp (
|
|||
output wire has_fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
localparam NEG_INF = 32'h00000001,
|
||||
NEG_NORM = 32'h00000002,
|
||||
|
@ -226,8 +226,8 @@ module VX_fp_noncomp (
|
|||
end
|
||||
end
|
||||
|
||||
wire stall = ~out_ready && out_valid;
|
||||
assign in_ready = ~stall;
|
||||
wire stall = ~ready_out && valid_out;
|
||||
assign ready_in = ~stall;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
|
||||
|
@ -236,8 +236,8 @@ module VX_fp_noncomp (
|
|||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({tmp_valid, in_tag, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||
.out ({out_valid, out_tag, result, has_fflags, fflags})
|
||||
.in ({tmp_valid, tag_in, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -11,10 +11,10 @@ module VX_fpnew #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire in_valid,
|
||||
output wire in_ready,
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
|
@ -27,10 +27,10 @@ module VX_fpnew #(
|
|||
output wire has_fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
localparam UNIT_FMULADD = FMULADD ? fpnew_pkg::PARALLEL : fpnew_pkg::DISABLED;
|
||||
localparam UNIT_FDIVSQRT = FDIVSQRT ? fpnew_pkg::MERGED : fpnew_pkg::DISABLED;
|
||||
|
@ -56,17 +56,17 @@ module VX_fpnew #(
|
|||
'{default: `LATENCY_FDIVSQRT}, // DIVSQRT
|
||||
'{default: `LATENCY_FNONCOMP}, // NONCOMP
|
||||
'{default: `LATENCY_FCONV}}, // CONV
|
||||
UnitTypes:'{'{default: UNIT_FMULADD}, // ADDMUL
|
||||
UnitTypes:'{'{default: UNIT_FMULADD}, // ADDMUL
|
||||
'{default: UNIT_FDIVSQRT}, // DIVSQRT
|
||||
'{default: UNIT_FNONCOMP}, // NONCOMP
|
||||
'{default: UNIT_FCONV}}, // CONV
|
||||
PipeConfig: fpnew_pkg::DISTRIBUTED
|
||||
};
|
||||
|
||||
wire fpu_in_ready, fpu_in_valid;
|
||||
wire fpu_out_ready, fpu_out_valid;
|
||||
wire fpu_ready_in, fpu_valid_in;
|
||||
wire fpu_ready_out, fpu_valid_out;
|
||||
|
||||
reg [`ISTAG_BITS-1:0] fpu_in_tag, fpu_out_tag;
|
||||
reg [`ISTAG_BITS-1:0] fpu_tag_in, fpu_tag_out;
|
||||
|
||||
reg [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
|
||||
|
||||
|
@ -77,13 +77,13 @@ module VX_fpnew #(
|
|||
wire [`NUM_THREADS-1:0][31:0] fpu_result;
|
||||
fpnew_pkg::status_t [0:`NUM_THREADS-1] fpu_status;
|
||||
|
||||
wire is_class_op_i, is_class_op_o;
|
||||
assign is_class_op_i = (op == `FPU_CLASS);
|
||||
wire is_class_op, is_class_op_out;
|
||||
assign is_class_op = (op == `FPU_CLASS);
|
||||
|
||||
reg [FOP_BITS-1:0] fpu_op;
|
||||
reg [`FRM_BITS-1:0] fpu_rnd;
|
||||
reg fpu_op_mod;
|
||||
reg fpu_has_fflags, fpu_has_fflags_o;
|
||||
reg fpu_has_fflags, fpu_has_fflags_out;
|
||||
|
||||
always @(*) begin
|
||||
fpu_op = fpnew_pkg::SGNJ;
|
||||
|
@ -150,15 +150,15 @@ module VX_fpnew #(
|
|||
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
|
||||
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
|
||||
.vectorial_op_i (1'b0),
|
||||
.tag_i ({fpu_in_tag, fpu_has_fflags, is_class_op_i}),
|
||||
.in_valid_i (fpu_in_valid),
|
||||
.in_ready_o (fpu_in_ready),
|
||||
.tag_i ({fpu_tag_in, fpu_has_fflags, is_class_op}),
|
||||
.in_valid_i (fpu_valid_in),
|
||||
.in_ready_o (fpu_ready_in),
|
||||
.flush_i (reset),
|
||||
.result_o (fpu_result[0]),
|
||||
.status_o (fpu_status[0]),
|
||||
.tag_o ({fpu_out_tag, fpu_has_fflags_o, is_class_op_o}),
|
||||
.out_valid_o (fpu_out_valid),
|
||||
.out_ready_i (fpu_out_ready),
|
||||
.tag_o ({fpu_tag_out, fpu_has_fflags_out, is_class_op_out}),
|
||||
.out_valid_o (fpu_valid_out),
|
||||
.out_ready_i (fpu_ready_out),
|
||||
`UNUSED_PIN (busy_o)
|
||||
);
|
||||
end else begin
|
||||
|
@ -178,14 +178,14 @@ module VX_fpnew #(
|
|||
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
|
||||
.vectorial_op_i (1'b0),
|
||||
.tag_i (1'b0),
|
||||
.in_valid_i (fpu_in_valid),
|
||||
.in_valid_i (fpu_valid_in),
|
||||
`UNUSED_PIN (in_ready_o),
|
||||
.flush_i (reset),
|
||||
.result_o (fpu_result[i]),
|
||||
.status_o (fpu_status[i]),
|
||||
`UNUSED_PIN (tag_o),
|
||||
`UNUSED_PIN (out_valid_o),
|
||||
.out_ready_i (fpu_out_ready),
|
||||
.out_ready_i (fpu_ready_out),
|
||||
`UNUSED_PIN (busy_o)
|
||||
);
|
||||
end
|
||||
|
@ -193,19 +193,19 @@ module VX_fpnew #(
|
|||
|
||||
`ENABLE_TRACING
|
||||
|
||||
assign fpu_in_valid = in_valid;
|
||||
assign in_ready = fpu_in_ready
|
||||
|| ~in_valid; // fix fpnews's in_ready containing in_valid;
|
||||
assign fpu_valid_in = valid_in;
|
||||
assign ready_in = fpu_ready_in
|
||||
|| ~valid_in; // fix
|
||||
|
||||
assign fpu_in_tag = in_tag;
|
||||
assign out_tag = fpu_out_tag;
|
||||
assign fpu_tag_in = tag_in;
|
||||
assign tag_out = fpu_tag_out;
|
||||
|
||||
assign result = fpu_result;
|
||||
|
||||
assign has_fflags = fpu_has_fflags_o;
|
||||
assign has_fflags = fpu_has_fflags_out;
|
||||
assign fflags = fpu_status;
|
||||
|
||||
assign out_valid = fpu_out_valid;
|
||||
assign fpu_out_ready = out_ready;
|
||||
assign valid_out = fpu_valid_out;
|
||||
assign fpu_ready_out = ready_out;
|
||||
|
||||
endmodule
|
|
@ -4,23 +4,23 @@ module VX_fp_add (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~out_ready && out_valid;
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
assign in_ready = enable;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
|
@ -73,8 +73,8 @@ module VX_fp_add (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable),
|
||||
.in({in_tag, in_valid}),
|
||||
.out({out_tag, out_valid})
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -4,23 +4,23 @@ module VX_fp_div (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~out_ready && out_valid;
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
assign in_ready = enable;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
|
@ -42,8 +42,8 @@ module VX_fp_div (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable),
|
||||
.in({in_tag, in_valid}),
|
||||
.out({out_tag, out_valid})
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -4,22 +4,22 @@ module VX_fp_ftoi (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~out_ready && out_valid;
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
assign in_ready = enable;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
|
@ -40,8 +40,8 @@ module VX_fp_ftoi (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable),
|
||||
.in({in_tag, in_valid}),
|
||||
.out({out_tag, out_valid})
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -4,22 +4,22 @@ module VX_fp_ftou (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~out_ready && out_valid;
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
assign in_ready = enable;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
|
@ -40,8 +40,8 @@ module VX_fp_ftou (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable),
|
||||
.in({in_tag, in_valid}),
|
||||
.out({out_tag, out_valid})
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -4,22 +4,22 @@ module VX_fp_itof (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~out_ready && out_valid;
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
assign in_ready = enable;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
|
@ -40,8 +40,8 @@ module VX_fp_itof (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable),
|
||||
.in({in_tag, in_valid}),
|
||||
.out({out_tag, out_valid})
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -4,10 +4,10 @@ module VX_fp_madd (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
|
@ -16,13 +16,13 @@ module VX_fp_madd (
|
|||
|
||||
input wire negate,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire enable0, enable1;
|
||||
assign in_ready = enable0 && enable1;
|
||||
assign ready_in = enable0 && enable1;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] result_st0, result_st1;
|
||||
wire [`ISTAG_BITS-1:0] out_tag_st0, out_tag_st1;
|
||||
|
@ -119,7 +119,7 @@ module VX_fp_madd (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable0),
|
||||
.in({in_tag, (in_valid && ~negate), (in_valid && negate)}),
|
||||
.in ({tag_in, (valid_in && ~negate), (valid_in && negate)}),
|
||||
.out({out_tag_st0, out_valid_st0, in_valid_st0})
|
||||
);
|
||||
|
||||
|
@ -134,12 +134,12 @@ module VX_fp_madd (
|
|||
.out({out_tag_st1, out_valid_st1})
|
||||
);
|
||||
|
||||
wire out_stall = ~out_ready && out_valid;
|
||||
wire out_stall = ~ready_out && valid_out;
|
||||
assign enable0 = ~out_stall;
|
||||
assign enable1 = ~out_stall && ~(out_valid_st0 && out_valid_st1); // stall the negate stage if dual outputs
|
||||
|
||||
assign result = out_valid_st0 ? result_st0 : result_st1;
|
||||
assign out_tag = out_valid_st0 ? out_tag_st0 : out_tag_st1;
|
||||
assign out_valid = out_valid_st0 || out_valid_st1;
|
||||
assign tag_out = out_valid_st0 ? out_tag_st0 : out_tag_st1;
|
||||
assign valid_out = out_valid_st0 || out_valid_st1;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -4,10 +4,10 @@ module VX_fp_msub (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
|
@ -16,13 +16,13 @@ module VX_fp_msub (
|
|||
|
||||
input wire negate,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire enable0, enable1;
|
||||
assign in_ready = enable0 && enable1;
|
||||
assign ready_in = enable0 && enable1;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] result_st0, result_st1;
|
||||
wire [`ISTAG_BITS-1:0] out_tag_st0, out_tag_st1;
|
||||
|
@ -119,7 +119,7 @@ module VX_fp_msub (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable0),
|
||||
.in({in_tag, (in_valid && ~negate), (in_valid && negate)}),
|
||||
.in ({tag_in, (valid_in && ~negate), (valid_in && negate)}),
|
||||
.out({out_tag_st0, out_valid_st0, in_valid_st0})
|
||||
);
|
||||
|
||||
|
@ -134,12 +134,12 @@ module VX_fp_msub (
|
|||
.out({out_tag_st1, out_valid_st1})
|
||||
);
|
||||
|
||||
wire out_stall = ~out_ready && out_valid;
|
||||
wire out_stall = ~ready_out && valid_out;
|
||||
assign enable0 = ~out_stall;
|
||||
assign enable1 = ~out_stall && ~(out_valid_st0 && out_valid_st1); // stall the negate stage if dual outputs
|
||||
|
||||
assign result = out_valid_st0 ? result_st0 : result_st1;
|
||||
assign out_tag = out_valid_st0 ? out_tag_st0 : out_tag_st1;
|
||||
assign out_valid = out_valid_st0 || out_valid_st1;
|
||||
assign tag_out = out_valid_st0 ? out_tag_st0 : out_tag_st1;
|
||||
assign valid_out = out_valid_st0 || out_valid_st1;
|
||||
|
||||
endmodule
|
|
@ -4,23 +4,23 @@ module VX_fp_mul (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~out_ready && out_valid;
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
assign in_ready = enable;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
|
@ -73,8 +73,8 @@ module VX_fp_mul (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable),
|
||||
.in({in_tag, in_valid}),
|
||||
.out({out_tag, out_valid})
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -4,22 +4,22 @@ module VX_fp_sqrt (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~out_ready && out_valid;
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
assign in_ready = enable;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
|
@ -40,8 +40,8 @@ module VX_fp_sqrt (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable),
|
||||
.in({in_tag, in_valid}),
|
||||
.out({out_tag, out_valid})
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -4,23 +4,23 @@ module VX_fp_sub (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~out_ready && out_valid;
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
assign in_ready = enable;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
|
@ -73,8 +73,8 @@ module VX_fp_sub (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable),
|
||||
.in({in_tag, in_valid}),
|
||||
.out({out_tag, out_valid})
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -4,22 +4,22 @@ module VX_fp_utof (
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire in_ready,
|
||||
input wire in_valid,
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] in_tag,
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] out_tag,
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
|
||||
input wire out_ready,
|
||||
output wire out_valid
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~out_ready && out_valid;
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
assign in_ready = enable;
|
||||
assign ready_in = enable;
|
||||
|
||||
genvar i;
|
||||
|
||||
|
@ -40,8 +40,8 @@ module VX_fp_utof (
|
|||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(enable),
|
||||
.in({in_tag, in_valid}),
|
||||
.out({out_tag, out_valid})
|
||||
.in ({tag_in, valid_in}),
|
||||
.out({tag_out, valid_out})
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -7,6 +7,9 @@ interface VX_csr_req_if ();
|
|||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
`DEBUG_BEGIN
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
`DEBUG_END
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
|
|
|
@ -11,7 +11,11 @@ interface VX_fpu_req_if ();
|
|||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
`DEBUG_BEGIN
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
`DEBUG_END
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`FPU_BITS-1:0] fpu_op;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
|
|
@ -19,8 +19,7 @@ interface VX_gpr_read_if ();
|
|||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
||||
wire in_ready;
|
||||
wire out_ready;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -9,7 +9,9 @@ interface VX_gpu_req_if();
|
|||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
`DEBUG_BEGIN
|
||||
wire [31:0] curr_PC;
|
||||
`DEBUG_END
|
||||
wire [`GPU_BITS-1:0] gpu_op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
|
|
|
@ -11,7 +11,11 @@ interface VX_mul_req_if ();
|
|||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
|
||||
`DEBUG_BEGIN
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
`DEBUG_END
|
||||
wire [`MUL_BITS-1:0] mul_op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
|
|
38
hw/rtl/libs/VX_elastic_buffer.v
Normal file
38
hw/rtl/libs/VX_elastic_buffer.v
Normal file
|
@ -0,0 +1,38 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
// Elastic buffer: wraps a VX_generic_queue behind a valid/ready handshake
// on both the input and the output side.
//
//   Input side : ready_in  is high while the queue is not full.
//   Output side: valid_out is high while the queue is not empty.
//
// A word is enqueued only on a cycle where valid_in and ready_in are both
// high, and dequeued only on a cycle where valid_out and ready_out are both
// high. The queue is therefore never pushed while full nor popped while
// empty, regardless of how the neighbouring stages drive valid_in/ready_out.
//
// Parameters (forwarded unchanged to VX_generic_queue):
//   DATAW    - width in bits of data_in / data_out.
//   SIZE     - forwarded as the queue's SIZE (presumably its depth in
//              entries -- confirm against VX_generic_queue).
//   BUFFERED - forwarded as the queue's BUFFERED option; its exact meaning
//              is defined by VX_generic_queue.
module VX_elastic_buffer #(
    parameter DATAW    = 1,
    parameter SIZE     = 2,
    parameter BUFFERED = 1
) (
    input  wire             clk,
    input  wire             reset,

    // upstream (producer) handshake
    input  wire             valid_in,
    output wire             ready_in,
    input  wire [DATAW-1:0] data_in,

    // downstream (consumer) handshake
    output wire [DATAW-1:0] data_out,
    input  wire             ready_out,
    output wire             valid_out
);
    wire empty, full;

    // Qualify the raw handshake inputs with the queue status so a transfer
    // only happens when both sides agree: a producer holding valid_in high
    // while the queue is full must not overwrite stored data, and a consumer
    // holding ready_out high while the queue is empty must not underflow it.
    wire push = valid_in  && ~full;
    wire pop  = ready_out && ~empty;

    VX_generic_queue #(
        .DATAW    (DATAW),
        .SIZE     (SIZE),
        .BUFFERED (BUFFERED)
    ) queue (
        .clk      (clk),
        .reset    (reset),
        .push     (push),
        .pop      (pop),
        .data_in  (data_in),
        .data_out (data_out),
        .empty    (empty),
        .full     (full),
        `UNUSED_PIN (size)
    );

    assign ready_in  = ~full;
    assign valid_out = ~empty;

endmodule
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
module VX_generic_queue #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 16,
|
||||
parameter SIZE = 2,
|
||||
parameter BUFFERED = 1
|
||||
) (
|
||||
input wire clk,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue