driver basic test and demo test refactoring

This commit is contained in:
Blaise Tine 2020-06-19 09:12:07 -07:00
parent e2e1b63e14
commit 68d9fc9a75
55 changed files with 1006 additions and 1205 deletions

View file

@ -18,7 +18,10 @@ CXXFLAGS +=-fstack-protector
CXXFLAGS += -fPIC
# Enable scope analyzer
#CXXFLAGS += -DSCOPE
CXXFLAGS += -DSCOPE
# config parameters
CXXFLAGS += -DNUM_WARPS=2 -DNUM_THREADS=2
LDFLAGS += -luuid

View file

@ -25,31 +25,34 @@ struct scope_signal_t {
const char* name;
};
constexpr int ilog2(int n) {
return (n > 1) ? 1 + ilog2(n >> 1) : 0;
}
static constexpr int NW_BITS = ilog2(NUM_WARPS);
static const scope_signal_t scope_signals[] = {
{ 2, "icache_req_warp_num" },
{ NW_BITS, "icache_req_warp_num" },
{ 32, "icache_req_addr" },
{ 2, "icache_req_tag" },
{ NW_BITS, "icache_req_tag" },
{ 32, "icache_rsp_data" },
{ 2, "icache_rsp_tag" },
{ NW_BITS, "icache_rsp_tag" },
{ 2, "dcache_req_warp_num" },
{ NW_BITS, "dcache_req_warp_num" },
{ 32, "dcache_req_curr_PC" },
{ 32, "dcache_req_addr" },
{ 1, "dcache_req_rw" },
{ 4, "dcache_req_byteen" },
{ 32, "dcache_req_data" },
{ 2, "dcache_req_tag" },
{ NW_BITS, "dcache_req_tag" },
{ 32, "dcache_rsp_data" },
{ 2 , "dcache_rsp_tag" },
{ NW_BITS, "dcache_rsp_tag" },
{ 32, "dram_req_addr" },
{ 1, "dram_req_rw" },
{ 16, "dram_req_byteen" },
{ 32, "dram_req_data" },
{ 29, "dram_req_tag" },
{ 32, "dram_rsp_data" },
{ 29, "dram_rsp_tag" },
@ -58,30 +61,32 @@ static const scope_signal_t scope_signals[] = {
{ 16, "snp_req_tag" },
{ 16, "snp_rsp_tag" },
{ 2, "decode_warp_num" },
{ NW_BITS, "decode_warp_num" },
{ 32, "decode_curr_PC" },
{ 1, "decode_is_jal" },
{ 5, "decode_rs1" },
{ 5, "decode_rs2" },
{ 1, "decode_is_jal" },
{ 5, "decode_rs1" },
{ 5, "decode_rs2" },
{ 2, "execute_warp_num" },
{ NW_BITS, "execute_warp_num" },
{ 5, "execute_rd" },
{ 32, "execute_a" },
{ 32, "execute_b" },
{ 2, "writeback_warp_num" },
{ NW_BITS, "writeback_warp_num" },
{ 2, "writeback_wb" },
{ 5, "writeback_rd" },
{ 32, "writeback_data" },
///////////////////////////////////////////////////////////////////////////
{ 1, "icache_req_valid" },
{ 1, "icache_req_ready" },
{ 1, "icache_rsp_valid" },
{ 1, "icache_rsp_ready" },
{ 4, "dcache_req_valid" },
{ NUM_THREADS, "dcache_req_valid" },
{ 1, "dcache_req_ready" },
{ 4, "dcache_rsp_valid" },
{ NUM_THREADS, "dcache_rsp_valid" },
{ 1, "dcache_rsp_ready" },
{ 1, "dram_req_valid" },
@ -94,14 +99,19 @@ static const scope_signal_t scope_signals[] = {
{ 1, "snp_rsp_valid" },
{ 1, "snp_rsp_ready" },
{ 4, "decode_valid" },
{ 4, "execute_valid" },
{ 4, "writeback_valid" },
{ NUM_THREADS, "decode_valid" },
{ NUM_THREADS, "execute_valid" },
{ NUM_THREADS, "writeback_valid" },
{ 1, "schedule_delay" },
{ 1, "memory_delay" },
{ 1, "exec_delay" },
{ 1, "gpr_stage_delay" },
{ 1, "busy" },
{ 1, "idram_req_valid" },
{ 1, "idram_req_ready" },
{ 1, "idram_rsp_valid" },
{ 1, "idram_rsp_ready" },
};
static const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t);
@ -161,7 +171,10 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1));
assert(fwidth == (int)frame_width);
if (fwidth != (int)frame_width) {
std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" << std::endl;
std::abort();
}
std::vector<char> signal_data(frame_width+1);
uint64_t frame_offset = 0;

View file

@ -13,18 +13,19 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_WB \
-DDBG_PRINT_OPAE
#DBG_PRINT=$(DBG_PRINT_FLAGS)
DBG_PRINT=$(DBG_PRINT_FLAGS)
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2
CONFIGS += -DNUM_WARPS=2 -DNUM_THREADS=2
#DEBUG=1
#AFU=1
DEBUG=1
AFU=1
CFLAGS += -fPIC
CFLAGS += -DUSE_RTLSIM $(MULTICORE)
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
LDFLAGS += -shared -pthread
# LDFLAGS += -dynamiclib -pthread
@ -35,7 +36,7 @@ SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/pipe_regs -I../../hw/rtl/cache
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(MULTICORE)
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
VL_FLAGS += -Wno-DECLFILENAME
VL_FLAGS += --x-initial unique
VL_FLAGS += --x-assign unique
@ -47,9 +48,11 @@ VL_FLAGS += --x-assign unique
# Debugigng
ifdef DEBUG
VL_FLAGS += --trace -DVCD_OUTPUT $(DBG_PRINT)
CFLAGS += -DVCD_OUTPUT $(DBG_PRINT)
CFLAGS += -DVCD_OUTPUT $(DBG_PRINT)
#VL_FLAGS += -DDBG_CORE_REQ_INFO
#CFLAGS += -DDBG_CORE_REQ_INFO
else
CFLAGS += -DNDEBUG
CFLAGS += -DNDEBUG
VL_FLAGS += -DNDEBUG
endif

View file

@ -44,16 +44,16 @@ $(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
run-fpga: $(PROJECT)
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 32
run-ase: $(PROJECT)
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
run-rtlsim: $(PROJECT)
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
run-simx: $(PROJECT)
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;

View file

@ -1,10 +1,9 @@
#include <iostream>
#include <unistd.h>
#include <string.h>
#include <vortex.h>
#include "common.h"
int test = -1;
#define RT_CHECK(_expr) \
do { \
int _ret = _expr; \
@ -15,79 +14,84 @@ int test = -1;
exit(-1); \
} while (false)
const char* kernel_file = "kernel.bin";
int test = -1;
uint32_t count = 0;
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;
static void show_usage() {
std::cout << "Vortex Driver Test." << std::endl;
std::cout << "Usage: [-t testno][-k: kernel][-n words][-h: help]" << std::endl;
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "t:h?")) != -1) {
while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) {
switch (c) {
case 't': {
case 'n':
count = atoi(optarg);
break;
case 't':
test = atoi(optarg);
} break;
break;
case 'k':
kernel_file = optarg;
break;
case 'h':
case '?': {
std::cout << "Test." << std::endl;
std::cout << "Usage: [-t testno][-h: help]" << std::endl;
show_usage();
exit(0);
} break;
default:
show_usage();
exit(-1);
}
}
}
void cleanup() {
if (buffer) {
vx_buf_release(buffer);
}
if (device) {
vx_dev_close(device);
}
}
uint64_t shuffle(int i, uint64_t value) {
return (value << i) | (value & ((1 << i)-1));;
}
vx_device_h device = nullptr;
vx_buffer_h sbuf = nullptr;
vx_buffer_h dbuf = nullptr;
int total_blocks = NUM_BLOCKS;
void cleanup() {
if (sbuf) {
vx_buf_release(sbuf);
}
if (dbuf) {
vx_buf_release(dbuf);
}
if (device) {
vx_dev_close(device);
}
}
int run_memcopy_test(vx_buffer_h sbuf,
vx_buffer_h dbuf,
uint32_t address,
uint64_t value,
int num_blocks) {
int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) {
int errors = 0;
// write sbuf data
// update source buffer
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, value);
}
// clear dbuf data
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(dbuf))[i] = 0;
((uint64_t*)vx_host_ptr(buffer))[i] = shuffle(i, value);
}
// write buffer to local memory
std::cout << "write buffer to local memory" << std::endl;
RT_CHECK(vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0));
RT_CHECK(vx_copy_to_dev(buffer, dev_addr, 64 * num_blocks, 0));
// clear destination buffer
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(buffer))[i] = 0;
}
// read buffer from local memory
std::cout << "read buffer from local memory" << std::endl;
RT_CHECK(vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0));
RT_CHECK(vx_copy_from_dev(buffer, dev_addr, 64 * num_blocks, 0));
// verify result
std::cout << "verify result" << std::endl;
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
auto curr = ((uint64_t*)vx_host_ptr(buffer))[i];
auto ref = shuffle(i, value);
if (curr != ref) {
std::cout << "error at 0x" << std::hex << (address + 8 * i)
std::cout << "error at 0x" << std::hex << (dev_addr + 8 * i)
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
++errors;
}
@ -102,35 +106,19 @@ int run_memcopy_test(vx_buffer_h sbuf,
return 0;
}
int run_kernel_test(vx_device_h device,
vx_buffer_h sbuf,
vx_buffer_h dbuf,
const char* program,
int num_blocks) {
int errors = 0;
uint64_t seed = 0x0badf00d40ff40ff;
int run_kernel_test(const kernel_arg_t& kernel_arg,
uint32_t buf_size,
uint32_t num_points) {
int errors = 0;
int src_dev_addr = DEV_MEM_SRC_ADDR;
int dest_dev_addr = DEV_MEM_DST_ADDR;
// write sbuf data
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, seed);
}
// clear dbuf data
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(dbuf))[i] = 0;
// update source buffer
for (uint32_t i = 0; i < num_points; ++i) {
((int32_t*)vx_host_ptr(buffer))[i] = i;
}
// write buffer to local memory
std::cout << "write buffer to local memory" << std::endl;
RT_CHECK(vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0));
// upload program
std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, program));
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, buf_size, 0));
// start device
std::cout << "start device" << std::endl;
@ -142,19 +130,24 @@ int run_kernel_test(vx_device_h device,
// flush the caches
std::cout << "flush the caches" << std::endl;
RT_CHECK(vx_flush_caches(device, dest_dev_addr, 64 * num_blocks));
RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
// clear destination buffer
for (uint32_t i = 0; i < num_points; ++i) {
((int32_t*)vx_host_ptr(buffer))[i] = 0;
}
// read buffer from local memory
std::cout << "read buffer from local memory" << std::endl;
RT_CHECK(vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0));
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
// verify result
std::cout << "verify result" << std::endl;
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
auto ref = shuffle(i, seed);
for (uint32_t i = 0; i < num_points; ++i) {
int32_t curr = ((int32_t*)vx_host_ptr(buffer))[i];
int32_t ref = i;
if (curr != ref) {
std::cout << "error at 0x" << std::hex << (dest_dev_addr + 8 * i)
std::cout << "error at value " << i
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
++errors;
}
@ -170,33 +163,66 @@ int run_kernel_test(vx_device_h device,
}
int main(int argc, char *argv[]) {
size_t value;
kernel_arg_t kernel_arg;
// parse command arguments
parse_args(argc, argv);
std::cout << "total blocks: " << total_blocks << std::endl;
if (count == 0) {
count = 1;
}
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
uint32_t num_points = max_cores * count;
uint32_t num_blocks = (num_points * sizeof(uint32_t) + 63) / 64;
uint32_t buf_size = num_blocks * 64;
std::cout << "number of points: " << num_points << std::endl;
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
// open device connection
std::cout << "open device connection" << std::endl;
RT_CHECK(vx_dev_open(&device));
// create source buffer
std::cout << "create source buffer" << std::endl;
RT_CHECK(vx_alloc_shared_mem(device, total_blocks * 64, &sbuf));
// create destination buffer
std::cout << "create destination buffer" << std::endl;
RT_CHECK(vx_alloc_shared_mem(device, total_blocks * 64, &dbuf));
// allocate device memory
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.dst_ptr = value;
kernel_arg.count = count;
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
// allocate shared memory
std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
// run tests
if (0 == test || -1 == test) {
std::cout << "run memcopy test" << std::endl;
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d00ff00ff, 1));
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d40ff40ff, total_blocks));
RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d00ff00ff, 1));
RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks));
}
if (1 == test || -1 == test) {
// upload program
std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
// upload kernel argument
std::cout << "upload kernel argument" << std::endl;
{
auto buf_ptr = (void*)vx_host_ptr(buffer);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
}
std::cout << "run kernel test" << std::endl;
RT_CHECK(run_kernel_test(device, sbuf, dbuf, "kernel.bin", total_blocks));
RT_CHECK(run_kernel_test(kernel_arg, buf_size, num_points));
}
// cleanup

View file

@ -1,8 +1,12 @@
#ifndef _COMMON_H_
#define _COMMON_H_
#define DEV_MEM_SRC_ADDR 0x10000040
#define DEV_MEM_DST_ADDR 0x20000080
#define NUM_BLOCKS 16
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
uint32_t count;
uint32_t src_ptr;
uint32_t dst_ptr;
};
#endif

Binary file not shown.

View file

@ -4,17 +4,14 @@
#include "common.h"
void main() {
int64_t* x = (int64_t*)DEV_MEM_SRC_ADDR;
int64_t* y = (int64_t*)DEV_MEM_DST_ADDR;
int num_words = (NUM_BLOCKS * 64) / 8;
int core_id = vx_core_id();
int num_cores = vx_num_cores();
int num_words_per_core = num_words / num_cores;
int offset = core_id * num_words_per_core;
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
uint32_t count = arg->count;
int32_t* src_ptr = (int32_t*)arg->src_ptr;
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
for (int i = 0; i < num_words_per_core; ++i) {
y[offset + i] = x[offset + i];
uint32_t offset = vx_core_id() * count;
for (uint32_t i = 0; i < count; ++i) {
dst_ptr[offset + i] = src_ptr[offset + i];
}
}

View file

@ -41,16 +41,16 @@ $(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
run-fpga: $(PROJECT)
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
run-ase: $(PROJECT)
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
run-rtlsim: $(PROJECT)
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
run-simx: $(PROJECT)
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;

View file

@ -4,7 +4,7 @@
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
struct kernel_arg_t {
uint32_t stride;
uint32_t count;
uint32_t src0_ptr;
uint32_t src1_ptr;
uint32_t dst_ptr;

View file

@ -14,23 +14,26 @@
exit(-1); \
} while (false)
const char* program_file = "kernel.bin";
uint32_t data_stride = 0;
const char* kernel_file = "kernel.bin";
uint32_t count = 0;
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;
static void show_usage() {
std::cout << "Vortex Driver Test." << std::endl;
std::cout << "Usage: [-f: program] [-n stride] [-h: help]" << std::endl;
std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "n:f:h?")) != -1) {
while ((c = getopt(argc, argv, "n:k:h?")) != -1) {
switch (c) {
case 'n':
data_stride = atoi(optarg);
count = atoi(optarg);
break;
case 'f':
program_file = optarg;
case 'k':
kernel_file = optarg;
break;
case 'h':
case '?': {
@ -42,16 +45,8 @@ static void parse_args(int argc, char **argv) {
exit(-1);
}
}
if (nullptr == program_file) {
show_usage();
exit(-1);
}
}
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;
void cleanup() {
if (buffer) {
vx_buf_release(buffer);
@ -61,9 +56,7 @@ void cleanup() {
}
}
int run_test(vx_device_h device,
vx_buffer_h buffer,
const kernel_arg_t& kernel_arg,
int run_test(const kernel_arg_t& kernel_arg,
uint32_t buf_size,
uint32_t num_points) {
// start device
@ -86,13 +79,13 @@ int run_test(vx_device_h device,
std::cout << "verify result" << std::endl;
{
int errors = 0;
auto buf_ptr = (int*)vx_host_ptr(buffer);
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
int ref = i + i;
int cur = buf_ptr[i];
if (cur != ref) {
std::cout << "error at value " << i
<< ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
<< ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
++errors;
}
}
@ -113,21 +106,18 @@ int main(int argc, char *argv[]) {
// parse command arguments
parse_args(argc, argv);
if (count == 0) {
count = 1;
}
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
if (data_stride == 0) {
data_stride = 1;
}
uint32_t num_points = count * max_cores * max_warps * max_threads;
uint32_t buf_size = num_points * sizeof(uint32_t);
kernel_arg.stride = data_stride;
uint32_t num_points = max_cores * max_warps * max_threads;
uint32_t buf_size = num_points * data_stride * sizeof(uint32_t);
std::cout << "number of workitems: " << num_points << std::endl;
std::cout << "workitem size: " << data_stride * sizeof(uint32_t) << " bytes" << std::endl;
std::cout << "number of points: " << num_points << std::endl;
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
// open device connection
@ -136,55 +126,29 @@ int main(int argc, char *argv[]) {
// upload program
std::cout << "upload program" << std::endl;
RT_CHECK(vx_upload_kernel_file(device, program_file));
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
// allocate device memory
std::cout << "allocate device memory" << std::endl;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src0_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src1_ptr = value;
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.dst_ptr = value;
kernel_arg.count = count;
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
// allocate shared memory
std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
// populate source buffer0 values
std::cout << "populate source buffer0 values" << std::endl;
{
auto buf_ptr = (int*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i-1;
}
}
// upload source buffer0
std::cout << "upload source buffer0" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
// populate source buffer1 values
std::cout << "populate source buffer1 values" << std::endl;
{
auto buf_ptr = (int*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i+1;
}
}
// upload source buffer1
std::cout << "upload source buffer1" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
// upload kernel argument
std::cout << "upload kernel argument" << std::endl;
{
@ -193,9 +157,41 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
}
// upload source buffer0
{
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i-1;
}
}
std::cout << "upload source buffer0" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
// upload source buffer1
{
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i+1;
}
}
std::cout << "upload source buffer1" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
// clear destination buffer
{
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = 0;
}
}
std::cout << "clear destination buffer" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
// run tests
std::cout << "run tests" << std::endl;
RT_CHECK(run_test(device, buffer, kernel_arg, buf_size, num_points));
RT_CHECK(run_test(kernel_arg, buf_size, num_points));
// cleanup
std::cout << "cleanup" << std::endl;

BIN
driver/tests/demo/kernel.bin Executable file → Normal file

Binary file not shown.

View file

@ -6,13 +6,14 @@
void kernel_body(void* arg) {
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
int* src0_ptr = (int*)_arg->src0_ptr;
int* src1_ptr = (int*)_arg->src1_ptr;
int* dst_ptr = (int*)_arg->dst_ptr;
uint32_t count = _arg->count;
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
uint32_t offset = vx_thread_gid() * count;
unsigned offset = vx_thread_gid() * _arg->stride;
for (unsigned i = 0; i < _arg->stride; ++i) {
for (uint32_t i = 0; i < count; ++i) {
dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i];
}
}

View file

@ -68,6 +68,11 @@ vcd file vortex.vcd
vcd add -r /*/Vortex/hw/rtl/*
run -all
# compress FPGA output files
tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)`
tar -zcvf output_files_1c_rel.tar.gz `find ./build_fpga_1c_rel -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)`
tar -zcvf output_files_2c_rel.tar.gz `find ./build_fpga_2c_rel -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)`
# compress VCD trace
tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd

View file

@ -2,8 +2,10 @@ vortex_afu.json
QI:vortex_afu.qsf
+define+NDEBUG
#+define+SCOPE
+define+SCOPE
+define+NUM_WARPS=2
+define+NUM_THREADS=2
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
@ -77,6 +79,7 @@ QI:vortex_afu.qsf
../rtl/Vortex_Socket.v
../rtl/Vortex_Cluster.v
../rtl/Vortex.v
../rtl/VX_mem_unit.v
../rtl/VX_pipeline.v
../rtl/VX_front_end.v
../rtl/VX_back_end.v
@ -94,12 +97,11 @@ QI:vortex_afu.qsf
../rtl/VX_gpr.v
../rtl/VX_gpr_ram.v
../rtl/VX_gpr_stage.v
../rtl/VX_mem_ctrl.v
../rtl/VX_alu_unit.v
../rtl/VX_lsu_unit.v
../rtl/VX_lsu_addr_gen.v
../rtl/VX_decode.v
../rtl/VX_inst_multiplex.v
../rtl/VX_lsu_addr_gen.v
../rtl/VX_dcache_arb.v
../rtl/VX_mem_arb.v

View file

@ -1,4 +1,7 @@
# Analysis & Synthesis Assignments
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG

View file

@ -13,8 +13,6 @@ import local_mem_cfg_pkg::*;
`include "VX_define.vh"
`define VX_TO_DRAM_ADDR(x) x[`VX_DRAM_ADDR_WIDTH-1:(`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH)]
module vortex_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2
) (
@ -139,10 +137,12 @@ t_ccip_clAddr csr_io_addr;
logic[DRAM_ADDR_WIDTH-1:0] csr_mem_addr;
logic[DRAM_ADDR_WIDTH-1:0] csr_data_size;
`ifdef SCOPE
logic [63:0] csr_scope_cmd;
logic [63:0] csr_scope_data;
logic csr_scope_read;
logic csr_scope_write;
`endif
// MMIO controller ////////////////////////////////////////////////////////////
@ -154,9 +154,11 @@ assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
t_if_ccip_c2_Tx mmio_tx;
assign af2cp_sTxPort.c2 = mmio_tx;
`ifdef SCOPE
assign csr_scope_cmd = 64'(cp2af_sRxPort.c0.data);
assign csr_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CSR_SCOPE_CMD == mmio_hdr.address);
assign csr_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_CSR_SCOPE_DATA == mmio_hdr.address);
`endif
always_ff @(posedge clk)
begin
@ -202,11 +204,13 @@ begin
$display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data));
`endif
end
`ifdef SCOPE
MMIO_CSR_SCOPE_CMD: begin
`ifdef DBG_PRINT_OPAE
$display("%t: CSR_SCOPE_CMD: %0h", $time, 64'(cp2af_sRxPort.c0.data));
`endif
end
`endif
default: begin
// user-defined CSRs
//if (mmio_hdr.addres >= MMIO_CSR_USER) begin
@ -237,18 +241,20 @@ begin
16'h0008: mmio_tx.data <= 64'h0; // reserved
MMIO_CSR_STATUS: begin
`ifdef DBG_PRINT_OPAE
if (state != mmio_tx.data) begin
if (state != state_t'(mmio_tx.data)) begin
$display("%t: STATUS: state=%0d", $time, state);
end
`endif
mmio_tx.data <= 64'(state);
end
`ifdef SCOPE
MMIO_CSR_SCOPE_DATA: begin
mmio_tx.data <= csr_scope_data;
`ifdef DBG_PRINT_OPAE
$display("%t: SCOPE: data=%0h", $time, csr_scope_data);
`endif
end
`endif
default: mmio_tx.data <= 64'h0;
endcase
mmio_tx.mmioRdValid <= 1; // post response
@ -406,7 +412,7 @@ begin
case (state)
CMD_TYPE_READ: avs_address = cci_dram_rd_req_addr;
CMD_TYPE_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout)));
default: avs_address = `VX_TO_DRAM_ADDR(vx_dram_req_addr);
default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
endcase
case (state)
@ -821,7 +827,7 @@ end
`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag);
`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready);
`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 641, "oops!")
`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 626, "oops!")
wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready)
|| (scope_icache_rsp_valid && scope_icache_rsp_ready)
@ -855,15 +861,17 @@ VX_scope #(
`endif
// Vortex binding /////////////////////////////////////////////////////////////
// Vortex /////////////////////////////////////////////////////////////////////
assign cmd_run_done = !vx_busy;
Vortex_Socket #() vx_socket (
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_CORE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_ICACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
.clk (clk),
.reset (vx_reset),

View file

@ -3,7 +3,7 @@
module VX_back_end #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_BE_IO
input wire clk,
@ -71,7 +71,7 @@ module VX_back_end #(
VX_lsu_unit #(
.CORE_ID(CORE_ID)
) lsu_unit (
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_LSU_BIND
.clk (clk),
.reset (reset),

View file

@ -2,6 +2,7 @@
`define VX_DEFINE
`include "VX_config.vh"
`include "VX_scope.vh"
// `define QUEUE_FORCE_MLAB 1
// `define SYN 1
@ -139,7 +140,7 @@
///////////////////////////////////////////////////////////////////////////////
`ifndef NDEBUG // pc, wb, rd, warp_num
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 2 + 5 + `NW_BITS)
`else
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
@ -286,316 +287,5 @@
`define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)}
///////////////////////////////////////////////////////////////////////////////
`ifdef SCOPE
`define SCOPE_SIGNALS_DATA_LIST \
scope_icache_req_warp_num, \
scope_icache_req_addr, \
scope_icache_req_tag, \
scope_icache_rsp_data, \
scope_icache_rsp_tag, \
scope_dcache_req_warp_num, \
scope_dcache_req_curr_PC, \
scope_dcache_req_addr, \
scope_dcache_req_rw, \
scope_dcache_req_byteen, \
scope_dcache_req_data, \
scope_dcache_req_tag, \
scope_dcache_rsp_data, \
scope_dcache_rsp_tag, \
scope_dram_req_addr, \
scope_dram_req_rw, \
scope_dram_req_byteen, \
scope_dram_req_data, \
scope_dram_req_tag, \
scope_dram_rsp_data, \
scope_dram_rsp_tag, \
scope_snp_req_addr, \
scope_snp_req_invalidate, \
scope_snp_req_tag, \
scope_snp_rsp_tag, \
scope_decode_warp_num, \
scope_decode_curr_PC, \
scope_decode_is_jal, \
scope_decode_rs1, \
scope_decode_rs2, \
scope_execute_warp_num, \
scope_execute_rd, \
scope_execute_a, \
scope_execute_b, \
scope_writeback_warp_num, \
scope_writeback_wb, \
scope_writeback_rd, \
scope_writeback_data,
`define SCOPE_SIGNALS_UPD_LIST \
scope_icache_req_valid, \
scope_icache_req_ready, \
scope_icache_rsp_valid, \
scope_icache_rsp_ready, \
scope_dcache_req_valid, \
scope_dcache_req_ready, \
scope_dcache_rsp_valid, \
scope_dcache_rsp_ready, \
scope_dram_req_valid, \
scope_dram_req_ready, \
scope_dram_rsp_valid, \
scope_dram_rsp_ready, \
scope_snp_req_valid, \
scope_snp_req_ready, \
scope_snp_rsp_valid, \
scope_snp_rsp_ready, \
scope_decode_valid, \
scope_execute_valid, \
scope_writeback_valid, \
scope_schedule_delay, \
scope_memory_delay, \
scope_exec_delay, \
scope_gpr_stage_delay, \
scope_busy
`define SCOPE_SIGNALS_DECL \
wire scope_icache_req_valid; \
wire [1:0] scope_icache_req_warp_num; \
wire [31:0] scope_icache_req_addr; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
wire scope_icache_req_ready; \
wire scope_icache_rsp_valid; \
wire [31:0] scope_icache_rsp_data; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \
wire scope_icache_rsp_ready; \
wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid; \
wire [1:0] scope_dcache_req_warp_num; \
wire [31:0] scope_dcache_req_curr_PC; \
wire [31:0] scope_dcache_req_addr; \
wire scope_dcache_req_rw; \
wire [3:0] scope_dcache_req_byteen; \
wire [31:0] scope_dcache_req_data; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \
wire scope_dcache_req_ready; \
wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid; \
wire [31:0] scope_dcache_rsp_data; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \
wire scope_dcache_rsp_ready; \
wire scope_dram_req_valid; \
wire [31:0] scope_dram_req_addr; \
wire scope_dram_req_rw; \
wire [15:0] scope_dram_req_byteen; \
wire [31:0] scope_dram_req_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \
wire scope_dram_req_ready; \
wire scope_dram_rsp_valid; \
wire [31:0] scope_dram_rsp_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \
wire scope_dram_rsp_ready; \
wire scope_snp_req_valid; \
wire [31:0] scope_snp_req_addr; \
wire scope_snp_req_invalidate; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \
wire scope_snp_req_ready; \
wire scope_snp_rsp_valid; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
wire scope_busy; \
wire scope_snp_rsp_ready; \
wire scope_schedule_delay; \
wire scope_memory_delay; \
wire scope_exec_delay; \
wire scope_gpr_stage_delay; \
wire [3:0] scope_decode_valid; \
wire [1:0] scope_decode_warp_num; \
wire [31:0] scope_decode_curr_PC; \
wire scope_decode_is_jal; \
wire [4:0] scope_decode_rs1; \
wire [4:0] scope_decode_rs2; \
wire [3:0] scope_execute_valid; \
wire [1:0] scope_execute_warp_num; \
wire [4:0] scope_execute_rd; \
wire [31:0] scope_execute_a; \
wire [31:0] scope_execute_b; \
wire [3:0] scope_writeback_valid; \
wire [1:0] scope_writeback_warp_num; \
wire [1:0] scope_writeback_wb; \
wire [4:0] scope_writeback_rd; \
wire [31:0] scope_writeback_data;
`define SCOPE_SIGNALS_ICACHE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire scope_icache_req_valid, \
output wire [1:0] scope_icache_req_warp_num, \
output wire [31:0] scope_icache_req_addr, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
output wire scope_icache_req_ready, \
output wire scope_icache_rsp_valid, \
output wire [31:0] scope_icache_rsp_data, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag, \
output wire scope_icache_rsp_ready, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_DCACHE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid, \
output wire [1:0] scope_dcache_req_warp_num, \
output wire [31:0] scope_dcache_req_curr_PC, \
output wire [31:0] scope_dcache_req_addr, \
output wire scope_dcache_req_rw, \
output wire [3:0] scope_dcache_req_byteen, \
output wire [31:0] scope_dcache_req_data, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \
output wire scope_dcache_req_ready, \
output wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid, \
output wire [31:0] scope_dcache_rsp_data, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \
output wire scope_dcache_rsp_ready, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_DRAM_IO \
/* verilator lint_off UNDRIVEN */ \
output wire scope_dram_req_valid, \
output wire [31:0] scope_dram_req_addr, \
output wire scope_dram_req_rw, \
output wire [15:0] scope_dram_req_byteen, \
output wire [31:0] scope_dram_req_data, \
output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag, \
output wire scope_dram_req_ready, \
output wire scope_dram_rsp_valid, \
output wire [31:0] scope_dram_rsp_data, \
output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag, \
output wire scope_dram_rsp_ready, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_SNP_IO \
/* verilator lint_off UNDRIVEN */ \
output wire scope_snp_req_valid, \
output wire [31:0] scope_snp_req_addr, \
output wire scope_snp_req_invalidate, \
output wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag, \
output wire scope_snp_req_ready, \
output wire scope_snp_rsp_valid, \
output wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag, \
output wire scope_snp_rsp_ready, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_CORE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire scope_busy, \
output wire scope_schedule_delay, \
output wire scope_memory_delay, \
output wire scope_exec_delay, \
output wire scope_gpr_stage_delay, \
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_BE_IO \
/* verilator lint_off UNDRIVEN */ \
output wire [3:0] scope_decode_valid, \
output wire [1:0] scope_decode_warp_num, \
output wire [31:0] scope_decode_curr_PC, \
output wire scope_decode_is_jal, \
output wire [4:0] scope_decode_rs1, \
output wire [4:0] scope_decode_rs2, \
output wire [3:0] scope_execute_valid, \
output wire [1:0] scope_execute_warp_num, \
output wire [4:0] scope_execute_rd, \
output wire [31:0] scope_execute_a, \
output wire [31:0] scope_execute_b, \
output wire [3:0] scope_writeback_valid, \
output wire [1:0] scope_writeback_warp_num, \
output wire [1:0] scope_writeback_wb, \
output wire [4:0] scope_writeback_rd, \
output wire [31:0] scope_writeback_data,
/* verilator lint_on UNDRIVEN */
`define SCOPE_SIGNALS_ICACHE_ATTACH \
.scope_icache_req_valid (scope_icache_req_valid), \
.scope_icache_req_warp_num (scope_icache_req_warp_num), \
.scope_icache_req_addr (scope_icache_req_addr), \
.scope_icache_req_tag (scope_icache_req_tag), \
.scope_icache_req_ready (scope_icache_req_ready), \
.scope_icache_rsp_valid (scope_icache_rsp_valid), \
.scope_icache_rsp_data (scope_icache_rsp_data), \
.scope_icache_rsp_tag (scope_icache_rsp_tag), \
.scope_icache_rsp_ready (scope_icache_rsp_ready),
`define SCOPE_SIGNALS_DCACHE_ATTACH \
.scope_dcache_req_valid (scope_dcache_req_valid), \
.scope_dcache_req_warp_num (scope_dcache_req_warp_num), \
.scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \
.scope_dcache_req_addr (scope_dcache_req_addr), \
.scope_dcache_req_rw (scope_dcache_req_rw), \
.scope_dcache_req_byteen(scope_dcache_req_byteen), \
.scope_dcache_req_data (scope_dcache_req_data), \
.scope_dcache_req_tag (scope_dcache_req_tag), \
.scope_dcache_req_ready (scope_dcache_req_ready), \
.scope_dcache_rsp_valid (scope_dcache_rsp_valid), \
.scope_dcache_rsp_data (scope_dcache_rsp_data), \
.scope_dcache_rsp_tag (scope_dcache_rsp_tag), \
.scope_dcache_rsp_ready (scope_dcache_rsp_ready),
`define SCOPE_SIGNALS_DRAM_ATTACH \
.scope_dram_req_valid (scope_dram_req_valid), \
.scope_dram_req_addr (scope_dram_req_addr), \
.scope_dram_req_rw (scope_dram_req_rw), \
.scope_dram_req_byteen (scope_dram_req_byteen), \
.scope_dram_req_data (scope_dram_req_data), \
.scope_dram_req_tag (scope_dram_req_tag), \
.scope_dram_req_ready (scope_dram_req_ready), \
.scope_dram_rsp_valid (scope_dram_rsp_valid), \
.scope_dram_rsp_data (scope_dram_rsp_data), \
.scope_dram_rsp_tag (scope_dram_rsp_tag), \
.scope_dram_rsp_ready (scope_dram_rsp_ready),
`define SCOPE_SIGNALS_SNP_ATTACH \
.scope_snp_req_valid (scope_snp_req_valid), \
.scope_snp_req_addr (scope_snp_req_addr), \
.scope_snp_req_invalidate(scope_snp_req_invalidate), \
.scope_snp_req_tag (scope_snp_req_tag), \
.scope_snp_req_ready (scope_snp_req_ready), \
.scope_snp_rsp_valid (scope_snp_rsp_valid), \
.scope_snp_rsp_tag (scope_snp_rsp_tag), \
.scope_snp_rsp_ready (scope_snp_rsp_ready),
`define SCOPE_SIGNALS_CORE_ATTACH \
.scope_busy (scope_busy), \
.scope_schedule_delay (scope_schedule_delay), \
.scope_memory_delay (scope_memory_delay), \
.scope_exec_delay (scope_exec_delay), \
.scope_gpr_stage_delay (scope_gpr_stage_delay),
`define SCOPE_SIGNALS_BE_ATTACH \
.scope_decode_valid (scope_decode_valid), \
.scope_decode_warp_num (scope_decode_warp_num), \
.scope_decode_curr_PC (scope_decode_curr_PC), \
.scope_decode_is_jal (scope_decode_is_jal), \
.scope_decode_rs1 (scope_decode_rs1), \
.scope_decode_rs2 (scope_decode_rs2), \
.scope_execute_valid (scope_execute_valid), \
.scope_execute_warp_num (scope_execute_warp_num), \
.scope_execute_rd (scope_execute_rd), \
.scope_execute_a (scope_execute_a), \
.scope_execute_b (scope_execute_b), \
.scope_writeback_valid (scope_writeback_valid), \
.scope_writeback_warp_num (scope_writeback_warp_num), \
.scope_writeback_wb (scope_writeback_wb), \
.scope_writeback_rd (scope_writeback_rd), \
.scope_writeback_data (scope_writeback_data),
`define SCOPE_ASSIGN(d,s) assign d = s
`else
`define SCOPE_SIGNALS_ICACHE_IO
`define SCOPE_SIGNALS_DCACHE_IO
`define SCOPE_SIGNALS_DRAM_IO
`define SCOPE_SIGNALS_CORE_IO
`define SCOPE_SIGNALS_BE_IO
`define SCOPE_SIGNALS_ICACHE_ATTACH
`define SCOPE_SIGNALS_DCACHE_ATTACH
`define SCOPE_SIGNALS_DRAM_ATTACH
`define SCOPE_SIGNALS_CORE_ATTACH
`define SCOPE_SIGNALS_BE_ATTACH
`define SCOPE_ASSIGN(d,s)
`endif
// VX_DEFINE
`endif

View file

@ -3,7 +3,7 @@
module VX_front_end #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_ISTAGE_IO
input wire clk,
input wire reset,
@ -65,7 +65,7 @@ module VX_front_end #(
VX_icache_stage #(
.CORE_ID(CORE_ID)
) icache_stage (
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_ISTAGE_BIND
.clk (clk),
.reset (reset),

View file

@ -3,7 +3,7 @@
module VX_icache_stage #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_ISTAGE_IO
input wire clk,
input wire reset,
@ -68,7 +68,7 @@ module VX_icache_stage #(
// Can't accept new request
assign icache_stage_delay = mrq_full || ~icache_req_if.core_req_ready;
`ifndef NDEBUG
`ifdef DBG_CORE_REQ_INFO
assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num, mrq_write_addr};
`else
assign icache_req_if.core_req_tag = mrq_write_addr;
@ -95,7 +95,7 @@ module VX_icache_stage #(
`SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_if.core_rsp_ready);
`ifdef DBG_PRINT_CORE_ICACHE
always_ff @(posedge clk) begin
always @(posedge clk) begin
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin
$display("%t: I%01d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
end

View file

@ -3,7 +3,7 @@
module VX_lsu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_LSU_IO
input wire clk,
input wire reset,
@ -130,10 +130,10 @@ module VX_lsu_unit #(
assign dcache_req_if.core_req_addr = mem_req_addr;
assign dcache_req_if.core_req_data = mem_req_data;
`ifndef NDEBUG
assign dcache_req_if.core_req_tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr};
`ifdef DBG_CORE_REQ_INFO
assign dcache_req_if.core_req_tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr};
`else
assign dcache_req_if.core_req_tag = mrq_write_addr;
assign dcache_req_if.core_req_tag = mrq_write_addr;
`endif
// Can't accept new request
@ -179,7 +179,7 @@ module VX_lsu_unit #(
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.core_rsp_ready);
`ifdef DBG_PRINT_CORE_DCACHE
always_ff @(posedge clk) begin
always @(posedge clk) begin
if ((| dcache_req_if.core_req_valid) && dcache_req_if.core_req_ready) begin
$display("%t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h",
$time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data);

View file

@ -1,8 +1,10 @@
`include "VX_define.vh"
module VX_mem_ctrl # (
module VX_mem_unit # (
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ICACHE_IO
input wire clk,
input wire reset,
@ -74,7 +76,7 @@ module VX_mem_ctrl # (
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
) gpu_smem (
) smem (
.clk (clk),
.reset (reset),
@ -157,7 +159,7 @@ module VX_mem_ctrl # (
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
) gpu_dcache (
) dcache (
.clk (clk),
.reset (reset),
@ -239,7 +241,9 @@ module VX_mem_ctrl # (
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
) gpu_icache (
) icache (
`SCOPE_SIGNALS_ICACHE_BIND
.clk (clk),
.reset (reset),

View file

@ -3,9 +3,9 @@
module VX_pipeline #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO
// Clock
@ -100,7 +100,7 @@ module VX_pipeline #(
VX_front_end #(
.CORE_ID(CORE_ID)
) front_end (
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_ISTAGE_BIND
.clk (clk),
.reset (reset),
@ -129,8 +129,8 @@ module VX_pipeline #(
VX_back_end #(
.CORE_ID(CORE_ID)
) back_end (
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_BE_BIND
.clk (clk),
.reset (reset),
@ -181,7 +181,7 @@ module VX_pipeline #(
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_stage_delay);
`ifdef DBG_PRINT_WB
always_ff @(posedge clk) begin
always @(posedge clk) begin
if ((| writeback_if.valid) && (writeback_if.wb != 0)) begin
$display("%t: Writeback: wid=%0d, rd=%0d, data=%0h", $time, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
end

View file

@ -13,11 +13,10 @@ module VX_scheduler (
output wire schedule_delay,
output wire is_empty
);
reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
reg [31:0] count_valid;
localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);
wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0);
reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
reg [CTVW-1:0] count_valid;
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
@ -51,7 +50,14 @@ module VX_scheduler (
integer i, w;
wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0) && !schedule_delay;
wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0);
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;
reg [CTVW-1:0] count_valid_next = (acquire_rd && ~(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
(~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
count_valid;
always @(posedge clk) begin
if (reset) begin
@ -62,19 +68,14 @@ module VX_scheduler (
end
count_valid <= 0;
end else begin
if (acquire_rd && !schedule_delay) begin
if (acquire_rd) begin
rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
count_valid <= count_valid + 1;
end
if (release_rd) begin
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
if (0 == valid_wb_new_mask) begin
assert(count_valid != 0);
count_valid <= count_valid - 1;
end
end
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
end
count_valid <= count_valid_next;
end
end

283
hw/rtl/VX_scope.vh Normal file
View file

@ -0,0 +1,283 @@
`ifndef VX_SCOPE
`define VX_SCOPE
`ifdef SCOPE
`define SCOPE_SIGNALS_DATA_LIST \
scope_icache_req_warp_num, \
scope_icache_req_addr, \
scope_icache_req_tag, \
scope_icache_rsp_data, \
scope_icache_rsp_tag, \
scope_dcache_req_warp_num, \
scope_dcache_req_curr_PC, \
scope_dcache_req_addr, \
scope_dcache_req_rw, \
scope_dcache_req_byteen, \
scope_dcache_req_data, \
scope_dcache_req_tag, \
scope_dcache_rsp_data, \
scope_dcache_rsp_tag, \
scope_dram_req_addr, \
scope_dram_req_rw, \
scope_dram_req_byteen, \
scope_dram_req_data, \
scope_dram_req_tag, \
scope_dram_rsp_data, \
scope_dram_rsp_tag, \
scope_snp_req_addr, \
scope_snp_req_invalidate, \
scope_snp_req_tag, \
scope_snp_rsp_tag, \
scope_decode_warp_num, \
scope_decode_curr_PC, \
scope_decode_is_jal, \
scope_decode_rs1, \
scope_decode_rs2, \
scope_execute_warp_num, \
scope_execute_rd, \
scope_execute_a, \
scope_execute_b, \
scope_writeback_warp_num, \
scope_writeback_wb, \
scope_writeback_rd, \
scope_writeback_data,
`define SCOPE_SIGNALS_UPD_LIST \
scope_icache_req_valid, \
scope_icache_req_ready, \
scope_icache_rsp_valid, \
scope_icache_rsp_ready, \
scope_dcache_req_valid, \
scope_dcache_req_ready, \
scope_dcache_rsp_valid, \
scope_dcache_rsp_ready, \
scope_dram_req_valid, \
scope_dram_req_ready, \
scope_dram_rsp_valid, \
scope_dram_rsp_ready, \
scope_snp_req_valid, \
scope_snp_req_ready, \
scope_snp_rsp_valid, \
scope_snp_rsp_ready, \
scope_decode_valid, \
scope_execute_valid, \
scope_writeback_valid, \
scope_schedule_delay, \
scope_memory_delay, \
scope_exec_delay, \
scope_gpr_stage_delay, \
scope_busy, \
scope_idram_req_valid, \
scope_idram_req_ready, \
scope_idram_rsp_valid, \
scope_idram_rsp_ready
`define SCOPE_SIGNALS_DECL \
wire scope_icache_req_valid; \
wire [`NW_BITS-1:0] scope_icache_req_warp_num; \
wire [31:0] scope_icache_req_addr; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
wire scope_icache_req_ready; \
wire scope_icache_rsp_valid; \
wire [31:0] scope_icache_rsp_data; \
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \
wire scope_icache_rsp_ready; \
wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \
wire [`NW_BITS-1:0] scope_dcache_req_warp_num; \
wire [31:0] scope_dcache_req_curr_PC; \
wire [31:0] scope_dcache_req_addr; \
wire scope_dcache_req_rw; \
wire [3:0] scope_dcache_req_byteen; \
wire [31:0] scope_dcache_req_data; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \
wire scope_dcache_req_ready; \
wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid; \
wire [31:0] scope_dcache_rsp_data; \
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \
wire scope_dcache_rsp_ready; \
wire scope_dram_req_valid; \
wire [31:0] scope_dram_req_addr; \
wire scope_dram_req_rw; \
wire [15:0] scope_dram_req_byteen; \
wire [31:0] scope_dram_req_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \
wire scope_dram_req_ready; \
wire scope_dram_rsp_valid; \
wire [31:0] scope_dram_rsp_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \
wire scope_dram_rsp_ready; \
wire scope_snp_req_valid; \
wire [31:0] scope_snp_req_addr; \
wire scope_snp_req_invalidate; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \
wire scope_snp_req_ready; \
wire scope_snp_rsp_valid; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
wire scope_busy; \
wire scope_snp_rsp_ready; \
wire scope_schedule_delay; \
wire scope_memory_delay; \
wire scope_exec_delay; \
wire scope_gpr_stage_delay; \
wire [`NUM_THREADS-1:0] scope_decode_valid; \
wire [`NW_BITS-1:0] scope_decode_warp_num; \
wire [31:0] scope_decode_curr_PC; \
wire scope_decode_is_jal; \
wire [4:0] scope_decode_rs1; \
wire [4:0] scope_decode_rs2; \
wire [`NUM_THREADS-1:0] scope_execute_valid; \
wire [`NW_BITS-1:0] scope_execute_warp_num; \
wire [4:0] scope_execute_rd; \
wire [31:0] scope_execute_a; \
wire [31:0] scope_execute_b; \
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
wire [`NW_BITS-1:0] scope_writeback_warp_num; \
wire [1:0] scope_writeback_wb; \
wire [4:0] scope_writeback_rd; \
wire [31:0] scope_writeback_data; \
wire scope_idram_req_valid; \
wire scope_idram_req_ready; \
wire scope_idram_rsp_valid; \
wire scope_idram_rsp_ready;
`define SCOPE_SIGNALS_ISTAGE_IO \
output wire scope_icache_req_valid, \
output wire [`NW_BITS-1:0] scope_icache_req_warp_num, \
output wire [31:0] scope_icache_req_addr, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
output wire scope_icache_req_ready, \
output wire scope_icache_rsp_valid, \
output wire [31:0] scope_icache_rsp_data, \
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag, \
output wire scope_icache_rsp_ready,
`define SCOPE_SIGNALS_LSU_IO \
output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \
output wire [`NW_BITS-1:0] scope_dcache_req_warp_num, \
output wire [31:0] scope_dcache_req_curr_PC, \
output wire [31:0] scope_dcache_req_addr, \
output wire scope_dcache_req_rw, \
output wire [3:0] scope_dcache_req_byteen, \
output wire [31:0] scope_dcache_req_data, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \
output wire scope_dcache_req_ready, \
output wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid, \
output wire [31:0] scope_dcache_rsp_data, \
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \
output wire scope_dcache_rsp_ready,
`define SCOPE_SIGNALS_CORE_IO \
`define SCOPE_SIGNALS_ICACHE_IO \
output wire scope_idram_req_valid, \
output wire scope_idram_req_ready, \
output wire scope_idram_rsp_valid, \
output wire scope_idram_rsp_ready,
`define SCOPE_SIGNALS_PIPELINE_IO \
output wire scope_busy, \
output wire scope_schedule_delay, \
output wire scope_memory_delay, \
output wire scope_exec_delay, \
output wire scope_gpr_stage_delay,
`define SCOPE_SIGNALS_BE_IO \
output wire [`NUM_THREADS-1:0] scope_decode_valid, \
output wire [`NW_BITS-1:0] scope_decode_warp_num, \
output wire [31:0] scope_decode_curr_PC, \
output wire scope_decode_is_jal, \
output wire [4:0] scope_decode_rs1, \
output wire [4:0] scope_decode_rs2, \
output wire [`NUM_THREADS-1:0] scope_execute_valid, \
output wire [`NW_BITS-1:0] scope_execute_warp_num, \
output wire [4:0] scope_execute_rd, \
output wire [31:0] scope_execute_a, \
output wire [31:0] scope_execute_b, \
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
output wire [`NW_BITS-1:0] scope_writeback_warp_num, \
output wire [1:0] scope_writeback_wb, \
output wire [4:0] scope_writeback_rd, \
output wire [31:0] scope_writeback_data,
`define SCOPE_SIGNALS_ISTAGE_BIND \
.scope_icache_req_valid (scope_icache_req_valid), \
.scope_icache_req_warp_num (scope_icache_req_warp_num), \
.scope_icache_req_addr (scope_icache_req_addr), \
.scope_icache_req_tag (scope_icache_req_tag), \
.scope_icache_req_ready (scope_icache_req_ready), \
.scope_icache_rsp_valid (scope_icache_rsp_valid), \
.scope_icache_rsp_data (scope_icache_rsp_data), \
.scope_icache_rsp_tag (scope_icache_rsp_tag), \
.scope_icache_rsp_ready (scope_icache_rsp_ready),
`define SCOPE_SIGNALS_LSU_BIND \
.scope_dcache_req_valid (scope_dcache_req_valid), \
.scope_dcache_req_warp_num (scope_dcache_req_warp_num), \
.scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \
.scope_dcache_req_addr (scope_dcache_req_addr), \
.scope_dcache_req_rw (scope_dcache_req_rw), \
.scope_dcache_req_byteen(scope_dcache_req_byteen), \
.scope_dcache_req_data (scope_dcache_req_data), \
.scope_dcache_req_tag (scope_dcache_req_tag), \
.scope_dcache_req_ready (scope_dcache_req_ready), \
.scope_dcache_rsp_valid (scope_dcache_rsp_valid), \
.scope_dcache_rsp_data (scope_dcache_rsp_data), \
.scope_dcache_rsp_tag (scope_dcache_rsp_tag), \
.scope_dcache_rsp_ready (scope_dcache_rsp_ready),
`define SCOPE_SIGNALS_CORE_BIND \
`define SCOPE_SIGNALS_ICACHE_BIND \
.scope_idram_req_valid (scope_idram_req_valid), \
.scope_idram_req_ready (scope_idram_req_ready), \
.scope_idram_rsp_valid (scope_idram_rsp_valid), \
.scope_idram_rsp_ready (scope_idram_rsp_ready),
`define SCOPE_SIGNALS_PIPELINE_BIND \
.scope_busy (scope_busy), \
.scope_schedule_delay (scope_schedule_delay), \
.scope_memory_delay (scope_memory_delay), \
.scope_exec_delay (scope_exec_delay), \
.scope_gpr_stage_delay (scope_gpr_stage_delay),
`define SCOPE_SIGNALS_BE_BIND \
.scope_decode_valid (scope_decode_valid), \
.scope_decode_warp_num (scope_decode_warp_num), \
.scope_decode_curr_PC (scope_decode_curr_PC), \
.scope_decode_is_jal (scope_decode_is_jal), \
.scope_decode_rs1 (scope_decode_rs1), \
.scope_decode_rs2 (scope_decode_rs2), \
.scope_execute_valid (scope_execute_valid), \
.scope_execute_warp_num (scope_execute_warp_num), \
.scope_execute_rd (scope_execute_rd), \
.scope_execute_a (scope_execute_a), \
.scope_execute_b (scope_execute_b), \
.scope_writeback_valid (scope_writeback_valid), \
.scope_writeback_warp_num (scope_writeback_warp_num), \
.scope_writeback_wb (scope_writeback_wb), \
.scope_writeback_rd (scope_writeback_rd), \
.scope_writeback_data (scope_writeback_data),
`define SCOPE_ASSIGN(d,s) assign d = s
`else
`define SCOPE_SIGNALS_ISTAGE_IO
`define SCOPE_SIGNALS_LSU_IO
`define SCOPE_SIGNALS_CORE_IO
`define SCOPE_SIGNALS_ICACHE_IO
`define SCOPE_SIGNALS_PIPELINE_IO
`define SCOPE_SIGNALS_BE_IO
`define SCOPE_SIGNALS_ISTAGE_BIND
`define SCOPE_SIGNALS_LSU_BIND
`define SCOPE_SIGNALS_CORE_BIND
`define SCOPE_SIGNALS_ICACHE_BIND
`define SCOPE_SIGNALS_PIPELINE_BIND
`define SCOPE_SIGNALS_BE_BIND
`define SCOPE_ASSIGN(d,s)
`endif
// VX_SCOPE
`endif

View file

@ -3,9 +3,11 @@
module Vortex #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO
// Clock
@ -169,10 +171,10 @@ module Vortex #(
VX_pipeline #(
.CORE_ID(CORE_ID)
) pipeline (
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_CORE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
.clk(clk),
.reset(reset),
@ -232,9 +234,11 @@ module Vortex #(
assign snp_rsp_tag = dcache_snp_rsp_if.snp_rsp_tag;
assign dcache_snp_rsp_if.snp_rsp_ready = snp_rsp_ready;
VX_mem_ctrl #(
VX_mem_unit #(
.CORE_ID(CORE_ID)
) mem_ctrl (
) mem_unit (
`SCOPE_SIGNALS_ICACHE_BIND
.clk (clk),
.reset (reset),
@ -269,7 +273,7 @@ module Vortex #(
.core_io_rsp_if (arb_io_rsp_if),
.core_rsp_if (core_dcache_rsp_if)
);
endmodule // Vortex

View file

@ -3,9 +3,11 @@
module Vortex_Cluster #(
parameter CLUSTER_ID = 0
) (
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO
// Clock
@ -115,10 +117,12 @@ module Vortex_Cluster #(
Vortex #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) vortex_core (
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_CORE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_ICACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
.clk (clk),
.reset (reset),

View file

@ -1,9 +1,11 @@
`include "VX_define.vh"
module Vortex_Socket (
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_DCACHE_IO
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CORE_IO
`SCOPE_SIGNALS_ICACHE_IO
`SCOPE_SIGNALS_PIPELINE_IO
`SCOPE_SIGNALS_BE_IO
// Clock
@ -61,10 +63,12 @@ module Vortex_Socket (
Vortex_Cluster #(
.CLUSTER_ID(`L3CACHE_ID)
) Vortex_Cluster (
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_CORE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_ICACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
.clk (clk),
.reset (reset),
@ -155,10 +159,12 @@ module Vortex_Socket (
Vortex_Cluster #(
.CLUSTER_ID(i)
) Vortex_Cluster (
`SCOPE_SIGNALS_ICACHE_ATTACH
`SCOPE_SIGNALS_DCACHE_ATTACH
`SCOPE_SIGNALS_CORE_ATTACH
`SCOPE_SIGNALS_BE_ATTACH
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CORE_BIND
`SCOPE_SIGNALS_ICACHE_BIND
`SCOPE_SIGNALS_PIPELINE_BIND
`SCOPE_SIGNALS_BE_BIND
.clk (clk),
.reset (reset),
@ -387,7 +393,7 @@ module Vortex_Socket (
end
`ifdef DBG_PRINT_DRAM
always_ff @(posedge clk) begin
always @(posedge clk) begin
if (dram_req_valid && dram_req_ready) begin
$display("%t: DRAM req: rw=%b addr=%0h, tag=%0h, byteen=%0h data=%0h", $time, dram_req_rw, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_tag, dram_req_byteen, dram_req_data);
end

169
hw/rtl/cache/VX_bank.v vendored
View file

@ -101,7 +101,7 @@ module VX_bank #(
input wire snp_rsp_ready
);
`DEBUG_BLOCK(
`ifdef DBG_CORE_REQ_INFO
wire[31:0] debug_use_pc_st0;
wire[1:0] debug_wb_st0;
wire[4:0] debug_rd_st0;
@ -128,7 +128,7 @@ module VX_bank #(
wire[WORD_SIZE-1:0] debug_byteen_st2;
wire[`REQS_BITS-1:0] debug_tid_st2;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2;
)
`endif
wire snrq_pop;
wire snrq_empty;
@ -300,7 +300,6 @@ module VX_bank #(
wire qual_is_snp_st0;
wire qual_snp_invalidate_st0;
wire valid_st1 [STAGE_1_CYCLES-1:0];
wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0];
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1 [STAGE_1_CYCLES-1:0];
@ -313,17 +312,17 @@ module VX_bank #(
assign qual_is_fill_st0 = dfpq_pop_unqual;
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop;
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop;
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
mrvq_pop_unqual ? mrvq_addr_st0 :
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
snrq_pop_unqual ? snrq_addr_st0 :
0;
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
mrvq_pop_unqual ? mrvq_addr_st0 :
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
snrq_pop_unqual ? snrq_addr_st0 :
0;
if (`WORD_SELECT_WIDTH != 0) begin
assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] :
mrvq_pop_unqual ? mrvq_wsel_st0 :
0;
assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] :
mrvq_pop_unqual ? mrvq_wsel_st0 :
0;
end else begin
`UNUSED_VAR(mrvq_wsel_st0)
assign qual_wsel_st0 = 0;
@ -355,11 +354,11 @@ module VX_bank #(
assign qual_from_mrvq_st0 = mrvq_pop_unqual;
`DEBUG_BLOCK(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
end
)
`endif
VX_generic_register #(
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
@ -408,18 +407,23 @@ module VX_bank #(
wire from_mrvq_st1e;
wire mrvq_recover_ready_state_st1e;
assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1];
assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1];
assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1];
assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1];
assign snp_invalidate_st1e = snp_invalidate_st1 [STAGE_1_CYCLES-1];
assign {tag_st1e, mem_rw_st1e, mem_byteen_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
assign st2_pending_hazard_st1e = (miss_add_because_miss) && ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2);
assign st2_pending_hazard_st1e = (miss_add_because_miss)
&& ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2);
assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e)) || (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2);
assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e))
|| (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2);
assign mrvq_recover_ready_state_st1e = valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2 && (addr_st2 == addr_st1[STAGE_1_CYCLES-1]);
assign mrvq_recover_ready_state_st1e = valid_st1e
&& from_mrvq_st1e
&& recover_mrvq_state_st2
&& (addr_st2 == addr_st1[STAGE_1_CYCLES-1]);
VX_tag_data_access #(
.CACHE_SIZE (CACHE_SIZE),
@ -466,11 +470,12 @@ module VX_bank #(
.mrvq_init_ready_state_st1e(mrvq_init_ready_state_st1e)
);
`DEBUG_BLOCK(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
end
)
`endif
wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1];
wire from_mrvq_st1e_st2 = from_mrvq_st1e;
@ -506,11 +511,11 @@ module VX_bank #(
.out ({mrvq_recover_ready_state_st2 , from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 })
);
`DEBUG_BLOCK(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
end
)
`endif
// Enqueue to miss reserv if it's a valid miss
assign miss_add_because_miss = valid_st2 && !is_snp_st2 && miss_st2;
@ -539,7 +544,9 @@ module VX_bank #(
assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0 );
assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1[STAGE_1_CYCLES-1]);
assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 || mrvq_init_ready_state_hazard_st0_st1 || mrvq_init_ready_state_hazard_st1e_st1;
assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2
|| mrvq_init_ready_state_hazard_st0_st1
|| mrvq_init_ready_state_hazard_st1e_st1;
VX_cache_miss_resrv #(
.BANK_ID (BANK_ID),
@ -592,10 +599,8 @@ module VX_bank #(
// Enqueue core response
wire cwbq_push;
wire cwbq_pop;
wire cwbq_empty;
wire cwbq_full;
wire cwbq_push, cwbq_pop;
wire cwbq_empty, cwbq_full;
wire cwbq_push_unqual = valid_st2 && !miss_st2 && !is_fill_st2 && !is_snp_st2;
assign cwbq_push_stall = cwbq_push_unqual && cwbq_full;
@ -634,42 +639,22 @@ module VX_bank #(
// Enqueue DRAM fill request
// `IGNORE_WARNINGS_BEGIN
// wire invalidate_fill;
// `IGNORE_WARNINGS_END
// wire possible_fill = valid_st2 && miss_st2 && dram_fill_req_ready && ~is_snp_st2;
// wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2;
wire dram_fill_req_unqual = miss_add_unqual
&& (!mrvq_init_ready_state_st2
|| (from_mrvq_st2 && !mrvq_recover_ready_state_st2));
// VX_fill_invalidator #(
// .BANK_LINE_SIZE (BANK_LINE_SIZE),
// .NUM_BANKS (NUM_BANKS),
// .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE)
// ) fill_invalidator (
// .clk (clk),
// .reset (reset),
// .possible_fill (possible_fill),
// .success_fill (is_fill_st2),
// .fill_addr (fill_invalidator_addr),
// .invalidate_fill (invalidate_fill)
// );
wire dram_fill_req_unqual = miss_add_unqual && (!mrvq_init_ready_state_st2 || (from_mrvq_st2 && !mrvq_recover_ready_state_st2));
assign dram_fill_req_valid = dram_fill_req_unqual
&& dram_fill_req_ready
&& !( dwbq_push_stall
|| mrvq_push_stall
|| cwbq_push_stall);
assign dram_fill_req_valid = dram_fill_req_unqual
&& !(dwbq_push_stall
|| mrvq_push_stall
|| cwbq_push_stall);
assign dram_fill_req_addr = addr_st2;
assign dram_fill_req_stall = dram_fill_req_unqual && ~dram_fill_req_ready;
// Enqueue DRAM writeback request
wire dwbq_push;
wire dwbq_pop;
wire dwbq_empty;
wire dwbq_full;
wire dwbq_push, dwbq_pop;
wire dwbq_empty, dwbq_full;
wire dwbq_is_dwb_in, dwbq_is_snp_in;
wire dwbq_is_dwb_out, dwbq_is_snp_out;
@ -724,9 +709,9 @@ module VX_bank #(
assign dram_wb_req_valid = ~dwbq_empty && dwbq_is_dwb_out && (~dwbq_is_snp_out || dwbq_dual_valid_sel == 0);
assign snp_rsp_valid = ~dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dwb_out || dwbq_dual_valid_sel == 1);
assign dwbq_pop = (dwbq_is_dwb_out && ~dwbq_is_snp_out && dram_wb_req_fire)
|| (dwbq_is_snp_out && ~dwbq_is_dwb_out && snp_rsp_fire)
|| (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire);
assign dwbq_pop = (dwbq_is_dwb_out && ~dwbq_is_snp_out && dram_wb_req_fire)
|| (dwbq_is_snp_out && ~dwbq_is_dwb_out && snp_rsp_fire)
|| (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire);
// bank pipeline stall
assign stall_bank_pipe = cwbq_push_stall
@ -735,53 +720,27 @@ module VX_bank #(
|| dram_fill_req_stall;
`ifdef DBG_PRINT_CACHE_BANK
if (NUM_BANKS == 1) begin
always_ff @(posedge clk) begin
if (core_req_valid && core_req_ready) begin
$display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(core_req_addr), core_req_tag);
end
if (core_rsp_valid && core_rsp_ready) begin
$display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
end
if (dram_fill_req_valid && dram_fill_req_ready) begin
$display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_req_addr));
end
if (dram_wb_req_valid && dram_wb_req_ready) begin
$display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_wb_req_addr), dram_wb_req_data);
end
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
$display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_rsp_addr), dram_fill_rsp_data);
end
if (snp_req_valid && snp_req_ready) begin
$display("%t: bank%0d-%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(snp_req_addr), snp_req_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
end
always @(posedge clk) begin
if ((|core_req_valid) && core_req_ready) begin
$display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag);
end
end else begin
always_ff @(posedge clk) begin
if ((|core_req_valid) && core_req_ready) begin
$display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag);
end
if (core_rsp_valid && core_rsp_ready) begin
$display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
end
if (dram_fill_req_valid && dram_fill_req_ready) begin
$display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
end
if (dram_wb_req_valid && dram_wb_req_ready) begin
$display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
end
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
$display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
end
if (snp_req_valid && snp_req_ready) begin
$display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
end
if (core_rsp_valid && core_rsp_ready) begin
$display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
end
if (dram_fill_req_valid && dram_fill_req_ready) begin
$display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
end
if (dram_wb_req_firevalid && dram_wb_req_ready) begin
$display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
end
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
$display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
end
if (snp_req_valid && snp_req_ready) begin
$display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
end
end
`endif

View file

@ -65,6 +65,8 @@ module VX_cache #(
// Snooping forward tag width
parameter SNP_FWD_TAG_WIDTH = 1
) (
`SCOPE_SIGNALS_ICACHE_IO
input wire clk,
input wire reset,
@ -125,7 +127,7 @@ module VX_cache #(
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready
);
`DEBUG_BLOCK(
`ifdef DBG_CORE_REQ_INFO
wire[31:0] debug_core_req_use_pc;
wire[1:0] debug_core_req_wb;
wire[4:0] debug_core_req_rd;
@ -135,7 +137,8 @@ module VX_cache #(
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0];
end
)
`endif
wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid;
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
@ -476,7 +479,13 @@ module VX_cache #(
.per_bank_snp_rsp_ready (per_bank_snp_rsp_ready),
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready)
);
.snp_rsp_ready (snp_rsp_ready)
);
`SCOPE_ASSIGN(scope_idram_req_valid, per_bank_dram_fill_req_valid[0]);
`SCOPE_ASSIGN(scope_idram_req_ready, dram_fill_req_ready);
`SCOPE_ASSIGN(scope_idram_rsp_valid, per_bank_core_rsp_valid[0]);
`SCOPE_ASSIGN(scope_idram_rsp_ready, per_bank_core_rsp_ready[0]);
endmodule

View file

@ -72,8 +72,6 @@
`define LINE_TO_DRAM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
`define LINE_TO_BYTE_ADDR(x, i) {x, `BANK_SELECT_BITS'(i), `BASE_ADDR_BITS'(0)}
`define LINE_TO_BYTE_ADDR0(x) {x, `BASE_ADDR_BITS'(0)}
`define LINE_TO_BYTE_ADDR(x, i) {x, (((`BANK_SELECT_BITS + `BASE_ADDR_BITS)'(i)) << `BASE_ADDR_BITS)}
`endif

View file

@ -151,38 +151,21 @@ module VX_cache_miss_resrv #(
end
end
`ifdef DBG_PRINT_CACHE_MSRQ
`ifdef DBG_PRINT_CACHE_MSRQ
integer j;
if (NUM_BANKS == 1) begin
always_ff @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
$write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
for (j = 0; j < MRVQ_SIZE; j++) begin
if (valid_table[j]) begin
$write(" ");
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
if (~ready_table[j]) $write("!");
$write("addr%0d=%0h", j, {addr_table[j], `BASE_ADDR_BITS'(0)});
end
end
$write("\n");
end
end
end else begin
always_ff @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
$write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
for (j = 0; j < MRVQ_SIZE; j++) begin
if (valid_table[j]) begin
$write(" ");
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
if (~ready_table[j]) $write("!");
$write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
end
end
$write("\n");
end
end
always @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
$write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
for (j = 0; j < MRVQ_SIZE; j++) begin
if (valid_table[j]) begin
$write(" ");
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
if (~ready_table[j]) $write("!");
$write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
end
end
$write("\n");
end
end
`endif

View file

@ -116,7 +116,7 @@ module VX_snp_forwarder #(
end
`ifdef DBG_PRINT_CACHE_SNP
always_ff @(posedge clk) begin
always @(posedge clk) begin
if (snp_req_valid && snp_req_ready) begin
$display("%t: cache%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_invalidate, snp_req_tag);
end

View file

@ -17,112 +17,105 @@ module VX_divide #(
output reg [WIDTHD-1:0] remainder
);
// synthesis read_comments_as_HDL on
// localparam IMPL = "quartus";
// synthesis read_comments_as_HDL off
// altera translate_off
localparam IMPL="fallback";
// altera translate_on
generate
if (NREP != DREP) begin
different_nrep_drep_not_yet_supported non_existing_module();
end
if (IMPL == "quartus") begin
`ifdef QUARTUS
localparam lpm_speed=SPEED == "HIGHEST" ? 9:5;
localparam lpm_speed=SPEED == "HIGHEST" ? 9 : 5;
lpm_divide #(
.LPM_WIDTHN(WIDTHN),
.LPM_WIDTHD(WIDTHD),
.LPM_NREPRESENTATION(NREP),
.LPM_DREPRESENTATION(DREP),
.LPM_PIPELINE(PIPELINE),
.LPM_REMAINDERPOSITIVE("FALSE"), // emulate verilog % operator
.MAXIMIZE_SPEED(lpm_speed)
) quartus_divider (
.clock(clock),
.aclr(aclr),
.clken(clken),
.numer(numer),
.denom(denom),
.quotient(quotient),
.remain(remainder)
);
end
else begin
lpm_divide #(
.LPM_WIDTHN(WIDTHN),
.LPM_WIDTHD(WIDTHD),
.LPM_NREPRESENTATION(NREP),
.LPM_DREPRESENTATION(DREP),
.LPM_PIPELINE(PIPELINE),
.LPM_REMAINDERPOSITIVE("FALSE"), // emulate verilog % operator
.MAXIMIZE_SPEED(lpm_speed)
) quartus_divider (
.clock(clock),
.aclr(aclr),
.clken(clken),
.numer(numer),
.denom(denom),
.quotient(quotient),
.remain(remainder)
);
wire [WIDTHN-1:0] numer_pipe_end;
wire [WIDTHD-1:0] denom_pipe_end;
`else
if (PIPELINE == 0) begin
assign numer_pipe_end = numer;
assign denom_pipe_end = denom;
end else begin
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
wire [WIDTHN-1:0] numer_pipe_end;
wire [WIDTHD-1:0] denom_pipe_end;
genvar i;
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
always @(posedge clock or posedge aclr) begin
if (aclr) begin
numer_pipe[i+1] <= 0;
denom_pipe[i+1] <= 0;
end
else if (clken) begin
numer_pipe[i+1] <= numer_pipe[i];
denom_pipe[i+1] <= denom_pipe[i];
end
end
end
if (PIPELINE == 0) begin
assign numer_pipe_end = numer;
assign denom_pipe_end = denom;
end else begin
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
genvar i;
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
always @(posedge clock or posedge aclr) begin
if (aclr) begin
numer_pipe[0] <= 0;
denom_pipe[0] <= 0;
numer_pipe[i+1] <= 0;
denom_pipe[i+1] <= 0;
end
else if (clken) begin
numer_pipe[0] <= numer;
denom_pipe[0] <= denom;
numer_pipe[i+1] <= numer_pipe[i];
denom_pipe[i+1] <= denom_pipe[i];
end
end
assign numer_pipe_end = numer_pipe[PIPELINE-1];
assign denom_pipe_end = denom_pipe[PIPELINE-1];
end
/* * * * * * * * * * * * * * * * * * * * * * */
/* Do the actual fallback computation here */
/* * * * * * * * * * * * * * * * * * * * * * */
if (NREP == "SIGNED") begin
always @(*) begin
if (denom_pipe_end == 0) begin
quotient = 32'hffffffff;
remainder = numer_pipe_end;
end
else if (denom_pipe_end == 32'hffffffff && numer_pipe_end == 32'h80000000) begin
// this edge case kills verilator in some cases by causing a division
// overflow exception. INT_MIN / -1 (on x86)
quotient = 0;
remainder = 0;
end
else begin
quotient = $signed($signed(numer_pipe_end) / $signed(denom_pipe_end));
remainder = $signed($signed(numer_pipe_end) % $signed(denom_pipe_end));
end
always @(posedge clock or posedge aclr) begin
if (aclr) begin
numer_pipe[0] <= 0;
denom_pipe[0] <= 0;
end
else if (clken) begin
numer_pipe[0] <= numer;
denom_pipe[0] <= denom;
end
end
else begin
assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end;
assign remainder = (denom_pipe_end == 0) ? numer_pipe_end : numer_pipe_end%denom_pipe_end;
end
assign numer_pipe_end = numer_pipe[PIPELINE-1];
assign denom_pipe_end = denom_pipe[PIPELINE-1];
end
/* * * * * * * * * * * * * * * * * * * * * * */
/* Do the actual fallback computation here */
/* * * * * * * * * * * * * * * * * * * * * * */
if (NREP == "SIGNED") begin
always @(*) begin
if (denom_pipe_end == 0) begin
quotient = 32'hffffffff;
remainder = numer_pipe_end;
end
else if (denom_pipe_end == 32'hffffffff
&& numer_pipe_end == 32'h80000000) begin
// this edge case kills verilator in some cases by causing a division
// overflow exception. INT_MIN / -1 (on x86)
quotient = 0;
remainder = 0;
end
else begin
quotient = $signed($signed(numer_pipe_end) / $signed(denom_pipe_end));
remainder = $signed($signed(numer_pipe_end) % $signed(denom_pipe_end));
end
end
end
else begin
assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end;
assign remainder = (denom_pipe_end == 0) ? numer_pipe_end : numer_pipe_end%denom_pipe_end;
end
`endif
endgenerate
endmodule : VX_divide

View file

@ -2,21 +2,17 @@
module VX_generic_priority_encoder #(
parameter N = 1
) (
input wire[N-1:0] valids,
//output reg[$clog2(N)-1:0] index,
output reg[(`LOG2UP(N))-1:0] index,
//output reg[`LOG2UP(N):0] index, // eh
output reg found
);
) (
input wire[N-1:0] valids,
output reg[(`LOG2UP(N))-1:0] index,
output reg found
);
integer i;
always @(*) begin
index = 0;
found = 0;
for (i = N-1; i >= 0; i = i - 1) begin
if (valids[i]) begin
//index = i[$clog2(N)-1:0];
index = i[(`LOG2UP(N))-1:0];
found = 1;
end

View file

@ -132,7 +132,7 @@ module VX_generic_queue #(
rd_ptr_r <= rd_ptr_next_r;
if (SIZE > 2) begin
rd_ptr_next_r <= rd_ptr_r + 2;
rd_ptr_next_r <= rd_ptr_r + $bits(rd_ptr_r)'(2);
end else begin // (SIZE == 2);
rd_ptr_next_r <= ~rd_ptr_next_r;
end

View file

@ -19,110 +19,102 @@ module VX_mult #(
output reg [WIDTHP-1:0] result
);
// synthesis read_comments_as_HDL on
// localparam IMPL = "quartus";
// synthesis read_comments_as_HDL off
// altera translate_off
localparam IMPL="fallback";
// altera translate_on
generate
if (IMPL == "quartus") begin
`ifdef QUARTUS
localparam lpm_speed = (SPEED == "HIGHEST") ? 10 : 5;
if (FORCE_LE == "YES") begin
lpm_mult #(
.LPM_WIDTHA(WIDTHA),
.LPM_WIDTHB(WIDTHB),
.LPM_WIDTHP(WIDTHP),
.LPM_REPRESENTATION(REP),
.LPM_PIPELINE(PIPELINE),
.DSP_BLOCK_BALANCING("LOGIC ELEMENTS"),
.MAXIMIZE_SPEED(lpm_speed)
) quartus_mult (
.clock(clock),
.aclr(aclr),
.clken(clken),
.dataa(dataa),
.datab(datab),
.result(result)
);
end
else begin
lpm_mult#(
.LPM_WIDTHA(WIDTHA),
.LPM_WIDTHB(WIDTHB),
.LPM_WIDTHP(WIDTHP),
.LPM_REPRESENTATION(REP),
.LPM_PIPELINE(PIPELINE),
.MAXIMIZE_SPEED(lpm_speed)
) quartus_mult(
.clock(clock),
.aclr(aclr),
.clken(clken),
.dataa(dataa),
.datab(datab),
.result(result)
);
end
localparam lpm_speed = (SPEED == "HIGHEST") ? 10 : 5;
if (FORCE_LE == "YES") begin
lpm_mult #(
.LPM_WIDTHA(WIDTHA),
.LPM_WIDTHB(WIDTHB),
.LPM_WIDTHP(WIDTHP),
.LPM_REPRESENTATION(REP),
.LPM_PIPELINE(PIPELINE),
.DSP_BLOCK_BALANCING("LOGIC ELEMENTS"),
.MAXIMIZE_SPEED(lpm_speed)
) quartus_mult (
.clock(clock),
.aclr(aclr),
.clken(clken),
.dataa(dataa),
.datab(datab),
.result(result)
);
end
else begin
lpm_mult#(
.LPM_WIDTHA(WIDTHA),
.LPM_WIDTHB(WIDTHB),
.LPM_WIDTHP(WIDTHP),
.LPM_REPRESENTATION(REP),
.LPM_PIPELINE(PIPELINE),
.MAXIMIZE_SPEED(lpm_speed)
) quartus_mult(
.clock(clock),
.aclr(aclr),
.clken(clken),
.dataa(dataa),
.datab(datab),
.result(result)
);
end
wire [WIDTHA-1:0] dataa_pipe_end;
wire [WIDTHB-1:0] datab_pipe_end;
`else
wire [WIDTHA-1:0] dataa_pipe_end;
wire [WIDTHB-1:0] datab_pipe_end;
if (PIPELINE == 0) begin
assign dataa_pipe_end = dataa;
assign datab_pipe_end = datab;
end else begin
reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
genvar i;
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
always @(posedge clock or posedge aclr) begin
if (aclr) begin
dataa_pipe[i+1] <= 0;
datab_pipe[i+1] <= 0;
end
else if (clken) begin
dataa_pipe[i+1] <= dataa_pipe[i];
datab_pipe[i+1] <= datab_pipe[i];
end
end
end
if (PIPELINE == 0) begin
assign dataa_pipe_end = dataa;
assign datab_pipe_end = datab;
end else begin
reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
genvar i;
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
always @(posedge clock or posedge aclr) begin
if (aclr) begin
dataa_pipe[0] <= 0;
datab_pipe[0] <= 0;
dataa_pipe[i+1] <= 0;
datab_pipe[i+1] <= 0;
end
else if (clken) begin
dataa_pipe[0] <= dataa;
datab_pipe[0] <= datab;
dataa_pipe[i+1] <= dataa_pipe[i];
datab_pipe[i+1] <= datab_pipe[i];
end
end
assign dataa_pipe_end = dataa_pipe[PIPELINE-1];
assign datab_pipe_end = datab_pipe[PIPELINE-1];
end
/* * * * * * * * * * * * * * * * * * * * * * */
/* Do the actual fallback computation here */
/* * * * * * * * * * * * * * * * * * * * * * */
if (REP == "SIGNED") begin
assign result = $signed($signed(dataa_pipe_end)*$signed(datab_pipe_end));
end
else begin
assign result = dataa_pipe_end*datab_pipe_end;
always @(posedge clock or posedge aclr) begin
if (aclr) begin
dataa_pipe[0] <= 0;
datab_pipe[0] <= 0;
end
else if (clken) begin
dataa_pipe[0] <= dataa;
datab_pipe[0] <= datab;
end
end
assign dataa_pipe_end = dataa_pipe[PIPELINE-1];
assign datab_pipe_end = datab_pipe[PIPELINE-1];
end
/* * * * * * * * * * * * * * * * * * * * * * */
/* Do the actual fallback computation here */
/* * * * * * * * * * * * * * * * * * * * * * */
if (REP == "SIGNED") begin
assign result = $signed($signed(dataa_pipe_end)*$signed(datab_pipe_end));
end
else begin
assign result = dataa_pipe_end * datab_pipe_end;
end
`endif
endgenerate
endmodule: VX_mult

View file

@ -18,26 +18,22 @@ module VX_scope #(
input wire bus_write,
input wire bus_read
);
localparam DELTA_ENABLE = (UPDW != 0);
localparam MAX_DELTA = (2 ** DELTAW) - 1;
localparam DELTA_ENABLE = (UPDW != 0);
localparam MAX_DELTA = (2 ** DELTAW) - 1;
typedef enum logic[2:0] {
CMD_GET_VALID,
CMD_GET_DATA,
CMD_GET_WIDTH,
CMD_GET_COUNT,
CMD_SET_DELAY,
CMD_SET_STOP,
CMD_RESERVED1,
CMD_RESERVED2
} cmd_t;
localparam CMD_GET_VALID = 3'd0;
localparam CMD_GET_DATA = 3'd1;
localparam CMD_GET_WIDTH = 3'd2;
localparam CMD_GET_COUNT = 3'd3;
localparam CMD_SET_DELAY = 3'd4;
localparam CMD_SET_STOP = 3'd5;
localparam CMD_RESERVED1 = 3'd6;
localparam CMD_RESERVED2 = 3'd7;
typedef enum logic[1:0] {
GET_VALID,
GET_DATA,
GET_WIDTH,
GET_COUNT
} cmd_get_t;
localparam GET_VALID = 2'd0;
localparam GET_DATA = 2'd1;
localparam GET_WIDTH = 2'd2;
localparam GET_COUNT = 2'd3;
reg [DATAW-1:0] data_store [SIZE-1:0];
reg [DELTAW-1:0] delta_store [SIZE-1:0];
@ -84,10 +80,10 @@ module VX_scope #(
CMD_GET_VALID,
CMD_GET_DATA,
CMD_GET_WIDTH,
CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type);
CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data);
CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data);
default:;
CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type);
CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data);
CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data);
default:;
endcase
end
@ -183,7 +179,7 @@ module VX_scope #(
end
`ifdef DBG_PRINT_SCOPE
always_ff @(posedge clk) begin
always @(posedge clk) begin
if (bus_read) begin
$display("%t: scope-read: cmd=%0d, out=0x%0h, addr=%0d", $time, out_cmd, bus_out, raddr);
end

View file

@ -28,6 +28,7 @@ VF += --x-initial unique
VF += -exe $(SRCS) $(INCLUDE)
DBG += -DVCD_OUTPUT $(DBG_PRINT)
DBG += -DDBG_CORE_REQ_INFO
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')

View file

@ -11,11 +11,12 @@ double sc_time_stamp() {
Simulator::Simulator() {
// force random values for unitialized signals
Verilated::randReset(1);
Verilated::randReset(2);
ram_ = nullptr;
vortex_ = new VVortex_Socket();
dram_rsp_active_ = false;
snp_req_active_ = false;
#ifdef VCD_OUTPUT
@ -76,7 +77,7 @@ void Simulator::eval_dram_bus() {
return;
}
// handle DRAM response cycle
// schedule DRAM responses
int dequeue_index = -1;
for (int i = 0; i < dram_rsp_vec_.size(); i++) {
if (dram_rsp_vec_[i].cycles_left > 0) {
@ -88,16 +89,23 @@ void Simulator::eval_dram_bus() {
}
}
// handle DRAM response message
if ((dequeue_index != -1)
// send DRAM response
if (dram_rsp_active_
&& vortex_->dram_rsp_valid
&& vortex_->dram_rsp_ready) {
vortex_->dram_rsp_valid = 1;
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
free(dram_rsp_vec_[dequeue_index].data);
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
} else {
vortex_->dram_rsp_valid = 0;
dram_rsp_active_ = false;
}
if (!dram_rsp_active_) {
if (dequeue_index != -1) {
vortex_->dram_rsp_valid = 1;
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
free(dram_rsp_vec_[dequeue_index].data);
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
dram_rsp_active_ = true;
} else {
vortex_->dram_rsp_valid = 0;
}
}
// handle DRAM stalls
@ -111,7 +119,7 @@ void Simulator::eval_dram_bus() {
}
#endif
// handle DRAM requests
// process DRAM requests
if (!dram_stalled) {
if (vortex_->dram_req_valid) {
if (vortex_->dram_req_rw) {

View file

@ -55,8 +55,9 @@ private:
void eval_snp_bus();
std::vector<dram_req_t> dram_rsp_vec_;
int dram_rsp_active_;
uint32_t snp_req_active_;
bool snp_req_active_;
uint32_t snp_req_size_;
uint32_t pending_snp_reqs_;

View file

@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache"
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/cache"
syn.chg:
$(STAMP) syn.chg

View file

@ -1 +0,0 @@
create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]

View file

@ -1,67 +0,0 @@
load_package flow
package require cmdline
set options {
{ "project.arg" "" "Project name" }
{ "family.arg" "" "Device family name" }
{ "device.arg" "" "Device name" }
{ "top.arg" "" "Top level module" }
{ "src.arg" "" "Verilog source file" }
{ "inc.arg" "" "Include path (optional)" }
{ "sdc.arg" "" "Timing Design Constraints file (optional)" }
{ "set.arg" "" "Macro value (optional)" }
}
set q_args_orig $quartus(args)
array set opts [::cmdline::getoptions quartus(args) $options]
# Verify required parameters
set requiredParameters {project family device top src}
foreach p $requiredParameters {
if {$opts($p) == ""} {
puts stderr "Missing required parameter: -$p"
exit 1
}
}
project_new $opts(project) -overwrite
set_global_assignment -name FAMILY $opts(family)
set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set idx 0
foreach arg $q_args_orig {
incr idx
if [string match "-src" $arg] {
set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx]
}
if [string match "-inc" $arg] {
set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx]
}
if [string match "-sdc" $arg] {
set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx]
}
if [string match "-set" $arg] {
set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx]
}
}
proc make_all_pins_virtual {} {
execute_module -tool map
set name_ids [get_names -filter * -node_type pin]
foreach_in_collection name_id $name_ids {
set pin_name [get_name_info -info full_path $name_id]
post_message "Making VIRTUAL_PIN assignment to $pin_name"
set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
}
export_assignments
}
make_all_pins_virtual
project_close

View file

@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache"
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs"
syn.chg:
$(STAMP) syn.chg

View file

@ -1,67 +0,0 @@
load_package flow
package require cmdline
set options {
{ "project.arg" "" "Project name" }
{ "family.arg" "" "Device family name" }
{ "device.arg" "" "Device name" }
{ "top.arg" "" "Top level module" }
{ "src.arg" "" "Verilog source file" }
{ "inc.arg" "" "Include path (optional)" }
{ "sdc.arg" "" "Timing Design Constraints file (optional)" }
{ "set.arg" "" "Macro value (optional)" }
}
set q_args_orig $quartus(args)
array set opts [::cmdline::getoptions quartus(args) $options]
# Verify required parameters
set requiredParameters {project family device top src}
foreach p $requiredParameters {
if {$opts($p) == ""} {
puts stderr "Missing required parameter: -$p"
exit 1
}
}
project_new $opts(project) -overwrite
set_global_assignment -name FAMILY $opts(family)
set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set idx 0
foreach arg $q_args_orig {
incr idx
if [string match "-src" $arg] {
set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx]
}
if [string match "-inc" $arg] {
set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx]
}
if [string match "-sdc" $arg] {
set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx]
}
if [string match "-set" $arg] {
set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx]
}
}
proc make_all_pins_virtual {} {
execute_module -tool map
set name_ids [get_names -filter * -node_type pin]
foreach_in_collection name_id $name_ids {
set pin_name [get_name_info -info full_path $name_id]
post_message "Making VIRTUAL_PIN assignment to $pin_name"
set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
}
export_assignments
}
make_all_pins_virtual
project_close

View file

@ -33,6 +33,9 @@ set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
set_global_assignment -name VERILOG_MACRO QUARTUS
set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG
set idx 0
foreach arg $q_args_orig {

View file

@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae;../../../opae/ccip" -macro "NOPAE"
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -set "NOPAE" -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae;../../../opae/ccip"
syn.chg:
$(STAMP) syn.chg

View file

@ -1,9 +0,0 @@
set_time_format -unit ns -decimal_places 3
create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
derive_pll_clocks -create_base_clocks
derive_clock_uncertainty

View file

@ -1,67 +1,70 @@
load_package flow
package require cmdline
PROJECT = VX_vortex
TOP_LEVEL_ENTITY = VX_vortex
SRC_FILE = VX_vortex.v
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
set options {
{ "project.arg" "" "Project name" }
{ "family.arg" "" "Device family name" }
{ "device.arg" "" "Device name" }
{ "top.arg" "" "Top level module" }
{ "src.arg" "" "Verilog source file" }
{ "inc.arg" "" "Include path (optional)" }
{ "sdc.arg" "" "Timing Design Constraints file (optional)" }
{ "set.arg" "" "Macro value (optional)" }
}
# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
set q_args_orig $quartus(args)
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --do_report_timing
array set opts [::cmdline::getoptions quartus(args) $options]
# Build targets
all: $(PROJECT).sta.rpt
# Verify required parameters
set requiredParameters {project family device top src}
foreach p $requiredParameters {
if {$opts($p) == ""} {
puts stderr "Missing required parameter: -$p"
exit 1
}
}
syn: $(PROJECT).syn.rpt
project_new $opts(project) -overwrite
fit: $(PROJECT).fit.rpt
set_global_assignment -name FAMILY $opts(family)
set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
asm: $(PROJECT).asm.rpt
set idx 0
foreach arg $q_args_orig {
incr idx
if [string match "-src" $arg] {
set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx]
}
if [string match "-inc" $arg] {
set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx]
}
if [string match "-sdc" $arg] {
set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx]
}
if [string match "-set" $arg] {
set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx]
}
}
sta: $(PROJECT).sta.rpt
proc make_all_pins_virtual {} {
execute_module -tool map
set name_ids [get_names -filter * -node_type pin]
foreach_in_collection name_id $name_ids {
set pin_name [get_name_info -info full_path $name_id]
post_message "Making VIRTUAL_PIN assignment to $pin_name"
set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
}
export_assignments
}
smart: smart.log
make_all_pins_virtual
# Target implementations
STAMP = echo done >
project_close
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws smart.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View file

@ -1,9 +0,0 @@
set_time_format -unit ns -decimal_places 3
create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
derive_pll_clocks -create_base_clocks
derive_clock_uncertainty

View file

@ -1,41 +0,0 @@
load_package flow
package require cmdline
set options { \
{ "project.arg" "" "Project name" } \
{ "family.arg" "" "Device family name" } \
{ "device.arg" "" "Device name" } \
{ "top.arg" "" "Top level module" } \
{ "sdc.arg" "" "Timing Design Constraints file" } \
{ "src.arg" "" "Verilog source file" } \
{ "inc.arg" "." "Include path" } \
}
array set opts [::cmdline::getoptions quartus(args) $options]
project_new $opts(project) -overwrite
set_global_assignment -name FAMILY $opts(family)
set_global_assignment -name DEVICE $opts(device)
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
set_global_assignment -name VERILOG_FILE $opts(src)
set_global_assignment -name SEARCH_PATH $opts(inc)
set_global_assignment -name SDC_FILE $opts(sdc)
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
proc make_all_pins_virtual {} {
execute_module -tool map
set name_ids [get_names -filter * -node_type pin]
foreach_in_collection name_id $name_ids {
set pin_name [get_name_info -info full_path $name_id]
post_message "Making VIRTUAL_PIN assignment to $pin_name"
set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
}
export_assignments
}
make_all_pins_virtual
project_close