mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
driver basic test and demo test refactoring
This commit is contained in:
parent
e2e1b63e14
commit
68d9fc9a75
55 changed files with 1006 additions and 1205 deletions
|
@ -18,7 +18,10 @@ CXXFLAGS +=-fstack-protector
|
|||
CXXFLAGS += -fPIC
|
||||
|
||||
# Enable scope analyzer
|
||||
#CXXFLAGS += -DSCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
|
||||
# config parameters
|
||||
CXXFLAGS += -DNUM_WARPS=2 -DNUM_THREADS=2
|
||||
|
||||
LDFLAGS += -luuid
|
||||
|
||||
|
|
|
@ -25,31 +25,34 @@ struct scope_signal_t {
|
|||
const char* name;
|
||||
};
|
||||
|
||||
// Compile-time integer base-2 logarithm, rounded down: each recursion halves n
// and adds one until n <= 1, so ilog2(1) == 0, ilog2(2) == 1, ilog2(3) == 1.
// Any n <= 1 (including zero and negative values) yields 0.
constexpr int ilog2(int n) {
|
||||
return (n > 1) ? 1 + ilog2(n >> 1) : 0;
|
||||
}
|
||||
|
||||
static constexpr int NW_BITS = ilog2(NUM_WARPS);
|
||||
|
||||
static const scope_signal_t scope_signals[] = {
|
||||
{ 2, "icache_req_warp_num" },
|
||||
{ NW_BITS, "icache_req_warp_num" },
|
||||
{ 32, "icache_req_addr" },
|
||||
{ 2, "icache_req_tag" },
|
||||
|
||||
{ NW_BITS, "icache_req_tag" },
|
||||
{ 32, "icache_rsp_data" },
|
||||
{ 2, "icache_rsp_tag" },
|
||||
{ NW_BITS, "icache_rsp_tag" },
|
||||
|
||||
{ 2, "dcache_req_warp_num" },
|
||||
{ NW_BITS, "dcache_req_warp_num" },
|
||||
{ 32, "dcache_req_curr_PC" },
|
||||
{ 32, "dcache_req_addr" },
|
||||
{ 1, "dcache_req_rw" },
|
||||
{ 4, "dcache_req_byteen" },
|
||||
{ 32, "dcache_req_data" },
|
||||
{ 2, "dcache_req_tag" },
|
||||
|
||||
{ NW_BITS, "dcache_req_tag" },
|
||||
{ 32, "dcache_rsp_data" },
|
||||
{ 2 , "dcache_rsp_tag" },
|
||||
{ NW_BITS, "dcache_rsp_tag" },
|
||||
|
||||
{ 32, "dram_req_addr" },
|
||||
{ 1, "dram_req_rw" },
|
||||
{ 16, "dram_req_byteen" },
|
||||
{ 32, "dram_req_data" },
|
||||
{ 29, "dram_req_tag" },
|
||||
|
||||
{ 32, "dram_rsp_data" },
|
||||
{ 29, "dram_rsp_tag" },
|
||||
|
||||
|
@ -58,30 +61,32 @@ static const scope_signal_t scope_signals[] = {
|
|||
{ 16, "snp_req_tag" },
|
||||
{ 16, "snp_rsp_tag" },
|
||||
|
||||
{ 2, "decode_warp_num" },
|
||||
{ NW_BITS, "decode_warp_num" },
|
||||
{ 32, "decode_curr_PC" },
|
||||
{ 1, "decode_is_jal" },
|
||||
{ 5, "decode_rs1" },
|
||||
{ 5, "decode_rs2" },
|
||||
{ 1, "decode_is_jal" },
|
||||
{ 5, "decode_rs1" },
|
||||
{ 5, "decode_rs2" },
|
||||
|
||||
{ 2, "execute_warp_num" },
|
||||
{ NW_BITS, "execute_warp_num" },
|
||||
{ 5, "execute_rd" },
|
||||
{ 32, "execute_a" },
|
||||
{ 32, "execute_b" },
|
||||
|
||||
{ 2, "writeback_warp_num" },
|
||||
{ NW_BITS, "writeback_warp_num" },
|
||||
{ 2, "writeback_wb" },
|
||||
{ 5, "writeback_rd" },
|
||||
{ 32, "writeback_data" },
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
{ 1, "icache_req_valid" },
|
||||
{ 1, "icache_req_ready" },
|
||||
{ 1, "icache_rsp_valid" },
|
||||
{ 1, "icache_rsp_ready" },
|
||||
|
||||
{ 4, "dcache_req_valid" },
|
||||
{ NUM_THREADS, "dcache_req_valid" },
|
||||
{ 1, "dcache_req_ready" },
|
||||
{ 4, "dcache_rsp_valid" },
|
||||
{ NUM_THREADS, "dcache_rsp_valid" },
|
||||
{ 1, "dcache_rsp_ready" },
|
||||
|
||||
{ 1, "dram_req_valid" },
|
||||
|
@ -94,14 +99,19 @@ static const scope_signal_t scope_signals[] = {
|
|||
{ 1, "snp_rsp_valid" },
|
||||
{ 1, "snp_rsp_ready" },
|
||||
|
||||
{ 4, "decode_valid" },
|
||||
{ 4, "execute_valid" },
|
||||
{ 4, "writeback_valid" },
|
||||
{ NUM_THREADS, "decode_valid" },
|
||||
{ NUM_THREADS, "execute_valid" },
|
||||
{ NUM_THREADS, "writeback_valid" },
|
||||
{ 1, "schedule_delay" },
|
||||
{ 1, "memory_delay" },
|
||||
{ 1, "exec_delay" },
|
||||
{ 1, "gpr_stage_delay" },
|
||||
{ 1, "busy" },
|
||||
|
||||
{ 1, "idram_req_valid" },
|
||||
{ 1, "idram_req_ready" },
|
||||
{ 1, "idram_rsp_valid" },
|
||||
{ 1, "idram_rsp_ready" },
|
||||
};
|
||||
|
||||
static const int num_signals = sizeof(scope_signals) / sizeof(scope_signal_t);
|
||||
|
@ -161,7 +171,10 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) {
|
|||
|
||||
CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_CSR_SCOPE_CMD, 1));
|
||||
|
||||
assert(fwidth == (int)frame_width);
|
||||
if (fwidth != (int)frame_width) {
|
||||
std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
std::vector<char> signal_data(frame_width+1);
|
||||
|
||||
uint64_t frame_offset = 0;
|
||||
|
|
|
@ -13,18 +13,19 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
|
|||
-DDBG_PRINT_WB \
|
||||
-DDBG_PRINT_OPAE
|
||||
|
||||
#DBG_PRINT=$(DBG_PRINT_FLAGS)
|
||||
DBG_PRINT=$(DBG_PRINT_FLAGS)
|
||||
|
||||
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4
|
||||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=4
|
||||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
||||
CONFIGS += -DNUM_WARPS=2 -DNUM_THREADS=2
|
||||
|
||||
#DEBUG=1
|
||||
#AFU=1
|
||||
DEBUG=1
|
||||
AFU=1
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
||||
CFLAGS += -DUSE_RTLSIM $(MULTICORE)
|
||||
CFLAGS += -DUSE_RTLSIM $(CONFIGS)
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
# LDFLAGS += -dynamiclib -pthread
|
||||
|
@ -35,7 +36,7 @@ SRCS = vortex.cpp ../common/vx_utils.cpp ../../hw/simulate/simulator.cpp
|
|||
|
||||
RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/pipe_regs -I../../hw/rtl/cache
|
||||
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(MULTICORE)
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(CONFIGS)
|
||||
VL_FLAGS += -Wno-DECLFILENAME
|
||||
VL_FLAGS += --x-initial unique
|
||||
VL_FLAGS += --x-assign unique
|
||||
|
@ -47,9 +48,11 @@ VL_FLAGS += --x-assign unique
|
|||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace -DVCD_OUTPUT $(DBG_PRINT)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_PRINT)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_PRINT)
|
||||
#VL_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
#CFLAGS += -DDBG_CORE_REQ_INFO
|
||||
else
|
||||
CFLAGS += -DNDEBUG
|
||||
CFLAGS += -DNDEBUG
|
||||
VL_FLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
|
|
|
@ -44,16 +44,16 @@ $(PROJECT): $(SRCS)
|
|||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
|
||||
|
||||
run-fpga: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 32
|
||||
|
||||
run-ase: $(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
|
||||
|
||||
run-rtlsim: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
|
||||
|
||||
run-simx: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <vortex.h>
|
||||
#include "common.h"
|
||||
|
||||
int test = -1;
|
||||
|
||||
#define RT_CHECK(_expr) \
|
||||
do { \
|
||||
int _ret = _expr; \
|
||||
|
@ -15,79 +14,84 @@ int test = -1;
|
|||
exit(-1); \
|
||||
} while (false)
|
||||
|
||||
const char* kernel_file = "kernel.bin";
|
||||
int test = -1;
|
||||
uint32_t count = 0;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h buffer = nullptr;
|
||||
|
||||
// Prints the test banner and a one-line summary of the accepted command-line
// options to standard output.
static void show_usage() {
|
||||
std::cout << "Vortex Driver Test." << std::endl;
|
||||
std::cout << "Usage: [-t testno][-k: kernel][-n words][-h: help]" << std::endl;
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "t:h?")) != -1) {
|
||||
while ((c = getopt(argc, argv, "n:t:k:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 't': {
|
||||
case 'n':
|
||||
count = atoi(optarg);
|
||||
break;
|
||||
case 't':
|
||||
test = atoi(optarg);
|
||||
} break;
|
||||
break;
|
||||
case 'k':
|
||||
kernel_file = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
std::cout << "Test." << std::endl;
|
||||
std::cout << "Usage: [-t testno][-h: help]" << std::endl;
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the global resources held by the test: the shared buffer first,
// then the device handle. Each step is skipped when its handle is still null.
void cleanup() {
|
||||
if (buffer) {
|
||||
vx_buf_release(buffer);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the deterministic 64-bit test pattern for word index i from a seed:
// the seed shifted left by the index, OR-ed with the seed's own low bits that
// lie below the shift position.
//
// The shift count is reduced modulo 64 and the mask is computed in 64-bit
// arithmetic. The earlier form shifted a plain int (`1 << i`) and shifted
// `value` by the raw index; both are undefined once i reaches the operand
// width, and the memcopy test passes indices up to (64 * num_blocks) / 8 - 1.
// Results for 0 <= i <= 30 are unchanged.
uint64_t shuffle(int i, uint64_t value) {
  const unsigned shift = (unsigned)i & 63u;
  const uint64_t low_mask = ((uint64_t)1 << shift) - 1;
  return (value << shift) | (value & low_mask);
}
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h sbuf = nullptr;
|
||||
vx_buffer_h dbuf = nullptr;
|
||||
|
||||
int total_blocks = NUM_BLOCKS;
|
||||
|
||||
void cleanup() {
|
||||
if (sbuf) {
|
||||
vx_buf_release(sbuf);
|
||||
}
|
||||
if (dbuf) {
|
||||
vx_buf_release(dbuf);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
int run_memcopy_test(vx_buffer_h sbuf,
|
||||
vx_buffer_h dbuf,
|
||||
uint32_t address,
|
||||
uint64_t value,
|
||||
int num_blocks) {
|
||||
int run_memcopy_test(uint32_t dev_addr, uint64_t value, int num_blocks) {
|
||||
int errors = 0;
|
||||
|
||||
// write sbuf data
|
||||
// update source buffer
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, value);
|
||||
}
|
||||
|
||||
// clear dbuf data
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
((uint64_t*)vx_host_ptr(dbuf))[i] = 0;
|
||||
((uint64_t*)vx_host_ptr(buffer))[i] = shuffle(i, value);
|
||||
}
|
||||
|
||||
// write buffer to local memory
|
||||
std::cout << "write buffer to local memory" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, dev_addr, 64 * num_blocks, 0));
|
||||
|
||||
// clear destination buffer
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
((uint64_t*)vx_host_ptr(buffer))[i] = 0;
|
||||
}
|
||||
|
||||
// read buffer from local memory
|
||||
std::cout << "read buffer from local memory" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0));
|
||||
RT_CHECK(vx_copy_from_dev(buffer, dev_addr, 64 * num_blocks, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
|
||||
auto curr = ((uint64_t*)vx_host_ptr(buffer))[i];
|
||||
auto ref = shuffle(i, value);
|
||||
if (curr != ref) {
|
||||
std::cout << "error at 0x" << std::hex << (address + 8 * i)
|
||||
std::cout << "error at 0x" << std::hex << (dev_addr + 8 * i)
|
||||
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
|
@ -102,35 +106,19 @@ int run_memcopy_test(vx_buffer_h sbuf,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int run_kernel_test(vx_device_h device,
|
||||
vx_buffer_h sbuf,
|
||||
vx_buffer_h dbuf,
|
||||
const char* program,
|
||||
int num_blocks) {
|
||||
int errors = 0;
|
||||
|
||||
uint64_t seed = 0x0badf00d40ff40ff;
|
||||
int run_kernel_test(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t num_points) {
|
||||
int errors = 0;
|
||||
|
||||
int src_dev_addr = DEV_MEM_SRC_ADDR;
|
||||
int dest_dev_addr = DEV_MEM_DST_ADDR;
|
||||
|
||||
// write sbuf data
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, seed);
|
||||
}
|
||||
|
||||
// clear dbuf data
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
((uint64_t*)vx_host_ptr(dbuf))[i] = 0;
|
||||
// update source buffer
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
((int32_t*)vx_host_ptr(buffer))[i] = i;
|
||||
}
|
||||
|
||||
// write buffer to local memory
|
||||
std::cout << "write buffer to local memory" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0));
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
RT_CHECK(vx_upload_kernel_file(device, program));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src_ptr, buf_size, 0));
|
||||
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
|
@ -142,19 +130,24 @@ int run_kernel_test(vx_device_h device,
|
|||
|
||||
// flush the caches
|
||||
std::cout << "flush the caches" << std::endl;
|
||||
RT_CHECK(vx_flush_caches(device, dest_dev_addr, 64 * num_blocks));
|
||||
RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
|
||||
|
||||
// clear destination buffer
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
((int32_t*)vx_host_ptr(buffer))[i] = 0;
|
||||
}
|
||||
|
||||
// read buffer from local memory
|
||||
std::cout << "read buffer from local memory" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0));
|
||||
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
|
||||
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
|
||||
auto ref = shuffle(i, seed);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
int32_t curr = ((int32_t*)vx_host_ptr(buffer))[i];
|
||||
int32_t ref = i;
|
||||
if (curr != ref) {
|
||||
std::cout << "error at 0x" << std::hex << (dest_dev_addr + 8 * i)
|
||||
std::cout << "error at value " << i
|
||||
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
|
@ -170,33 +163,66 @@ int run_kernel_test(vx_device_h device,
|
|||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
std::cout << "total blocks: " << total_blocks << std::endl;
|
||||
if (count == 0) {
|
||||
count = 1;
|
||||
}
|
||||
|
||||
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
|
||||
uint32_t num_points = max_cores * count;
|
||||
uint32_t num_blocks = (num_points * sizeof(uint32_t) + 63) / 64;
|
||||
uint32_t buf_size = num_blocks * 64;
|
||||
|
||||
std::cout << "number of points: " << num_points << std::endl;
|
||||
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
// create source buffer
|
||||
std::cout << "create source buffer" << std::endl;
|
||||
RT_CHECK(vx_alloc_shared_mem(device, total_blocks * 64, &sbuf));
|
||||
|
||||
// create destination buffer
|
||||
std::cout << "create destination buffer" << std::endl;
|
||||
RT_CHECK(vx_alloc_shared_mem(device, total_blocks * 64, &dbuf));
|
||||
// allocate device memory
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
|
||||
kernel_arg.count = count;
|
||||
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
|
||||
|
||||
// run tests
|
||||
if (0 == test || -1 == test) {
|
||||
std::cout << "run memcopy test" << std::endl;
|
||||
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d00ff00ff, 1));
|
||||
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d40ff40ff, total_blocks));
|
||||
RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d00ff00ff, 1));
|
||||
RT_CHECK(run_memcopy_test(kernel_arg.src_ptr, 0x0badf00d40ff40ff, num_blocks));
|
||||
}
|
||||
|
||||
if (1 == test || -1 == test) {
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (void*)vx_host_ptr(buffer);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
}
|
||||
|
||||
std::cout << "run kernel test" << std::endl;
|
||||
RT_CHECK(run_kernel_test(device, sbuf, dbuf, "kernel.bin", total_blocks));
|
||||
RT_CHECK(run_kernel_test(kernel_arg, buf_size, num_points));
|
||||
}
|
||||
|
||||
// cleanup
|
||||
|
|
|
@ -1,8 +1,12 @@
|
|||
#ifndef _COMMON_H_
|
||||
#define _COMMON_H_
|
||||
|
||||
#define DEV_MEM_SRC_ADDR 0x10000040
|
||||
#define DEV_MEM_DST_ADDR 0x20000080
|
||||
#define NUM_BLOCKS 16
|
||||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||
|
||||
// Argument record shared between the host test and the device kernel: the host
// copies it to KERNEL_ARG_DEV_MEM_ADDR and the kernel reads it back from there.
struct kernel_arg_t {
|
||||
// number of 32-bit elements each core copies
uint32_t count;
|
||||
// device address of the source data (the kernel casts it to int32_t*)
uint32_t src_ptr;
|
||||
// device address of the destination data (the kernel casts it to int32_t*)
uint32_t dst_ptr;
|
||||
};
|
||||
|
||||
#endif
|
Binary file not shown.
|
@ -4,17 +4,14 @@
|
|||
#include "common.h"
|
||||
|
||||
void main() {
|
||||
int64_t* x = (int64_t*)DEV_MEM_SRC_ADDR;
|
||||
int64_t* y = (int64_t*)DEV_MEM_DST_ADDR;
|
||||
int num_words = (NUM_BLOCKS * 64) / 8;
|
||||
|
||||
int core_id = vx_core_id();
|
||||
int num_cores = vx_num_cores();
|
||||
int num_words_per_core = num_words / num_cores;
|
||||
|
||||
int offset = core_id * num_words_per_core;
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
uint32_t count = arg->count;
|
||||
int32_t* src_ptr = (int32_t*)arg->src_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)arg->dst_ptr;
|
||||
|
||||
for (int i = 0; i < num_words_per_core; ++i) {
|
||||
y[offset + i] = x[offset + i];
|
||||
uint32_t offset = vx_core_id() * count;
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
dst_ptr[offset + i] = src_ptr[offset + i];
|
||||
}
|
||||
}
|
|
@ -41,16 +41,16 @@ $(PROJECT): $(SRCS)
|
|||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@
|
||||
|
||||
run-fpga: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
|
||||
|
||||
run-ase: $(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
|
||||
|
||||
run-rtlsim: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
|
||||
|
||||
run-simx: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 16
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||
|
||||
struct kernel_arg_t {
|
||||
uint32_t stride;
|
||||
uint32_t count;
|
||||
uint32_t src0_ptr;
|
||||
uint32_t src1_ptr;
|
||||
uint32_t dst_ptr;
|
||||
|
|
|
@ -14,23 +14,26 @@
|
|||
exit(-1); \
|
||||
} while (false)
|
||||
|
||||
const char* program_file = "kernel.bin";
|
||||
uint32_t data_stride = 0;
|
||||
const char* kernel_file = "kernel.bin";
|
||||
uint32_t count = 0;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h buffer = nullptr;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Driver Test." << std::endl;
|
||||
std::cout << "Usage: [-f: program] [-n stride] [-h: help]" << std::endl;
|
||||
std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:f:h?")) != -1) {
|
||||
while ((c = getopt(argc, argv, "n:k:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
data_stride = atoi(optarg);
|
||||
count = atoi(optarg);
|
||||
break;
|
||||
case 'f':
|
||||
program_file = optarg;
|
||||
case 'k':
|
||||
kernel_file = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
|
@ -42,16 +45,8 @@ static void parse_args(int argc, char **argv) {
|
|||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
if (nullptr == program_file) {
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h buffer = nullptr;
|
||||
|
||||
void cleanup() {
|
||||
if (buffer) {
|
||||
vx_buf_release(buffer);
|
||||
|
@ -61,9 +56,7 @@ void cleanup() {
|
|||
}
|
||||
}
|
||||
|
||||
int run_test(vx_device_h device,
|
||||
vx_buffer_h buffer,
|
||||
const kernel_arg_t& kernel_arg,
|
||||
int run_test(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t num_points) {
|
||||
// start device
|
||||
|
@ -86,13 +79,13 @@ int run_test(vx_device_h device,
|
|||
std::cout << "verify result" << std::endl;
|
||||
{
|
||||
int errors = 0;
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
int ref = i + i;
|
||||
int cur = buf_ptr[i];
|
||||
if (cur != ref) {
|
||||
std::cout << "error at value " << i
|
||||
<< ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
|
||||
<< ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -113,21 +106,18 @@ int main(int argc, char *argv[]) {
|
|||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
if (count == 0) {
|
||||
count = 1;
|
||||
}
|
||||
|
||||
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
|
||||
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
|
||||
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
|
||||
|
||||
if (data_stride == 0) {
|
||||
data_stride = 1;
|
||||
}
|
||||
uint32_t num_points = count * max_cores * max_warps * max_threads;
|
||||
uint32_t buf_size = num_points * sizeof(uint32_t);
|
||||
|
||||
kernel_arg.stride = data_stride;
|
||||
|
||||
uint32_t num_points = max_cores * max_warps * max_threads;
|
||||
uint32_t buf_size = num_points * data_stride * sizeof(uint32_t);
|
||||
|
||||
std::cout << "number of workitems: " << num_points << std::endl;
|
||||
std::cout << "workitem size: " << data_stride * sizeof(uint32_t) << " bytes" << std::endl;
|
||||
std::cout << "number of points: " << num_points << std::endl;
|
||||
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
||||
|
||||
// open device connection
|
||||
|
@ -136,55 +126,29 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
RT_CHECK(vx_upload_kernel_file(device, program_file));
|
||||
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src0_ptr = value;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.src1_ptr = value;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
|
||||
kernel_arg.count = count;
|
||||
|
||||
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
|
||||
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
|
||||
|
||||
// populate source buffer0 values
|
||||
std::cout << "populate source buffer0 values" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = i-1;
|
||||
}
|
||||
}
|
||||
|
||||
// upload source buffer0
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
|
||||
|
||||
// populate source buffer1 values
|
||||
std::cout << "populate source buffer1 values" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = i+1;
|
||||
}
|
||||
}
|
||||
|
||||
// upload source buffer1
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
|
||||
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
|
@ -193,9 +157,41 @@ int main(int argc, char *argv[]) {
|
|||
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
}
|
||||
|
||||
// upload source buffer0
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = i-1;
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer0" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
|
||||
|
||||
// upload source buffer1
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = i+1;
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer1" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = 0;
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
RT_CHECK(run_test(device, buffer, kernel_arg, buf_size, num_points));
|
||||
RT_CHECK(run_test(kernel_arg, buf_size, num_points));
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
|
|
BIN
driver/tests/demo/kernel.bin
Executable file → Normal file
BIN
driver/tests/demo/kernel.bin
Executable file → Normal file
Binary file not shown.
|
@ -6,13 +6,14 @@
|
|||
|
||||
void kernel_body(void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
int* src0_ptr = (int*)_arg->src0_ptr;
|
||||
int* src1_ptr = (int*)_arg->src1_ptr;
|
||||
int* dst_ptr = (int*)_arg->dst_ptr;
|
||||
uint32_t count = _arg->count;
|
||||
int32_t* src0_ptr = (int32_t*)_arg->src0_ptr;
|
||||
int32_t* src1_ptr = (int32_t*)_arg->src1_ptr;
|
||||
int32_t* dst_ptr = (int32_t*)_arg->dst_ptr;
|
||||
|
||||
uint32_t offset = vx_thread_gid() * count;
|
||||
|
||||
unsigned offset = vx_thread_gid() * _arg->stride;
|
||||
|
||||
for (unsigned i = 0; i < _arg->stride; ++i) {
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,6 +68,11 @@ vcd file vortex.vcd
|
|||
vcd add -r /*/Vortex/hw/rtl/*
|
||||
run -all
|
||||
|
||||
# compress FPGA output files
|
||||
tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)`
|
||||
tar -zcvf output_files_1c_rel.tar.gz `find ./build_fpga_1c_rel -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)`
|
||||
tar -zcvf output_files_2c_rel.tar.gz `find ./build_fpga_2c_rel -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)`
|
||||
|
||||
# compress VCD trace
|
||||
tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd
|
||||
|
||||
|
|
|
@ -2,8 +2,10 @@ vortex_afu.json
|
|||
|
||||
QI:vortex_afu.qsf
|
||||
|
||||
+define+NDEBUG
|
||||
#+define+SCOPE
|
||||
+define+SCOPE
|
||||
|
||||
+define+NUM_WARPS=2
|
||||
+define+NUM_THREADS=2
|
||||
|
||||
#+define+DBG_PRINT_CORE_ICACHE
|
||||
#+define+DBG_PRINT_CORE_DCACHE
|
||||
|
@ -77,6 +79,7 @@ QI:vortex_afu.qsf
|
|||
../rtl/Vortex_Socket.v
|
||||
../rtl/Vortex_Cluster.v
|
||||
../rtl/Vortex.v
|
||||
../rtl/VX_mem_unit.v
|
||||
../rtl/VX_pipeline.v
|
||||
../rtl/VX_front_end.v
|
||||
../rtl/VX_back_end.v
|
||||
|
@ -94,12 +97,11 @@ QI:vortex_afu.qsf
|
|||
../rtl/VX_gpr.v
|
||||
../rtl/VX_gpr_ram.v
|
||||
../rtl/VX_gpr_stage.v
|
||||
../rtl/VX_mem_ctrl.v
|
||||
../rtl/VX_alu_unit.v
|
||||
../rtl/VX_lsu_unit.v
|
||||
../rtl/VX_lsu_addr_gen.v
|
||||
../rtl/VX_decode.v
|
||||
../rtl/VX_inst_multiplex.v
|
||||
../rtl/VX_lsu_addr_gen.v
|
||||
../rtl/VX_dcache_arb.v
|
||||
../rtl/VX_mem_arb.v
|
||||
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
|
||||
# Analysis & Synthesis Assignments
|
||||
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
||||
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
|
||||
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
|
||||
set_global_assignment -name VERILOG_MACRO QUARTUS
|
||||
set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
||||
set_global_assignment -name VERILOG_MACRO NDEBUG
|
|
@ -13,8 +13,6 @@ import local_mem_cfg_pkg::*;
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
`define VX_TO_DRAM_ADDR(x) x[`VX_DRAM_ADDR_WIDTH-1:(`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH)]
|
||||
|
||||
module vortex_afu #(
|
||||
parameter NUM_LOCAL_MEM_BANKS = 2
|
||||
) (
|
||||
|
@ -139,10 +137,12 @@ t_ccip_clAddr csr_io_addr;
|
|||
logic[DRAM_ADDR_WIDTH-1:0] csr_mem_addr;
|
||||
logic[DRAM_ADDR_WIDTH-1:0] csr_data_size;
|
||||
|
||||
`ifdef SCOPE
|
||||
logic [63:0] csr_scope_cmd;
|
||||
logic [63:0] csr_scope_data;
|
||||
logic csr_scope_read;
|
||||
logic csr_scope_write;
|
||||
`endif
|
||||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -154,9 +154,11 @@ assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
|
|||
t_if_ccip_c2_Tx mmio_tx;
|
||||
assign af2cp_sTxPort.c2 = mmio_tx;
|
||||
|
||||
`ifdef SCOPE
|
||||
assign csr_scope_cmd = 64'(cp2af_sRxPort.c0.data);
|
||||
assign csr_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_CSR_SCOPE_CMD == mmio_hdr.address);
|
||||
assign csr_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_CSR_SCOPE_DATA == mmio_hdr.address);
|
||||
`endif
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
|
@ -202,11 +204,13 @@ begin
|
|||
$display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_CSR_SCOPE_CMD: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CSR_SCOPE_CMD: %0h", $time, 64'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
// user-defined CSRs
|
||||
//if (mmio_hdr.address >= MMIO_CSR_USER) begin
|
||||
|
@ -237,18 +241,20 @@ begin
|
|||
16'h0008: mmio_tx.data <= 64'h0; // reserved
|
||||
MMIO_CSR_STATUS: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
if (state != mmio_tx.data) begin
|
||||
if (state != state_t'(mmio_tx.data)) begin
|
||||
$display("%t: STATUS: state=%0d", $time, state);
|
||||
end
|
||||
`endif
|
||||
mmio_tx.data <= 64'(state);
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_CSR_SCOPE_DATA: begin
|
||||
mmio_tx.data <= csr_scope_data;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: SCOPE: data=%0h", $time, csr_scope_data);
|
||||
`endif
|
||||
end
|
||||
`endif
|
||||
default: mmio_tx.data <= 64'h0;
|
||||
endcase
|
||||
mmio_tx.mmioRdValid <= 1; // post response
|
||||
|
@ -406,7 +412,7 @@ begin
|
|||
case (state)
|
||||
CMD_TYPE_READ: avs_address = cci_dram_rd_req_addr;
|
||||
CMD_TYPE_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout)));
|
||||
default: avs_address = `VX_TO_DRAM_ADDR(vx_dram_req_addr);
|
||||
default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
|
||||
endcase
|
||||
|
||||
case (state)
|
||||
|
@ -821,7 +827,7 @@ end
|
|||
`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag);
|
||||
`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready);
|
||||
|
||||
`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 641, "oops!")
|
||||
`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 626, "oops!")
|
||||
|
||||
wire scope_changed = (scope_icache_req_valid && scope_icache_req_ready)
|
||||
|| (scope_icache_rsp_valid && scope_icache_rsp_ready)
|
||||
|
@ -855,15 +861,17 @@ VX_scope #(
|
|||
|
||||
`endif
|
||||
|
||||
// Vortex binding /////////////////////////////////////////////////////////////
|
||||
// Vortex /////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign cmd_run_done = !vx_busy;
|
||||
|
||||
Vortex_Socket #() vx_socket (
|
||||
`SCOPE_SIGNALS_ICACHE_ATTACH
|
||||
`SCOPE_SIGNALS_DCACHE_ATTACH
|
||||
`SCOPE_SIGNALS_CORE_ATTACH
|
||||
`SCOPE_SIGNALS_BE_ATTACH
|
||||
`SCOPE_SIGNALS_ISTAGE_BIND
|
||||
`SCOPE_SIGNALS_LSU_BIND
|
||||
`SCOPE_SIGNALS_CORE_BIND
|
||||
`SCOPE_SIGNALS_ICACHE_BIND
|
||||
`SCOPE_SIGNALS_PIPELINE_BIND
|
||||
`SCOPE_SIGNALS_BE_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (vx_reset),
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
module VX_back_end #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_SIGNALS_DCACHE_IO
|
||||
`SCOPE_SIGNALS_LSU_IO
|
||||
`SCOPE_SIGNALS_BE_IO
|
||||
|
||||
input wire clk,
|
||||
|
@ -71,7 +71,7 @@ module VX_back_end #(
|
|||
VX_lsu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) lsu_unit (
|
||||
`SCOPE_SIGNALS_DCACHE_ATTACH
|
||||
`SCOPE_SIGNALS_LSU_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
`define VX_DEFINE
|
||||
|
||||
`include "VX_config.vh"
|
||||
`include "VX_scope.vh"
|
||||
|
||||
// `define QUEUE_FORCE_MLAB 1
|
||||
// `define SYN 1
|
||||
|
@ -139,7 +140,7 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifndef NDEBUG // pc, wb, rd, warp_num
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 2 + 5 + `NW_BITS)
|
||||
`else
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
|
||||
|
@ -286,316 +287,5 @@
|
|||
|
||||
`define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef SCOPE
|
||||
`define SCOPE_SIGNALS_DATA_LIST \
|
||||
scope_icache_req_warp_num, \
|
||||
scope_icache_req_addr, \
|
||||
scope_icache_req_tag, \
|
||||
scope_icache_rsp_data, \
|
||||
scope_icache_rsp_tag, \
|
||||
scope_dcache_req_warp_num, \
|
||||
scope_dcache_req_curr_PC, \
|
||||
scope_dcache_req_addr, \
|
||||
scope_dcache_req_rw, \
|
||||
scope_dcache_req_byteen, \
|
||||
scope_dcache_req_data, \
|
||||
scope_dcache_req_tag, \
|
||||
scope_dcache_rsp_data, \
|
||||
scope_dcache_rsp_tag, \
|
||||
scope_dram_req_addr, \
|
||||
scope_dram_req_rw, \
|
||||
scope_dram_req_byteen, \
|
||||
scope_dram_req_data, \
|
||||
scope_dram_req_tag, \
|
||||
scope_dram_rsp_data, \
|
||||
scope_dram_rsp_tag, \
|
||||
scope_snp_req_addr, \
|
||||
scope_snp_req_invalidate, \
|
||||
scope_snp_req_tag, \
|
||||
scope_snp_rsp_tag, \
|
||||
scope_decode_warp_num, \
|
||||
scope_decode_curr_PC, \
|
||||
scope_decode_is_jal, \
|
||||
scope_decode_rs1, \
|
||||
scope_decode_rs2, \
|
||||
scope_execute_warp_num, \
|
||||
scope_execute_rd, \
|
||||
scope_execute_a, \
|
||||
scope_execute_b, \
|
||||
scope_writeback_warp_num, \
|
||||
scope_writeback_wb, \
|
||||
scope_writeback_rd, \
|
||||
scope_writeback_data,
|
||||
|
||||
|
||||
`define SCOPE_SIGNALS_UPD_LIST \
|
||||
scope_icache_req_valid, \
|
||||
scope_icache_req_ready, \
|
||||
scope_icache_rsp_valid, \
|
||||
scope_icache_rsp_ready, \
|
||||
scope_dcache_req_valid, \
|
||||
scope_dcache_req_ready, \
|
||||
scope_dcache_rsp_valid, \
|
||||
scope_dcache_rsp_ready, \
|
||||
scope_dram_req_valid, \
|
||||
scope_dram_req_ready, \
|
||||
scope_dram_rsp_valid, \
|
||||
scope_dram_rsp_ready, \
|
||||
scope_snp_req_valid, \
|
||||
scope_snp_req_ready, \
|
||||
scope_snp_rsp_valid, \
|
||||
scope_snp_rsp_ready, \
|
||||
scope_decode_valid, \
|
||||
scope_execute_valid, \
|
||||
scope_writeback_valid, \
|
||||
scope_schedule_delay, \
|
||||
scope_memory_delay, \
|
||||
scope_exec_delay, \
|
||||
scope_gpr_stage_delay, \
|
||||
scope_busy
|
||||
|
||||
`define SCOPE_SIGNALS_DECL \
|
||||
wire scope_icache_req_valid; \
|
||||
wire [1:0] scope_icache_req_warp_num; \
|
||||
wire [31:0] scope_icache_req_addr; \
|
||||
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
|
||||
wire scope_icache_req_ready; \
|
||||
wire scope_icache_rsp_valid; \
|
||||
wire [31:0] scope_icache_rsp_data; \
|
||||
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \
|
||||
wire scope_icache_rsp_ready; \
|
||||
wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid; \
|
||||
wire [1:0] scope_dcache_req_warp_num; \
|
||||
wire [31:0] scope_dcache_req_curr_PC; \
|
||||
wire [31:0] scope_dcache_req_addr; \
|
||||
wire scope_dcache_req_rw; \
|
||||
wire [3:0] scope_dcache_req_byteen; \
|
||||
wire [31:0] scope_dcache_req_data; \
|
||||
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \
|
||||
wire scope_dcache_req_ready; \
|
||||
wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid; \
|
||||
wire [31:0] scope_dcache_rsp_data; \
|
||||
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \
|
||||
wire scope_dcache_rsp_ready; \
|
||||
wire scope_dram_req_valid; \
|
||||
wire [31:0] scope_dram_req_addr; \
|
||||
wire scope_dram_req_rw; \
|
||||
wire [15:0] scope_dram_req_byteen; \
|
||||
wire [31:0] scope_dram_req_data; \
|
||||
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \
|
||||
wire scope_dram_req_ready; \
|
||||
wire scope_dram_rsp_valid; \
|
||||
wire [31:0] scope_dram_rsp_data; \
|
||||
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \
|
||||
wire scope_dram_rsp_ready; \
|
||||
wire scope_snp_req_valid; \
|
||||
wire [31:0] scope_snp_req_addr; \
|
||||
wire scope_snp_req_invalidate; \
|
||||
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \
|
||||
wire scope_snp_req_ready; \
|
||||
wire scope_snp_rsp_valid; \
|
||||
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
|
||||
wire scope_busy; \
|
||||
wire scope_snp_rsp_ready; \
|
||||
wire scope_schedule_delay; \
|
||||
wire scope_memory_delay; \
|
||||
wire scope_exec_delay; \
|
||||
wire scope_gpr_stage_delay; \
|
||||
wire [3:0] scope_decode_valid; \
|
||||
wire [1:0] scope_decode_warp_num; \
|
||||
wire [31:0] scope_decode_curr_PC; \
|
||||
wire scope_decode_is_jal; \
|
||||
wire [4:0] scope_decode_rs1; \
|
||||
wire [4:0] scope_decode_rs2; \
|
||||
wire [3:0] scope_execute_valid; \
|
||||
wire [1:0] scope_execute_warp_num; \
|
||||
wire [4:0] scope_execute_rd; \
|
||||
wire [31:0] scope_execute_a; \
|
||||
wire [31:0] scope_execute_b; \
|
||||
wire [3:0] scope_writeback_valid; \
|
||||
wire [1:0] scope_writeback_warp_num; \
|
||||
wire [1:0] scope_writeback_wb; \
|
||||
wire [4:0] scope_writeback_rd; \
|
||||
wire [31:0] scope_writeback_data;
|
||||
|
||||
`define SCOPE_SIGNALS_ICACHE_IO \
|
||||
/* verilator lint_off UNDRIVEN */ \
|
||||
output wire scope_icache_req_valid, \
|
||||
output wire [1:0] scope_icache_req_warp_num, \
|
||||
output wire [31:0] scope_icache_req_addr, \
|
||||
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
|
||||
output wire scope_icache_req_ready, \
|
||||
output wire scope_icache_rsp_valid, \
|
||||
output wire [31:0] scope_icache_rsp_data, \
|
||||
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag, \
|
||||
output wire scope_icache_rsp_ready, \
|
||||
/* verilator lint_on UNDRIVEN */
|
||||
|
||||
`define SCOPE_SIGNALS_DCACHE_IO \
|
||||
/* verilator lint_off UNDRIVEN */ \
|
||||
output wire [`DNUM_REQUESTS-1:0] scope_dcache_req_valid, \
|
||||
output wire [1:0] scope_dcache_req_warp_num, \
|
||||
output wire [31:0] scope_dcache_req_curr_PC, \
|
||||
output wire [31:0] scope_dcache_req_addr, \
|
||||
output wire scope_dcache_req_rw, \
|
||||
output wire [3:0] scope_dcache_req_byteen, \
|
||||
output wire [31:0] scope_dcache_req_data, \
|
||||
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \
|
||||
output wire scope_dcache_req_ready, \
|
||||
output wire [`DNUM_REQUESTS-1:0] scope_dcache_rsp_valid, \
|
||||
output wire [31:0] scope_dcache_rsp_data, \
|
||||
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \
|
||||
output wire scope_dcache_rsp_ready, \
|
||||
/* verilator lint_on UNDRIVEN */
|
||||
|
||||
`define SCOPE_SIGNALS_DRAM_IO \
|
||||
/* verilator lint_off UNDRIVEN */ \
|
||||
output wire scope_dram_req_valid, \
|
||||
output wire [31:0] scope_dram_req_addr, \
|
||||
output wire scope_dram_req_rw, \
|
||||
output wire [15:0] scope_dram_req_byteen, \
|
||||
output wire [31:0] scope_dram_req_data, \
|
||||
output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag, \
|
||||
output wire scope_dram_req_ready, \
|
||||
output wire scope_dram_rsp_valid, \
|
||||
output wire [31:0] scope_dram_rsp_data, \
|
||||
output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag, \
|
||||
output wire scope_dram_rsp_ready, \
|
||||
/* verilator lint_on UNDRIVEN */
|
||||
|
||||
`define SCOPE_SIGNALS_SNP_IO \
|
||||
/* verilator lint_off UNDRIVEN */ \
|
||||
output wire scope_snp_req_valid, \
|
||||
output wire [31:0] scope_snp_req_addr, \
|
||||
output wire scope_snp_req_invalidate, \
|
||||
output wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag, \
|
||||
output wire scope_snp_req_ready, \
|
||||
output wire scope_snp_rsp_valid, \
|
||||
output wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag, \
|
||||
output wire scope_snp_rsp_ready, \
|
||||
/* verilator lint_on UNDRIVEN */
|
||||
|
||||
`define SCOPE_SIGNALS_CORE_IO \
|
||||
/* verilator lint_off UNDRIVEN */ \
|
||||
output wire scope_busy, \
|
||||
output wire scope_schedule_delay, \
|
||||
output wire scope_memory_delay, \
|
||||
output wire scope_exec_delay, \
|
||||
output wire scope_gpr_stage_delay, \
|
||||
/* verilator lint_on UNDRIVEN */
|
||||
|
||||
`define SCOPE_SIGNALS_BE_IO \
|
||||
/* verilator lint_off UNDRIVEN */ \
|
||||
output wire [3:0] scope_decode_valid, \
|
||||
output wire [1:0] scope_decode_warp_num, \
|
||||
output wire [31:0] scope_decode_curr_PC, \
|
||||
output wire scope_decode_is_jal, \
|
||||
output wire [4:0] scope_decode_rs1, \
|
||||
output wire [4:0] scope_decode_rs2, \
|
||||
output wire [3:0] scope_execute_valid, \
|
||||
output wire [1:0] scope_execute_warp_num, \
|
||||
output wire [4:0] scope_execute_rd, \
|
||||
output wire [31:0] scope_execute_a, \
|
||||
output wire [31:0] scope_execute_b, \
|
||||
output wire [3:0] scope_writeback_valid, \
|
||||
output wire [1:0] scope_writeback_warp_num, \
|
||||
output wire [1:0] scope_writeback_wb, \
|
||||
output wire [4:0] scope_writeback_rd, \
|
||||
output wire [31:0] scope_writeback_data,
|
||||
/* verilator lint_on UNDRIVEN */
|
||||
|
||||
`define SCOPE_SIGNALS_ICACHE_ATTACH \
|
||||
.scope_icache_req_valid (scope_icache_req_valid), \
|
||||
.scope_icache_req_warp_num (scope_icache_req_warp_num), \
|
||||
.scope_icache_req_addr (scope_icache_req_addr), \
|
||||
.scope_icache_req_tag (scope_icache_req_tag), \
|
||||
.scope_icache_req_ready (scope_icache_req_ready), \
|
||||
.scope_icache_rsp_valid (scope_icache_rsp_valid), \
|
||||
.scope_icache_rsp_data (scope_icache_rsp_data), \
|
||||
.scope_icache_rsp_tag (scope_icache_rsp_tag), \
|
||||
.scope_icache_rsp_ready (scope_icache_rsp_ready),
|
||||
|
||||
`define SCOPE_SIGNALS_DCACHE_ATTACH \
|
||||
.scope_dcache_req_valid (scope_dcache_req_valid), \
|
||||
.scope_dcache_req_warp_num (scope_dcache_req_warp_num), \
|
||||
.scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \
|
||||
.scope_dcache_req_addr (scope_dcache_req_addr), \
|
||||
.scope_dcache_req_rw (scope_dcache_req_rw), \
|
||||
.scope_dcache_req_byteen(scope_dcache_req_byteen), \
|
||||
.scope_dcache_req_data (scope_dcache_req_data), \
|
||||
.scope_dcache_req_tag (scope_dcache_req_tag), \
|
||||
.scope_dcache_req_ready (scope_dcache_req_ready), \
|
||||
.scope_dcache_rsp_valid (scope_dcache_rsp_valid), \
|
||||
.scope_dcache_rsp_data (scope_dcache_rsp_data), \
|
||||
.scope_dcache_rsp_tag (scope_dcache_rsp_tag), \
|
||||
.scope_dcache_rsp_ready (scope_dcache_rsp_ready),
|
||||
|
||||
`define SCOPE_SIGNALS_DRAM_ATTACH \
|
||||
.scope_dram_req_valid (scope_dram_req_valid), \
|
||||
.scope_dram_req_addr (scope_dram_req_addr), \
|
||||
.scope_dram_req_rw (scope_dram_req_rw), \
|
||||
.scope_dram_req_byteen (scope_dram_req_byteen), \
|
||||
.scope_dram_req_data (scope_dram_req_data), \
|
||||
.scope_dram_req_tag (scope_dram_req_tag), \
|
||||
.scope_dram_req_ready (scope_dram_req_ready), \
|
||||
.scope_dram_rsp_valid (scope_dram_rsp_valid), \
|
||||
.scope_dram_rsp_data (scope_dram_rsp_data), \
|
||||
.scope_dram_rsp_tag (scope_dram_rsp_tag), \
|
||||
.scope_dram_rsp_ready (scope_dram_rsp_ready),
|
||||
|
||||
`define SCOPE_SIGNALS_SNP_ATTACH \
|
||||
.scope_snp_req_valid (scope_snp_req_valid), \
|
||||
.scope_snp_req_addr (scope_snp_req_addr), \
|
||||
.scope_snp_req_invalidate(scope_snp_req_invalidate), \
|
||||
.scope_snp_req_tag (scope_snp_req_tag), \
|
||||
.scope_snp_req_ready (scope_snp_req_ready), \
|
||||
.scope_snp_rsp_valid (scope_snp_rsp_valid), \
|
||||
.scope_snp_rsp_tag (scope_snp_rsp_tag), \
|
||||
.scope_snp_rsp_ready (scope_snp_rsp_ready),
|
||||
|
||||
`define SCOPE_SIGNALS_CORE_ATTACH \
|
||||
.scope_busy (scope_busy), \
|
||||
.scope_schedule_delay (scope_schedule_delay), \
|
||||
.scope_memory_delay (scope_memory_delay), \
|
||||
.scope_exec_delay (scope_exec_delay), \
|
||||
.scope_gpr_stage_delay (scope_gpr_stage_delay),
|
||||
|
||||
`define SCOPE_SIGNALS_BE_ATTACH \
|
||||
.scope_decode_valid (scope_decode_valid), \
|
||||
.scope_decode_warp_num (scope_decode_warp_num), \
|
||||
.scope_decode_curr_PC (scope_decode_curr_PC), \
|
||||
.scope_decode_is_jal (scope_decode_is_jal), \
|
||||
.scope_decode_rs1 (scope_decode_rs1), \
|
||||
.scope_decode_rs2 (scope_decode_rs2), \
|
||||
.scope_execute_valid (scope_execute_valid), \
|
||||
.scope_execute_warp_num (scope_execute_warp_num), \
|
||||
.scope_execute_rd (scope_execute_rd), \
|
||||
.scope_execute_a (scope_execute_a), \
|
||||
.scope_execute_b (scope_execute_b), \
|
||||
.scope_writeback_valid (scope_writeback_valid), \
|
||||
.scope_writeback_warp_num (scope_writeback_warp_num), \
|
||||
.scope_writeback_wb (scope_writeback_wb), \
|
||||
.scope_writeback_rd (scope_writeback_rd), \
|
||||
.scope_writeback_data (scope_writeback_data),
|
||||
|
||||
`define SCOPE_ASSIGN(d,s) assign d = s
|
||||
`else
|
||||
`define SCOPE_SIGNALS_ICACHE_IO
|
||||
`define SCOPE_SIGNALS_DCACHE_IO
|
||||
`define SCOPE_SIGNALS_DRAM_IO
|
||||
`define SCOPE_SIGNALS_CORE_IO
|
||||
`define SCOPE_SIGNALS_BE_IO
|
||||
|
||||
`define SCOPE_SIGNALS_ICACHE_ATTACH
|
||||
`define SCOPE_SIGNALS_DCACHE_ATTACH
|
||||
`define SCOPE_SIGNALS_DRAM_ATTACH
|
||||
`define SCOPE_SIGNALS_CORE_ATTACH
|
||||
`define SCOPE_SIGNALS_BE_ATTACH
|
||||
|
||||
`define SCOPE_ASSIGN(d,s)
|
||||
`endif
|
||||
|
||||
// VX_DEFINE
|
||||
`endif
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
module VX_front_end #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
`SCOPE_SIGNALS_ISTAGE_IO
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -65,7 +65,7 @@ module VX_front_end #(
|
|||
VX_icache_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) icache_stage (
|
||||
`SCOPE_SIGNALS_ICACHE_ATTACH
|
||||
`SCOPE_SIGNALS_ISTAGE_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
module VX_icache_stage #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
`SCOPE_SIGNALS_ISTAGE_IO
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -68,7 +68,7 @@ module VX_icache_stage #(
|
|||
// Can't accept new request
|
||||
assign icache_stage_delay = mrq_full || ~icache_req_if.core_req_ready;
|
||||
|
||||
`ifndef NDEBUG
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num, mrq_write_addr};
|
||||
`else
|
||||
assign icache_req_if.core_req_tag = mrq_write_addr;
|
||||
|
@ -95,7 +95,7 @@ module VX_icache_stage #(
|
|||
`SCOPE_ASSIGN(scope_icache_rsp_ready, icache_rsp_if.core_rsp_ready);
|
||||
|
||||
`ifdef DBG_PRINT_CORE_ICACHE
|
||||
always_ff @(posedge clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin
|
||||
$display("%t: I%01d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
|
||||
end
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
module VX_lsu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_SIGNALS_DCACHE_IO
|
||||
`SCOPE_SIGNALS_LSU_IO
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -130,10 +130,10 @@ module VX_lsu_unit #(
|
|||
assign dcache_req_if.core_req_addr = mem_req_addr;
|
||||
assign dcache_req_if.core_req_data = mem_req_data;
|
||||
|
||||
`ifndef NDEBUG
|
||||
assign dcache_req_if.core_req_tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr};
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
assign dcache_req_if.core_req_tag = {use_pc, use_wb, use_rd, use_warp_num, mrq_write_addr};
|
||||
`else
|
||||
assign dcache_req_if.core_req_tag = mrq_write_addr;
|
||||
assign dcache_req_if.core_req_tag = mrq_write_addr;
|
||||
`endif
|
||||
|
||||
// Can't accept new request
|
||||
|
@ -179,7 +179,7 @@ module VX_lsu_unit #(
|
|||
`SCOPE_ASSIGN(scope_dcache_rsp_ready, dcache_rsp_if.core_rsp_ready);
|
||||
|
||||
`ifdef DBG_PRINT_CORE_DCACHE
|
||||
always_ff @(posedge clk) begin
|
||||
always @(posedge clk) begin
|
||||
if ((| dcache_req_if.core_req_valid) && dcache_req_if.core_req_ready) begin
|
||||
$display("%t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h",
|
||||
$time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data);
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_mem_ctrl # (
|
||||
module VX_mem_unit # (
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -74,7 +76,7 @@ module VX_mem_ctrl # (
|
|||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
|
||||
) gpu_smem (
|
||||
) smem (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
@ -157,7 +159,7 @@ module VX_mem_ctrl # (
|
|||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
|
||||
) gpu_dcache (
|
||||
) dcache (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
@ -239,7 +241,9 @@ module VX_mem_ctrl # (
|
|||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
|
||||
) gpu_icache (
|
||||
) icache (
|
||||
`SCOPE_SIGNALS_ICACHE_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
@ -3,9 +3,9 @@
|
|||
module VX_pipeline #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
`SCOPE_SIGNALS_DCACHE_IO
|
||||
`SCOPE_SIGNALS_CORE_IO
|
||||
`SCOPE_SIGNALS_ISTAGE_IO
|
||||
`SCOPE_SIGNALS_LSU_IO
|
||||
`SCOPE_SIGNALS_PIPELINE_IO
|
||||
`SCOPE_SIGNALS_BE_IO
|
||||
|
||||
// Clock
|
||||
|
@ -100,7 +100,7 @@ module VX_pipeline #(
|
|||
VX_front_end #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) front_end (
|
||||
`SCOPE_SIGNALS_ICACHE_ATTACH
|
||||
`SCOPE_SIGNALS_ISTAGE_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -129,8 +129,8 @@ module VX_pipeline #(
|
|||
VX_back_end #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) back_end (
|
||||
`SCOPE_SIGNALS_DCACHE_ATTACH
|
||||
`SCOPE_SIGNALS_BE_ATTACH
|
||||
`SCOPE_SIGNALS_LSU_BIND
|
||||
`SCOPE_SIGNALS_BE_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -181,7 +181,7 @@ module VX_pipeline #(
|
|||
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_stage_delay);
|
||||
|
||||
`ifdef DBG_PRINT_WB
|
||||
always_ff @(posedge clk) begin
|
||||
always @(posedge clk) begin
|
||||
if ((| writeback_if.valid) && (writeback_if.wb != 0)) begin
|
||||
$display("%t: Writeback: wid=%0d, rd=%0d, data=%0h", $time, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
|
||||
end
|
||||
|
|
|
@ -13,11 +13,10 @@ module VX_scheduler (
|
|||
output wire schedule_delay,
|
||||
output wire is_empty
|
||||
);
|
||||
reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
||||
reg [31:0] count_valid;
|
||||
// Bit width used for the count_valid counter. Assuming `CLOG2 is a ceiling
// log2 (TODO confirm against its definition), this is wide enough to hold
// any value in the range 0 .. (`NUM_WARPS * 32) inclusive.
localparam CTVW = `CLOG2(`NUM_WARPS * 32 + 1);
|
||||
|
||||
wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);
|
||||
wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0);
|
||||
reg [31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
||||
reg [CTVW-1:0] count_valid;
|
||||
|
||||
wire is_store = (bckE_req_if.mem_write != `BYTE_EN_NO);
|
||||
wire is_load = (bckE_req_if.mem_read != `BYTE_EN_NO);
|
||||
|
@ -51,7 +50,14 @@ module VX_scheduler (
|
|||
|
||||
integer i, w;
|
||||
|
||||
// A destination register is acquired when the back-end request has at least
// one valid thread, writes back (wb != 0) to a register other than r0, and
// the scheduler is not stalling the request this cycle.
wire acquire_rd = (| bckE_req_if.valid) && (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0) && !schedule_delay;
|
||||
// A destination register is (partially) released when a writeback with at
// least one valid thread targets a register other than r0.
wire release_rd = (| writeback_if.valid) && (writeback_if.wb != 0) && (writeback_if.rd != 0);
|
||||
|
||||
// Per-thread mask for the written-back register with the threads completed
// by this writeback cleared; zero means no thread bit remains set.
wire [`NUM_THREADS-1:0] valid_wb_new_mask = rename_table[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.valid;
|
||||
|
||||
// Next value of count_valid: +1 on an acquire that is not cancelled by a
// release that empties its mask in the same cycle, -1 on such a release
// without an acquire, unchanged otherwise.
// This must be a net with a continuous assignment: a `reg` declaration
// initializer is evaluated only once at time zero and would not follow
// acquire_rd / release_rd / count_valid afterwards.
wire [CTVW-1:0] count_valid_next = (acquire_rd && ~(release_rd && (0 == valid_wb_new_mask))) ? (count_valid + 1) :
|
||||
(~acquire_rd && (release_rd && (0 == valid_wb_new_mask))) ? (count_valid - 1) :
|
||||
count_valid;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -62,19 +68,14 @@ module VX_scheduler (
|
|||
end
|
||||
count_valid <= 0;
|
||||
end else begin
|
||||
if (acquire_rd && !schedule_delay) begin
|
||||
if (acquire_rd) begin
|
||||
rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
|
||||
count_valid <= count_valid + 1;
|
||||
end
|
||||
if (release_rd) begin
|
||||
assert(rename_table[writeback_if.warp_num][writeback_if.rd] != 0);
|
||||
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
|
||||
if (0 == valid_wb_new_mask) begin
|
||||
assert(count_valid != 0);
|
||||
count_valid <= count_valid - 1;
|
||||
end
|
||||
end
|
||||
|
||||
rename_table[writeback_if.warp_num][writeback_if.rd] <= valid_wb_new_mask;
|
||||
end
|
||||
count_valid <= count_valid_next;
|
||||
end
|
||||
end
|
||||
|
||||
|
|
283
hw/rtl/VX_scope.vh
Normal file
283
hw/rtl/VX_scope.vh
Normal file
|
@ -0,0 +1,283 @@
|
|||
`ifndef VX_SCOPE
|
||||
`define VX_SCOPE
|
||||
|
||||
`ifdef SCOPE
|
||||
|
||||
`define SCOPE_SIGNALS_DATA_LIST \
|
||||
scope_icache_req_warp_num, \
|
||||
scope_icache_req_addr, \
|
||||
scope_icache_req_tag, \
|
||||
scope_icache_rsp_data, \
|
||||
scope_icache_rsp_tag, \
|
||||
scope_dcache_req_warp_num, \
|
||||
scope_dcache_req_curr_PC, \
|
||||
scope_dcache_req_addr, \
|
||||
scope_dcache_req_rw, \
|
||||
scope_dcache_req_byteen, \
|
||||
scope_dcache_req_data, \
|
||||
scope_dcache_req_tag, \
|
||||
scope_dcache_rsp_data, \
|
||||
scope_dcache_rsp_tag, \
|
||||
scope_dram_req_addr, \
|
||||
scope_dram_req_rw, \
|
||||
scope_dram_req_byteen, \
|
||||
scope_dram_req_data, \
|
||||
scope_dram_req_tag, \
|
||||
scope_dram_rsp_data, \
|
||||
scope_dram_rsp_tag, \
|
||||
scope_snp_req_addr, \
|
||||
scope_snp_req_invalidate, \
|
||||
scope_snp_req_tag, \
|
||||
scope_snp_rsp_tag, \
|
||||
scope_decode_warp_num, \
|
||||
scope_decode_curr_PC, \
|
||||
scope_decode_is_jal, \
|
||||
scope_decode_rs1, \
|
||||
scope_decode_rs2, \
|
||||
scope_execute_warp_num, \
|
||||
scope_execute_rd, \
|
||||
scope_execute_a, \
|
||||
scope_execute_b, \
|
||||
scope_writeback_warp_num, \
|
||||
scope_writeback_wb, \
|
||||
scope_writeback_rd, \
|
||||
scope_writeback_data,
|
||||
|
||||
|
||||
`define SCOPE_SIGNALS_UPD_LIST \
|
||||
scope_icache_req_valid, \
|
||||
scope_icache_req_ready, \
|
||||
scope_icache_rsp_valid, \
|
||||
scope_icache_rsp_ready, \
|
||||
scope_dcache_req_valid, \
|
||||
scope_dcache_req_ready, \
|
||||
scope_dcache_rsp_valid, \
|
||||
scope_dcache_rsp_ready, \
|
||||
scope_dram_req_valid, \
|
||||
scope_dram_req_ready, \
|
||||
scope_dram_rsp_valid, \
|
||||
scope_dram_rsp_ready, \
|
||||
scope_snp_req_valid, \
|
||||
scope_snp_req_ready, \
|
||||
scope_snp_rsp_valid, \
|
||||
scope_snp_rsp_ready, \
|
||||
scope_decode_valid, \
|
||||
scope_execute_valid, \
|
||||
scope_writeback_valid, \
|
||||
scope_schedule_delay, \
|
||||
scope_memory_delay, \
|
||||
scope_exec_delay, \
|
||||
scope_gpr_stage_delay, \
|
||||
scope_busy, \
|
||||
scope_idram_req_valid, \
|
||||
scope_idram_req_ready, \
|
||||
scope_idram_rsp_valid, \
|
||||
scope_idram_rsp_ready
|
||||
|
||||
`define SCOPE_SIGNALS_DECL \
|
||||
wire scope_icache_req_valid; \
|
||||
wire [`NW_BITS-1:0] scope_icache_req_warp_num; \
|
||||
wire [31:0] scope_icache_req_addr; \
|
||||
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
|
||||
wire scope_icache_req_ready; \
|
||||
wire scope_icache_rsp_valid; \
|
||||
wire [31:0] scope_icache_rsp_data; \
|
||||
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \
|
||||
wire scope_icache_rsp_ready; \
|
||||
wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \
|
||||
wire [`NW_BITS-1:0] scope_dcache_req_warp_num; \
|
||||
wire [31:0] scope_dcache_req_curr_PC; \
|
||||
wire [31:0] scope_dcache_req_addr; \
|
||||
wire scope_dcache_req_rw; \
|
||||
wire [3:0] scope_dcache_req_byteen; \
|
||||
wire [31:0] scope_dcache_req_data; \
|
||||
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag; \
|
||||
wire scope_dcache_req_ready; \
|
||||
wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid; \
|
||||
wire [31:0] scope_dcache_rsp_data; \
|
||||
wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \
|
||||
wire scope_dcache_rsp_ready; \
|
||||
wire scope_dram_req_valid; \
|
||||
wire [31:0] scope_dram_req_addr; \
|
||||
wire scope_dram_req_rw; \
|
||||
wire [15:0] scope_dram_req_byteen; \
|
||||
wire [31:0] scope_dram_req_data; \
|
||||
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \
|
||||
wire scope_dram_req_ready; \
|
||||
wire scope_dram_rsp_valid; \
|
||||
wire [31:0] scope_dram_rsp_data; \
|
||||
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \
|
||||
wire scope_dram_rsp_ready; \
|
||||
wire scope_snp_req_valid; \
|
||||
wire [31:0] scope_snp_req_addr; \
|
||||
wire scope_snp_req_invalidate; \
|
||||
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \
|
||||
wire scope_snp_req_ready; \
|
||||
wire scope_snp_rsp_valid; \
|
||||
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
|
||||
wire scope_busy; \
|
||||
wire scope_snp_rsp_ready; \
|
||||
wire scope_schedule_delay; \
|
||||
wire scope_memory_delay; \
|
||||
wire scope_exec_delay; \
|
||||
wire scope_gpr_stage_delay; \
|
||||
wire [`NUM_THREADS-1:0] scope_decode_valid; \
|
||||
wire [`NW_BITS-1:0] scope_decode_warp_num; \
|
||||
wire [31:0] scope_decode_curr_PC; \
|
||||
wire scope_decode_is_jal; \
|
||||
wire [4:0] scope_decode_rs1; \
|
||||
wire [4:0] scope_decode_rs2; \
|
||||
wire [`NUM_THREADS-1:0] scope_execute_valid; \
|
||||
wire [`NW_BITS-1:0] scope_execute_warp_num; \
|
||||
wire [4:0] scope_execute_rd; \
|
||||
wire [31:0] scope_execute_a; \
|
||||
wire [31:0] scope_execute_b; \
|
||||
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
|
||||
wire [`NW_BITS-1:0] scope_writeback_warp_num; \
|
||||
wire [1:0] scope_writeback_wb; \
|
||||
wire [4:0] scope_writeback_rd; \
|
||||
wire [31:0] scope_writeback_data; \
|
||||
wire scope_idram_req_valid; \
|
||||
wire scope_idram_req_ready; \
|
||||
wire scope_idram_rsp_valid; \
|
||||
wire scope_idram_rsp_ready;
|
||||
|
||||
`define SCOPE_SIGNALS_ISTAGE_IO \
|
||||
output wire scope_icache_req_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_icache_req_warp_num, \
|
||||
output wire [31:0] scope_icache_req_addr, \
|
||||
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
|
||||
output wire scope_icache_req_ready, \
|
||||
output wire scope_icache_rsp_valid, \
|
||||
output wire [31:0] scope_icache_rsp_data, \
|
||||
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag, \
|
||||
output wire scope_icache_rsp_ready,
|
||||
|
||||
`define SCOPE_SIGNALS_LSU_IO \
|
||||
output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_dcache_req_warp_num, \
|
||||
output wire [31:0] scope_dcache_req_curr_PC, \
|
||||
output wire [31:0] scope_dcache_req_addr, \
|
||||
output wire scope_dcache_req_rw, \
|
||||
output wire [3:0] scope_dcache_req_byteen, \
|
||||
output wire [31:0] scope_dcache_req_data, \
|
||||
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_req_tag, \
|
||||
output wire scope_dcache_req_ready, \
|
||||
output wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid, \
|
||||
output wire [31:0] scope_dcache_rsp_data, \
|
||||
output wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag, \
|
||||
output wire scope_dcache_rsp_ready,
|
||||
|
||||
`define SCOPE_SIGNALS_CORE_IO \
|
||||
|
||||
`define SCOPE_SIGNALS_ICACHE_IO \
|
||||
output wire scope_idram_req_valid, \
|
||||
output wire scope_idram_req_ready, \
|
||||
output wire scope_idram_rsp_valid, \
|
||||
output wire scope_idram_rsp_ready,
|
||||
|
||||
`define SCOPE_SIGNALS_PIPELINE_IO \
|
||||
output wire scope_busy, \
|
||||
output wire scope_schedule_delay, \
|
||||
output wire scope_memory_delay, \
|
||||
output wire scope_exec_delay, \
|
||||
output wire scope_gpr_stage_delay,
|
||||
|
||||
`define SCOPE_SIGNALS_BE_IO \
|
||||
output wire [`NUM_THREADS-1:0] scope_decode_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_decode_warp_num, \
|
||||
output wire [31:0] scope_decode_curr_PC, \
|
||||
output wire scope_decode_is_jal, \
|
||||
output wire [4:0] scope_decode_rs1, \
|
||||
output wire [4:0] scope_decode_rs2, \
|
||||
output wire [`NUM_THREADS-1:0] scope_execute_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_execute_warp_num, \
|
||||
output wire [4:0] scope_execute_rd, \
|
||||
output wire [31:0] scope_execute_a, \
|
||||
output wire [31:0] scope_execute_b, \
|
||||
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
|
||||
output wire [`NW_BITS-1:0] scope_writeback_warp_num, \
|
||||
output wire [1:0] scope_writeback_wb, \
|
||||
output wire [4:0] scope_writeback_rd, \
|
||||
output wire [31:0] scope_writeback_data,
|
||||
|
||||
`define SCOPE_SIGNALS_ISTAGE_BIND \
|
||||
.scope_icache_req_valid (scope_icache_req_valid), \
|
||||
.scope_icache_req_warp_num (scope_icache_req_warp_num), \
|
||||
.scope_icache_req_addr (scope_icache_req_addr), \
|
||||
.scope_icache_req_tag (scope_icache_req_tag), \
|
||||
.scope_icache_req_ready (scope_icache_req_ready), \
|
||||
.scope_icache_rsp_valid (scope_icache_rsp_valid), \
|
||||
.scope_icache_rsp_data (scope_icache_rsp_data), \
|
||||
.scope_icache_rsp_tag (scope_icache_rsp_tag), \
|
||||
.scope_icache_rsp_ready (scope_icache_rsp_ready),
|
||||
|
||||
`define SCOPE_SIGNALS_LSU_BIND \
|
||||
.scope_dcache_req_valid (scope_dcache_req_valid), \
|
||||
.scope_dcache_req_warp_num (scope_dcache_req_warp_num), \
|
||||
.scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \
|
||||
.scope_dcache_req_addr (scope_dcache_req_addr), \
|
||||
.scope_dcache_req_rw (scope_dcache_req_rw), \
|
||||
.scope_dcache_req_byteen(scope_dcache_req_byteen), \
|
||||
.scope_dcache_req_data (scope_dcache_req_data), \
|
||||
.scope_dcache_req_tag (scope_dcache_req_tag), \
|
||||
.scope_dcache_req_ready (scope_dcache_req_ready), \
|
||||
.scope_dcache_rsp_valid (scope_dcache_rsp_valid), \
|
||||
.scope_dcache_rsp_data (scope_dcache_rsp_data), \
|
||||
.scope_dcache_rsp_tag (scope_dcache_rsp_tag), \
|
||||
.scope_dcache_rsp_ready (scope_dcache_rsp_ready),
|
||||
|
||||
`define SCOPE_SIGNALS_CORE_BIND \
|
||||
|
||||
`define SCOPE_SIGNALS_ICACHE_BIND \
|
||||
.scope_idram_req_valid (scope_idram_req_valid), \
|
||||
.scope_idram_req_ready (scope_idram_req_ready), \
|
||||
.scope_idram_rsp_valid (scope_idram_rsp_valid), \
|
||||
.scope_idram_rsp_ready (scope_idram_rsp_ready),
|
||||
|
||||
`define SCOPE_SIGNALS_PIPELINE_BIND \
|
||||
.scope_busy (scope_busy), \
|
||||
.scope_schedule_delay (scope_schedule_delay), \
|
||||
.scope_memory_delay (scope_memory_delay), \
|
||||
.scope_exec_delay (scope_exec_delay), \
|
||||
.scope_gpr_stage_delay (scope_gpr_stage_delay),
|
||||
|
||||
`define SCOPE_SIGNALS_BE_BIND \
|
||||
.scope_decode_valid (scope_decode_valid), \
|
||||
.scope_decode_warp_num (scope_decode_warp_num), \
|
||||
.scope_decode_curr_PC (scope_decode_curr_PC), \
|
||||
.scope_decode_is_jal (scope_decode_is_jal), \
|
||||
.scope_decode_rs1 (scope_decode_rs1), \
|
||||
.scope_decode_rs2 (scope_decode_rs2), \
|
||||
.scope_execute_valid (scope_execute_valid), \
|
||||
.scope_execute_warp_num (scope_execute_warp_num), \
|
||||
.scope_execute_rd (scope_execute_rd), \
|
||||
.scope_execute_a (scope_execute_a), \
|
||||
.scope_execute_b (scope_execute_b), \
|
||||
.scope_writeback_valid (scope_writeback_valid), \
|
||||
.scope_writeback_warp_num (scope_writeback_warp_num), \
|
||||
.scope_writeback_wb (scope_writeback_wb), \
|
||||
.scope_writeback_rd (scope_writeback_rd), \
|
||||
.scope_writeback_data (scope_writeback_data),
|
||||
|
||||
`define SCOPE_ASSIGN(d,s) assign d = s
|
||||
`else
|
||||
`define SCOPE_SIGNALS_ISTAGE_IO
|
||||
`define SCOPE_SIGNALS_LSU_IO
|
||||
`define SCOPE_SIGNALS_CORE_IO
|
||||
`define SCOPE_SIGNALS_ICACHE_IO
|
||||
`define SCOPE_SIGNALS_PIPELINE_IO
|
||||
`define SCOPE_SIGNALS_BE_IO
|
||||
|
||||
`define SCOPE_SIGNALS_ISTAGE_BIND
|
||||
`define SCOPE_SIGNALS_LSU_BIND
|
||||
`define SCOPE_SIGNALS_CORE_BIND
|
||||
`define SCOPE_SIGNALS_ICACHE_BIND
|
||||
`define SCOPE_SIGNALS_PIPELINE_BIND
|
||||
`define SCOPE_SIGNALS_BE_BIND
|
||||
|
||||
`define SCOPE_ASSIGN(d,s)
|
||||
`endif
|
||||
|
||||
// VX_SCOPE
|
||||
`endif
|
|
@ -3,9 +3,11 @@
|
|||
module Vortex #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
`SCOPE_SIGNALS_DCACHE_IO
|
||||
`SCOPE_SIGNALS_ISTAGE_IO
|
||||
`SCOPE_SIGNALS_LSU_IO
|
||||
`SCOPE_SIGNALS_CORE_IO
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
`SCOPE_SIGNALS_PIPELINE_IO
|
||||
`SCOPE_SIGNALS_BE_IO
|
||||
|
||||
// Clock
|
||||
|
@ -169,10 +171,10 @@ module Vortex #(
|
|||
VX_pipeline #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) pipeline (
|
||||
`SCOPE_SIGNALS_ICACHE_ATTACH
|
||||
`SCOPE_SIGNALS_DCACHE_ATTACH
|
||||
`SCOPE_SIGNALS_CORE_ATTACH
|
||||
`SCOPE_SIGNALS_BE_ATTACH
|
||||
`SCOPE_SIGNALS_ISTAGE_BIND
|
||||
`SCOPE_SIGNALS_LSU_BIND
|
||||
`SCOPE_SIGNALS_PIPELINE_BIND
|
||||
`SCOPE_SIGNALS_BE_BIND
|
||||
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
|
@ -232,9 +234,11 @@ module Vortex #(
|
|||
assign snp_rsp_tag = dcache_snp_rsp_if.snp_rsp_tag;
|
||||
assign dcache_snp_rsp_if.snp_rsp_ready = snp_rsp_ready;
|
||||
|
||||
VX_mem_ctrl #(
|
||||
VX_mem_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) mem_ctrl (
|
||||
) mem_unit (
|
||||
`SCOPE_SIGNALS_ICACHE_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
@ -269,7 +273,7 @@ module Vortex #(
|
|||
.core_io_rsp_if (arb_io_rsp_if),
|
||||
.core_rsp_if (core_dcache_rsp_if)
|
||||
);
|
||||
|
||||
|
||||
endmodule // Vortex
|
||||
|
||||
|
||||
|
|
|
@ -3,9 +3,11 @@
|
|||
module Vortex_Cluster #(
|
||||
parameter CLUSTER_ID = 0
|
||||
) (
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
`SCOPE_SIGNALS_DCACHE_IO
|
||||
`SCOPE_SIGNALS_ISTAGE_IO
|
||||
`SCOPE_SIGNALS_LSU_IO
|
||||
`SCOPE_SIGNALS_CORE_IO
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
`SCOPE_SIGNALS_PIPELINE_IO
|
||||
`SCOPE_SIGNALS_BE_IO
|
||||
|
||||
// Clock
|
||||
|
@ -115,10 +117,12 @@ module Vortex_Cluster #(
|
|||
Vortex #(
|
||||
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
|
||||
) vortex_core (
|
||||
`SCOPE_SIGNALS_ICACHE_ATTACH
|
||||
`SCOPE_SIGNALS_DCACHE_ATTACH
|
||||
`SCOPE_SIGNALS_CORE_ATTACH
|
||||
`SCOPE_SIGNALS_BE_ATTACH
|
||||
`SCOPE_SIGNALS_ISTAGE_BIND
|
||||
`SCOPE_SIGNALS_LSU_BIND
|
||||
`SCOPE_SIGNALS_CORE_BIND
|
||||
`SCOPE_SIGNALS_ICACHE_BIND
|
||||
`SCOPE_SIGNALS_PIPELINE_BIND
|
||||
`SCOPE_SIGNALS_BE_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module Vortex_Socket (
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
`SCOPE_SIGNALS_DCACHE_IO
|
||||
`SCOPE_SIGNALS_ISTAGE_IO
|
||||
`SCOPE_SIGNALS_LSU_IO
|
||||
`SCOPE_SIGNALS_CORE_IO
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
`SCOPE_SIGNALS_PIPELINE_IO
|
||||
`SCOPE_SIGNALS_BE_IO
|
||||
|
||||
// Clock
|
||||
|
@ -61,10 +63,12 @@ module Vortex_Socket (
|
|||
Vortex_Cluster #(
|
||||
.CLUSTER_ID(`L3CACHE_ID)
|
||||
) Vortex_Cluster (
|
||||
`SCOPE_SIGNALS_ICACHE_ATTACH
|
||||
`SCOPE_SIGNALS_DCACHE_ATTACH
|
||||
`SCOPE_SIGNALS_CORE_ATTACH
|
||||
`SCOPE_SIGNALS_BE_ATTACH
|
||||
`SCOPE_SIGNALS_ISTAGE_BIND
|
||||
`SCOPE_SIGNALS_LSU_BIND
|
||||
`SCOPE_SIGNALS_CORE_BIND
|
||||
`SCOPE_SIGNALS_ICACHE_BIND
|
||||
`SCOPE_SIGNALS_PIPELINE_BIND
|
||||
`SCOPE_SIGNALS_BE_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -155,10 +159,12 @@ module Vortex_Socket (
|
|||
Vortex_Cluster #(
|
||||
.CLUSTER_ID(i)
|
||||
) Vortex_Cluster (
|
||||
`SCOPE_SIGNALS_ICACHE_ATTACH
|
||||
`SCOPE_SIGNALS_DCACHE_ATTACH
|
||||
`SCOPE_SIGNALS_CORE_ATTACH
|
||||
`SCOPE_SIGNALS_BE_ATTACH
|
||||
`SCOPE_SIGNALS_ISTAGE_BIND
|
||||
`SCOPE_SIGNALS_LSU_BIND
|
||||
`SCOPE_SIGNALS_CORE_BIND
|
||||
`SCOPE_SIGNALS_ICACHE_BIND
|
||||
`SCOPE_SIGNALS_PIPELINE_BIND
|
||||
`SCOPE_SIGNALS_BE_BIND
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -387,7 +393,7 @@ module Vortex_Socket (
|
|||
end
|
||||
|
||||
`ifdef DBG_PRINT_DRAM
|
||||
always_ff @(posedge clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (dram_req_valid && dram_req_ready) begin
|
||||
$display("%t: DRAM req: rw=%b addr=%0h, tag=%0h, byteen=%0h data=%0h", $time, dram_req_rw, `DRAM_TO_BYTE_ADDR(dram_req_addr), dram_req_tag, dram_req_byteen, dram_req_data);
|
||||
end
|
||||
|
|
169
hw/rtl/cache/VX_bank.v
vendored
169
hw/rtl/cache/VX_bank.v
vendored
|
@ -101,7 +101,7 @@ module VX_bank #(
|
|||
input wire snp_rsp_ready
|
||||
);
|
||||
|
||||
`DEBUG_BLOCK(
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
wire[31:0] debug_use_pc_st0;
|
||||
wire[1:0] debug_wb_st0;
|
||||
wire[4:0] debug_rd_st0;
|
||||
|
@ -128,7 +128,7 @@ module VX_bank #(
|
|||
wire[WORD_SIZE-1:0] debug_byteen_st2;
|
||||
wire[`REQS_BITS-1:0] debug_tid_st2;
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st2;
|
||||
)
|
||||
`endif
|
||||
|
||||
wire snrq_pop;
|
||||
wire snrq_empty;
|
||||
|
@ -300,7 +300,6 @@ module VX_bank #(
|
|||
wire qual_is_snp_st0;
|
||||
wire qual_snp_invalidate_st0;
|
||||
|
||||
|
||||
wire valid_st1 [STAGE_1_CYCLES-1:0];
|
||||
wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0];
|
||||
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1 [STAGE_1_CYCLES-1:0];
|
||||
|
@ -313,17 +312,17 @@ module VX_bank #(
|
|||
|
||||
assign qual_is_fill_st0 = dfpq_pop_unqual;
|
||||
|
||||
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop;
|
||||
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop;
|
||||
|
||||
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
|
||||
mrvq_pop_unqual ? mrvq_addr_st0 :
|
||||
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
|
||||
snrq_pop_unqual ? snrq_addr_st0 :
|
||||
0;
|
||||
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
|
||||
mrvq_pop_unqual ? mrvq_addr_st0 :
|
||||
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
|
||||
snrq_pop_unqual ? snrq_addr_st0 :
|
||||
0;
|
||||
if (`WORD_SELECT_WIDTH != 0) begin
|
||||
assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] :
|
||||
mrvq_pop_unqual ? mrvq_wsel_st0 :
|
||||
0;
|
||||
assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] :
|
||||
mrvq_pop_unqual ? mrvq_wsel_st0 :
|
||||
0;
|
||||
end else begin
|
||||
`UNUSED_VAR(mrvq_wsel_st0)
|
||||
assign qual_wsel_st0 = 0;
|
||||
|
@ -355,11 +354,11 @@ module VX_bank #(
|
|||
|
||||
assign qual_from_mrvq_st0 = mrvq_pop_unqual;
|
||||
|
||||
`DEBUG_BLOCK(
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
||||
end
|
||||
)
|
||||
`endif
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
|
||||
|
@ -408,18 +407,23 @@ module VX_bank #(
|
|||
wire from_mrvq_st1e;
|
||||
wire mrvq_recover_ready_state_st1e;
|
||||
|
||||
assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1];
|
||||
assign from_mrvq_st1e = from_mrvq_st1[STAGE_1_CYCLES-1];
|
||||
assign valid_st1e = valid_st1 [STAGE_1_CYCLES-1];
|
||||
assign is_snp_st1e = is_snp_st1 [STAGE_1_CYCLES-1];
|
||||
assign snp_invalidate_st1e = snp_invalidate_st1 [STAGE_1_CYCLES-1];
|
||||
|
||||
assign {tag_st1e, mem_rw_st1e, mem_byteen_st1e, tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||
|
||||
assign st2_pending_hazard_st1e = (miss_add_because_miss) && ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2);
|
||||
assign st2_pending_hazard_st1e = (miss_add_because_miss)
|
||||
&& ((addr_st2 == addr_st1[STAGE_1_CYCLES-1]) && !is_fill_st2);
|
||||
|
||||
assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e)) || (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2);
|
||||
assign force_request_miss_st1e = (valid_st1e && !from_mrvq_st1e && (mrvq_pending_hazard_st1e || st2_pending_hazard_st1e))
|
||||
|| (valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2);
|
||||
|
||||
assign mrvq_recover_ready_state_st1e = valid_st1e && from_mrvq_st1e && recover_mrvq_state_st2 && (addr_st2 == addr_st1[STAGE_1_CYCLES-1]);
|
||||
assign mrvq_recover_ready_state_st1e = valid_st1e
|
||||
&& from_mrvq_st1e
|
||||
&& recover_mrvq_state_st2
|
||||
&& (addr_st2 == addr_st1[STAGE_1_CYCLES-1]);
|
||||
|
||||
VX_tag_data_access #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -466,11 +470,12 @@ module VX_bank #(
|
|||
.mrvq_init_ready_state_st1e(mrvq_init_ready_state_st1e)
|
||||
);
|
||||
|
||||
`DEBUG_BLOCK(
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||
end
|
||||
)
|
||||
`endif
|
||||
|
||||
wire qual_valid_st1e_2 = valid_st1e && !is_fill_st1[STAGE_1_CYCLES-1];
|
||||
wire from_mrvq_st1e_st2 = from_mrvq_st1e;
|
||||
|
||||
|
@ -506,11 +511,11 @@ module VX_bank #(
|
|||
.out ({mrvq_recover_ready_state_st2 , from_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , dirtyb_st2, inst_meta_st2 })
|
||||
);
|
||||
|
||||
`DEBUG_BLOCK(
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||
end
|
||||
)
|
||||
`endif
|
||||
|
||||
// Enqueue to miss reserv if it's a valid miss
|
||||
assign miss_add_because_miss = valid_st2 && !is_snp_st2 && miss_st2;
|
||||
|
@ -539,7 +544,9 @@ module VX_bank #(
|
|||
assign mrvq_init_ready_state_hazard_st0_st1 = miss_add_unqual && qual_is_fill_st0 && (miss_add_addr == qual_addr_st0 );
|
||||
assign mrvq_init_ready_state_hazard_st1e_st1 = miss_add_unqual && is_fill_st1[STAGE_1_CYCLES-1] && (miss_add_addr == addr_st1[STAGE_1_CYCLES-1]);
|
||||
|
||||
assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 || mrvq_init_ready_state_hazard_st0_st1 || mrvq_init_ready_state_hazard_st1e_st1;
|
||||
assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2
|
||||
|| mrvq_init_ready_state_hazard_st0_st1
|
||||
|| mrvq_init_ready_state_hazard_st1e_st1;
|
||||
|
||||
VX_cache_miss_resrv #(
|
||||
.BANK_ID (BANK_ID),
|
||||
|
@ -592,10 +599,8 @@ module VX_bank #(
|
|||
|
||||
// Enqueue core response
|
||||
|
||||
wire cwbq_push;
|
||||
wire cwbq_pop;
|
||||
wire cwbq_empty;
|
||||
wire cwbq_full;
|
||||
wire cwbq_push, cwbq_pop;
|
||||
wire cwbq_empty, cwbq_full;
|
||||
|
||||
wire cwbq_push_unqual = valid_st2 && !miss_st2 && !is_fill_st2 && !is_snp_st2;
|
||||
assign cwbq_push_stall = cwbq_push_unqual && cwbq_full;
|
||||
|
@ -634,42 +639,22 @@ module VX_bank #(
|
|||
|
||||
// Enqueue DRAM fill request
|
||||
|
||||
// `IGNORE_WARNINGS_BEGIN
|
||||
// wire invalidate_fill;
|
||||
// `IGNORE_WARNINGS_END
|
||||
// wire possible_fill = valid_st2 && miss_st2 && dram_fill_req_ready && ~is_snp_st2;
|
||||
// wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2;
|
||||
wire dram_fill_req_unqual = miss_add_unqual
|
||||
&& (!mrvq_init_ready_state_st2
|
||||
|| (from_mrvq_st2 && !mrvq_recover_ready_state_st2));
|
||||
|
||||
// VX_fill_invalidator #(
|
||||
// .BANK_LINE_SIZE (BANK_LINE_SIZE),
|
||||
// .NUM_BANKS (NUM_BANKS),
|
||||
// .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE)
|
||||
// ) fill_invalidator (
|
||||
// .clk (clk),
|
||||
// .reset (reset),
|
||||
// .possible_fill (possible_fill),
|
||||
// .success_fill (is_fill_st2),
|
||||
// .fill_addr (fill_invalidator_addr),
|
||||
// .invalidate_fill (invalidate_fill)
|
||||
// );
|
||||
|
||||
wire dram_fill_req_unqual = miss_add_unqual && (!mrvq_init_ready_state_st2 || (from_mrvq_st2 && !mrvq_recover_ready_state_st2));
|
||||
|
||||
assign dram_fill_req_valid = dram_fill_req_unqual
|
||||
&& dram_fill_req_ready
|
||||
&& !( dwbq_push_stall
|
||||
|| mrvq_push_stall
|
||||
|| cwbq_push_stall);
|
||||
assign dram_fill_req_valid = dram_fill_req_unqual
|
||||
&& !(dwbq_push_stall
|
||||
|| mrvq_push_stall
|
||||
|| cwbq_push_stall);
|
||||
|
||||
assign dram_fill_req_addr = addr_st2;
|
||||
assign dram_fill_req_stall = dram_fill_req_unqual && ~dram_fill_req_ready;
|
||||
|
||||
// Enqueue DRAM writeback request
|
||||
|
||||
wire dwbq_push;
|
||||
wire dwbq_pop;
|
||||
wire dwbq_empty;
|
||||
wire dwbq_full;
|
||||
wire dwbq_push, dwbq_pop;
|
||||
wire dwbq_empty, dwbq_full;
|
||||
|
||||
wire dwbq_is_dwb_in, dwbq_is_snp_in;
|
||||
wire dwbq_is_dwb_out, dwbq_is_snp_out;
|
||||
|
@ -724,9 +709,9 @@ module VX_bank #(
|
|||
assign dram_wb_req_valid = ~dwbq_empty && dwbq_is_dwb_out && (~dwbq_is_snp_out || dwbq_dual_valid_sel == 0);
|
||||
assign snp_rsp_valid = ~dwbq_empty && dwbq_is_snp_out && (~dwbq_is_dwb_out || dwbq_dual_valid_sel == 1);
|
||||
|
||||
assign dwbq_pop = (dwbq_is_dwb_out && ~dwbq_is_snp_out && dram_wb_req_fire)
|
||||
|| (dwbq_is_snp_out && ~dwbq_is_dwb_out && snp_rsp_fire)
|
||||
|| (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire);
|
||||
assign dwbq_pop = (dwbq_is_dwb_out && ~dwbq_is_snp_out && dram_wb_req_fire)
|
||||
|| (dwbq_is_snp_out && ~dwbq_is_dwb_out && snp_rsp_fire)
|
||||
|| (dwbq_is_dwb_out && dwbq_is_snp_out && snp_rsp_fire);
|
||||
|
||||
// bank pipeline stall
|
||||
assign stall_bank_pipe = cwbq_push_stall
|
||||
|
@ -735,53 +720,27 @@ module VX_bank #(
|
|||
|| dram_fill_req_stall;
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
if (NUM_BANKS == 1) begin
|
||||
always_ff @(posedge clk) begin
|
||||
if (core_req_valid && core_req_ready) begin
|
||||
$display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(core_req_addr), core_req_tag);
|
||||
end
|
||||
if (core_rsp_valid && core_rsp_ready) begin
|
||||
$display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
|
||||
end
|
||||
if (dram_fill_req_valid && dram_fill_req_ready) begin
|
||||
$display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_req_addr));
|
||||
end
|
||||
if (dram_wb_req_valid && dram_wb_req_ready) begin
|
||||
$display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_wb_req_addr), dram_wb_req_data);
|
||||
end
|
||||
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
|
||||
$display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_rsp_addr), dram_fill_rsp_data);
|
||||
end
|
||||
if (snp_req_valid && snp_req_ready) begin
|
||||
$display("%t: bank%0d-%0d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(snp_req_addr), snp_req_tag);
|
||||
end
|
||||
if (snp_rsp_valid && snp_rsp_ready) begin
|
||||
$display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if ((|core_req_valid) && core_req_ready) begin
|
||||
$display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag);
|
||||
end
|
||||
end else begin
|
||||
always_ff @(posedge clk) begin
|
||||
if ((|core_req_valid) && core_req_ready) begin
|
||||
$display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag);
|
||||
end
|
||||
if (core_rsp_valid && core_rsp_ready) begin
|
||||
$display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
|
||||
end
|
||||
if (dram_fill_req_valid && dram_fill_req_ready) begin
|
||||
$display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
|
||||
end
|
||||
if (dram_wb_req_valid && dram_wb_req_ready) begin
|
||||
$display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
|
||||
end
|
||||
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
|
||||
$display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
|
||||
end
|
||||
if (snp_req_valid && snp_req_ready) begin
|
||||
$display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
|
||||
end
|
||||
if (snp_rsp_valid && snp_rsp_ready) begin
|
||||
$display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
|
||||
end
|
||||
if (core_rsp_valid && core_rsp_ready) begin
|
||||
$display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
|
||||
end
|
||||
if (dram_fill_req_valid && dram_fill_req_ready) begin
|
||||
$display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
|
||||
end
|
||||
// Log a DRAM writeback request when its valid/ready handshake completes.
// Uses dram_wb_req_valid, matching the other handshake-gated debug prints.
if (dram_wb_req_valid && dram_wb_req_ready) begin
|
||||
$display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
|
||||
end
|
||||
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
|
||||
$display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
|
||||
end
|
||||
if (snp_req_valid && snp_req_ready) begin
|
||||
$display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
|
||||
end
|
||||
if (snp_rsp_valid && snp_rsp_ready) begin
|
||||
$display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
17
hw/rtl/cache/VX_cache.v
vendored
17
hw/rtl/cache/VX_cache.v
vendored
|
@ -65,6 +65,8 @@ module VX_cache #(
|
|||
// Snooping forward tag width
|
||||
parameter SNP_FWD_TAG_WIDTH = 1
|
||||
) (
|
||||
`SCOPE_SIGNALS_ICACHE_IO
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -125,7 +127,7 @@ module VX_cache #(
|
|||
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready
|
||||
);
|
||||
|
||||
`DEBUG_BLOCK(
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
wire[31:0] debug_core_req_use_pc;
|
||||
wire[1:0] debug_core_req_wb;
|
||||
wire[4:0] debug_core_req_rd;
|
||||
|
@ -135,7 +137,8 @@ module VX_cache #(
|
|||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0];
|
||||
end
|
||||
)
|
||||
`endif
|
||||
|
||||
wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
|
||||
|
@ -476,7 +479,13 @@ module VX_cache #(
|
|||
.per_bank_snp_rsp_ready (per_bank_snp_rsp_ready),
|
||||
.snp_rsp_valid (snp_rsp_valid),
|
||||
.snp_rsp_tag (snp_rsp_tag),
|
||||
.snp_rsp_ready (snp_rsp_ready)
|
||||
);
|
||||
.snp_rsp_ready (snp_rsp_ready)
|
||||
);
|
||||
|
||||
`SCOPE_ASSIGN(scope_idram_req_valid, per_bank_dram_fill_req_valid[0]);
|
||||
`SCOPE_ASSIGN(scope_idram_req_ready, dram_fill_req_ready);
|
||||
`SCOPE_ASSIGN(scope_idram_rsp_valid, per_bank_core_rsp_valid[0]);
|
||||
`SCOPE_ASSIGN(scope_idram_rsp_ready, per_bank_core_rsp_ready[0]);
|
||||
|
||||
|
||||
endmodule
|
4
hw/rtl/cache/VX_cache_config.vh
vendored
4
hw/rtl/cache/VX_cache_config.vh
vendored
|
@ -72,8 +72,6 @@
|
|||
|
||||
`define LINE_TO_DRAM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
|
||||
|
||||
`define LINE_TO_BYTE_ADDR(x, i) {x, `BANK_SELECT_BITS'(i), `BASE_ADDR_BITS'(0)}
|
||||
|
||||
`define LINE_TO_BYTE_ADDR0(x) {x, `BASE_ADDR_BITS'(0)}
|
||||
`define LINE_TO_BYTE_ADDR(x, i) {x, (((`BANK_SELECT_BITS + `BASE_ADDR_BITS)'(i)) << `BASE_ADDR_BITS)}
|
||||
|
||||
`endif
|
||||
|
|
45
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
45
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
|
@ -151,38 +151,21 @@ module VX_cache_miss_resrv #(
|
|||
end
|
||||
end
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_MSRQ
|
||||
`ifdef DBG_PRINT_CACHE_MSRQ
|
||||
integer j;
|
||||
if (NUM_BANKS == 1) begin
|
||||
always_ff @(posedge clk) begin
|
||||
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
|
||||
$write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
|
||||
for (j = 0; j < MRVQ_SIZE; j++) begin
|
||||
if (valid_table[j]) begin
|
||||
$write(" ");
|
||||
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
|
||||
if (~ready_table[j]) $write("!");
|
||||
$write("addr%0d=%0h", j, {addr_table[j], `BASE_ADDR_BITS'(0)});
|
||||
end
|
||||
end
|
||||
$write("\n");
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always_ff @(posedge clk) begin
|
||||
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
|
||||
$write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
|
||||
for (j = 0; j < MRVQ_SIZE; j++) begin
|
||||
if (valid_table[j]) begin
|
||||
$write(" ");
|
||||
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
|
||||
if (~ready_table[j]) $write("!");
|
||||
$write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
|
||||
end
|
||||
end
|
||||
$write("\n");
|
||||
end
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
|
||||
$write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
|
||||
for (j = 0; j < MRVQ_SIZE; j++) begin
|
||||
if (valid_table[j]) begin
|
||||
$write(" ");
|
||||
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
|
||||
if (~ready_table[j]) $write("!");
|
||||
$write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
|
||||
end
|
||||
end
|
||||
$write("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
|
|
2
hw/rtl/cache/VX_snp_forwarder.v
vendored
2
hw/rtl/cache/VX_snp_forwarder.v
vendored
|
@ -116,7 +116,7 @@ module VX_snp_forwarder #(
|
|||
end
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_SNP
|
||||
always_ff @(posedge clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (snp_req_valid && snp_req_ready) begin
|
||||
$display("%t: cache%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_req_addr), snp_req_invalidate, snp_req_tag);
|
||||
end
|
||||
|
|
|
@ -17,112 +17,105 @@ module VX_divide #(
|
|||
output reg [WIDTHD-1:0] remainder
|
||||
);
|
||||
|
||||
// synthesis read_comments_as_HDL on
|
||||
// localparam IMPL = "quartus";
|
||||
// synthesis read_comments_as_HDL off
|
||||
|
||||
// altera translate_off
|
||||
localparam IMPL="fallback";
|
||||
// altera translate_on
|
||||
|
||||
generate
|
||||
|
||||
if (NREP != DREP) begin
|
||||
different_nrep_drep_not_yet_supported non_existing_module();
|
||||
end
|
||||
|
||||
if (IMPL == "quartus") begin
|
||||
`ifdef QUARTUS
|
||||
|
||||
localparam lpm_speed=SPEED == "HIGHEST" ? 9:5;
|
||||
localparam lpm_speed=SPEED == "HIGHEST" ? 9 : 5;
|
||||
|
||||
lpm_divide #(
|
||||
.LPM_WIDTHN(WIDTHN),
|
||||
.LPM_WIDTHD(WIDTHD),
|
||||
.LPM_NREPRESENTATION(NREP),
|
||||
.LPM_DREPRESENTATION(DREP),
|
||||
.LPM_PIPELINE(PIPELINE),
|
||||
.LPM_REMAINDERPOSITIVE("FALSE"), // emulate verilog % operator
|
||||
.MAXIMIZE_SPEED(lpm_speed)
|
||||
) quartus_divider (
|
||||
.clock(clock),
|
||||
.aclr(aclr),
|
||||
.clken(clken),
|
||||
.numer(numer),
|
||||
.denom(denom),
|
||||
.quotient(quotient),
|
||||
.remain(remainder)
|
||||
);
|
||||
end
|
||||
else begin
|
||||
lpm_divide #(
|
||||
.LPM_WIDTHN(WIDTHN),
|
||||
.LPM_WIDTHD(WIDTHD),
|
||||
.LPM_NREPRESENTATION(NREP),
|
||||
.LPM_DREPRESENTATION(DREP),
|
||||
.LPM_PIPELINE(PIPELINE),
|
||||
.LPM_REMAINDERPOSITIVE("FALSE"), // emulate verilog % operator
|
||||
.MAXIMIZE_SPEED(lpm_speed)
|
||||
) quartus_divider (
|
||||
.clock(clock),
|
||||
.aclr(aclr),
|
||||
.clken(clken),
|
||||
.numer(numer),
|
||||
.denom(denom),
|
||||
.quotient(quotient),
|
||||
.remain(remainder)
|
||||
);
|
||||
|
||||
wire [WIDTHN-1:0] numer_pipe_end;
|
||||
wire [WIDTHD-1:0] denom_pipe_end;
|
||||
`else
|
||||
|
||||
if (PIPELINE == 0) begin
|
||||
assign numer_pipe_end = numer;
|
||||
assign denom_pipe_end = denom;
|
||||
end else begin
|
||||
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
|
||||
wire [WIDTHN-1:0] numer_pipe_end;
|
||||
wire [WIDTHD-1:0] denom_pipe_end;
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
numer_pipe[i+1] <= 0;
|
||||
denom_pipe[i+1] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
numer_pipe[i+1] <= numer_pipe[i];
|
||||
denom_pipe[i+1] <= denom_pipe[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
if (PIPELINE == 0) begin
|
||||
assign numer_pipe_end = numer;
|
||||
assign denom_pipe_end = denom;
|
||||
end else begin
|
||||
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
numer_pipe[0] <= 0;
|
||||
denom_pipe[0] <= 0;
|
||||
numer_pipe[i+1] <= 0;
|
||||
denom_pipe[i+1] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
numer_pipe[0] <= numer;
|
||||
denom_pipe[0] <= denom;
|
||||
numer_pipe[i+1] <= numer_pipe[i];
|
||||
denom_pipe[i+1] <= denom_pipe[i];
|
||||
end
|
||||
end
|
||||
|
||||
assign numer_pipe_end = numer_pipe[PIPELINE-1];
|
||||
assign denom_pipe_end = denom_pipe[PIPELINE-1];
|
||||
end
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * * * * */
|
||||
/* Do the actual fallback computation here */
|
||||
/* * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
if (NREP == "SIGNED") begin
|
||||
|
||||
always @(*) begin
|
||||
if (denom_pipe_end == 0) begin
|
||||
quotient = 32'hffffffff;
|
||||
remainder = numer_pipe_end;
|
||||
end
|
||||
else if (denom_pipe_end == 32'hffffffff && numer_pipe_end == 32'h80000000) begin
|
||||
// this edge case kills verilator in some cases by causing a division
|
||||
// overflow exception. INT_MIN / -1 (on x86)
|
||||
quotient = 0;
|
||||
remainder = 0;
|
||||
end
|
||||
else begin
|
||||
quotient = $signed($signed(numer_pipe_end) / $signed(denom_pipe_end));
|
||||
remainder = $signed($signed(numer_pipe_end) % $signed(denom_pipe_end));
|
||||
end
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
numer_pipe[0] <= 0;
|
||||
denom_pipe[0] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
numer_pipe[0] <= numer;
|
||||
denom_pipe[0] <= denom;
|
||||
end
|
||||
|
||||
end
|
||||
else begin
|
||||
assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end;
|
||||
assign remainder = (denom_pipe_end == 0) ? numer_pipe_end : numer_pipe_end%denom_pipe_end;
|
||||
end
|
||||
|
||||
assign numer_pipe_end = numer_pipe[PIPELINE-1];
|
||||
assign denom_pipe_end = denom_pipe[PIPELINE-1];
|
||||
end
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * * * * */
|
||||
/* Do the actual fallback computation here */
|
||||
/* * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
if (NREP == "SIGNED") begin
|
||||
always @(*) begin
|
||||
if (denom_pipe_end == 0) begin
|
||||
quotient = 32'hffffffff;
|
||||
remainder = numer_pipe_end;
|
||||
end
|
||||
else if (denom_pipe_end == 32'hffffffff
|
||||
&& numer_pipe_end == 32'h80000000) begin
|
||||
// this edge case kills verilator in some cases by causing a division
|
||||
// overflow exception. INT_MIN / -1 (on x86)
|
||||
quotient = 0;
|
||||
remainder = 0;
|
||||
end
|
||||
else begin
|
||||
quotient = $signed($signed(numer_pipe_end) / $signed(denom_pipe_end));
|
||||
remainder = $signed($signed(numer_pipe_end) % $signed(denom_pipe_end));
|
||||
end
|
||||
end
|
||||
end
|
||||
else begin
|
||||
assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end;
|
||||
assign remainder = (denom_pipe_end == 0) ? numer_pipe_end : numer_pipe_end%denom_pipe_end;
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
endgenerate
|
||||
|
||||
endmodule : VX_divide
|
||||
|
|
|
@ -2,21 +2,17 @@
|
|||
|
||||
module VX_generic_priority_encoder #(
|
||||
parameter N = 1
|
||||
) (
|
||||
input wire[N-1:0] valids,
|
||||
//output reg[$clog2(N)-1:0] index,
|
||||
output reg[(`LOG2UP(N))-1:0] index,
|
||||
//output reg[`LOG2UP(N):0] index, // eh
|
||||
output reg found
|
||||
);
|
||||
|
||||
) (
|
||||
input wire[N-1:0] valids,
|
||||
output reg[(`LOG2UP(N))-1:0] index,
|
||||
output reg found
|
||||
);
|
||||
integer i;
|
||||
always @(*) begin
|
||||
index = 0;
|
||||
found = 0;
|
||||
for (i = N-1; i >= 0; i = i - 1) begin
|
||||
if (valids[i]) begin
|
||||
//index = i[$clog2(N)-1:0];
|
||||
index = i[(`LOG2UP(N))-1:0];
|
||||
found = 1;
|
||||
end
|
||||
|
|
|
@ -132,7 +132,7 @@ module VX_generic_queue #(
|
|||
rd_ptr_r <= rd_ptr_next_r;
|
||||
|
||||
if (SIZE > 2) begin
|
||||
rd_ptr_next_r <= rd_ptr_r + 2;
|
||||
rd_ptr_next_r <= rd_ptr_r + $bits(rd_ptr_r)'(2);
|
||||
end else begin // (SIZE == 2);
|
||||
rd_ptr_next_r <= ~rd_ptr_next_r;
|
||||
end
|
||||
|
|
|
@ -19,110 +19,102 @@ module VX_mult #(
|
|||
output reg [WIDTHP-1:0] result
|
||||
);
|
||||
|
||||
// synthesis read_comments_as_HDL on
|
||||
// localparam IMPL = "quartus";
|
||||
// synthesis read_comments_as_HDL off
|
||||
|
||||
// altera translate_off
|
||||
localparam IMPL="fallback";
|
||||
// altera translate_on
|
||||
|
||||
generate
|
||||
|
||||
if (IMPL == "quartus") begin
|
||||
`ifdef QUARTUS
|
||||
|
||||
localparam lpm_speed = (SPEED == "HIGHEST") ? 10 : 5;
|
||||
|
||||
if (FORCE_LE == "YES") begin
|
||||
lpm_mult #(
|
||||
.LPM_WIDTHA(WIDTHA),
|
||||
.LPM_WIDTHB(WIDTHB),
|
||||
.LPM_WIDTHP(WIDTHP),
|
||||
.LPM_REPRESENTATION(REP),
|
||||
.LPM_PIPELINE(PIPELINE),
|
||||
.DSP_BLOCK_BALANCING("LOGIC ELEMENTS"),
|
||||
.MAXIMIZE_SPEED(lpm_speed)
|
||||
) quartus_mult (
|
||||
.clock(clock),
|
||||
.aclr(aclr),
|
||||
.clken(clken),
|
||||
.dataa(dataa),
|
||||
.datab(datab),
|
||||
.result(result)
|
||||
);
|
||||
end
|
||||
else begin
|
||||
lpm_mult#(
|
||||
.LPM_WIDTHA(WIDTHA),
|
||||
.LPM_WIDTHB(WIDTHB),
|
||||
.LPM_WIDTHP(WIDTHP),
|
||||
.LPM_REPRESENTATION(REP),
|
||||
.LPM_PIPELINE(PIPELINE),
|
||||
.MAXIMIZE_SPEED(lpm_speed)
|
||||
) quartus_mult(
|
||||
.clock(clock),
|
||||
.aclr(aclr),
|
||||
.clken(clken),
|
||||
.dataa(dataa),
|
||||
.datab(datab),
|
||||
.result(result)
|
||||
);
|
||||
end
|
||||
localparam lpm_speed = (SPEED == "HIGHEST") ? 10 : 5;
|
||||
|
||||
if (FORCE_LE == "YES") begin
|
||||
lpm_mult #(
|
||||
.LPM_WIDTHA(WIDTHA),
|
||||
.LPM_WIDTHB(WIDTHB),
|
||||
.LPM_WIDTHP(WIDTHP),
|
||||
.LPM_REPRESENTATION(REP),
|
||||
.LPM_PIPELINE(PIPELINE),
|
||||
.DSP_BLOCK_BALANCING("LOGIC ELEMENTS"),
|
||||
.MAXIMIZE_SPEED(lpm_speed)
|
||||
) quartus_mult (
|
||||
.clock(clock),
|
||||
.aclr(aclr),
|
||||
.clken(clken),
|
||||
.dataa(dataa),
|
||||
.datab(datab),
|
||||
.result(result)
|
||||
);
|
||||
end
|
||||
else begin
|
||||
lpm_mult#(
|
||||
.LPM_WIDTHA(WIDTHA),
|
||||
.LPM_WIDTHB(WIDTHB),
|
||||
.LPM_WIDTHP(WIDTHP),
|
||||
.LPM_REPRESENTATION(REP),
|
||||
.LPM_PIPELINE(PIPELINE),
|
||||
.MAXIMIZE_SPEED(lpm_speed)
|
||||
) quartus_mult(
|
||||
.clock(clock),
|
||||
.aclr(aclr),
|
||||
.clken(clken),
|
||||
.dataa(dataa),
|
||||
.datab(datab),
|
||||
.result(result)
|
||||
);
|
||||
end
|
||||
|
||||
wire [WIDTHA-1:0] dataa_pipe_end;
|
||||
wire [WIDTHB-1:0] datab_pipe_end;
|
||||
`else
|
||||
|
||||
wire [WIDTHA-1:0] dataa_pipe_end;
|
||||
wire [WIDTHB-1:0] datab_pipe_end;
|
||||
|
||||
if (PIPELINE == 0) begin
|
||||
assign dataa_pipe_end = dataa;
|
||||
assign datab_pipe_end = datab;
|
||||
end else begin
|
||||
reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
dataa_pipe[i+1] <= 0;
|
||||
datab_pipe[i+1] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
dataa_pipe[i+1] <= dataa_pipe[i];
|
||||
datab_pipe[i+1] <= datab_pipe[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
if (PIPELINE == 0) begin
|
||||
assign dataa_pipe_end = dataa;
|
||||
assign datab_pipe_end = datab;
|
||||
end else begin
|
||||
reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar i;
|
||||
for (i = 0; i < PIPELINE-1; i++) begin : pipe_stages
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
dataa_pipe[0] <= 0;
|
||||
datab_pipe[0] <= 0;
|
||||
dataa_pipe[i+1] <= 0;
|
||||
datab_pipe[i+1] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
dataa_pipe[0] <= dataa;
|
||||
datab_pipe[0] <= datab;
|
||||
dataa_pipe[i+1] <= dataa_pipe[i];
|
||||
datab_pipe[i+1] <= datab_pipe[i];
|
||||
end
|
||||
end
|
||||
|
||||
assign dataa_pipe_end = dataa_pipe[PIPELINE-1];
|
||||
assign datab_pipe_end = datab_pipe[PIPELINE-1];
|
||||
end
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * * * * */
|
||||
/* Do the actual fallback computation here */
|
||||
/* * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
if (REP == "SIGNED") begin
|
||||
assign result = $signed($signed(dataa_pipe_end)*$signed(datab_pipe_end));
|
||||
end
|
||||
else begin
|
||||
assign result = dataa_pipe_end*datab_pipe_end;
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
dataa_pipe[0] <= 0;
|
||||
datab_pipe[0] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
dataa_pipe[0] <= dataa;
|
||||
datab_pipe[0] <= datab;
|
||||
end
|
||||
end
|
||||
|
||||
assign dataa_pipe_end = dataa_pipe[PIPELINE-1];
|
||||
assign datab_pipe_end = datab_pipe[PIPELINE-1];
|
||||
end
|
||||
|
||||
/* * * * * * * * * * * * * * * * * * * * * * */
|
||||
/* Do the actual fallback computation here */
|
||||
/* * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
if (REP == "SIGNED") begin
|
||||
assign result = $signed($signed(dataa_pipe_end)*$signed(datab_pipe_end));
|
||||
end
|
||||
else begin
|
||||
assign result = dataa_pipe_end * datab_pipe_end;
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
endgenerate
|
||||
|
||||
endmodule: VX_mult
|
||||
|
|
|
@ -18,26 +18,22 @@ module VX_scope #(
|
|||
input wire bus_write,
|
||||
input wire bus_read
|
||||
);
|
||||
localparam DELTA_ENABLE = (UPDW != 0);
|
||||
localparam MAX_DELTA = (2 ** DELTAW) - 1;
|
||||
localparam DELTA_ENABLE = (UPDW != 0);
|
||||
localparam MAX_DELTA = (2 ** DELTAW) - 1;
|
||||
|
||||
typedef enum logic[2:0] {
|
||||
CMD_GET_VALID,
|
||||
CMD_GET_DATA,
|
||||
CMD_GET_WIDTH,
|
||||
CMD_GET_COUNT,
|
||||
CMD_SET_DELAY,
|
||||
CMD_SET_STOP,
|
||||
CMD_RESERVED1,
|
||||
CMD_RESERVED2
|
||||
} cmd_t;
|
||||
localparam CMD_GET_VALID = 3'd0;
|
||||
localparam CMD_GET_DATA = 3'd1;
|
||||
localparam CMD_GET_WIDTH = 3'd2;
|
||||
localparam CMD_GET_COUNT = 3'd3;
|
||||
localparam CMD_SET_DELAY = 3'd4;
|
||||
localparam CMD_SET_STOP = 3'd5;
|
||||
localparam CMD_RESERVED1 = 3'd6;
|
||||
localparam CMD_RESERVED2 = 3'd7;
|
||||
|
||||
typedef enum logic[1:0] {
|
||||
GET_VALID,
|
||||
GET_DATA,
|
||||
GET_WIDTH,
|
||||
GET_COUNT
|
||||
} cmd_get_t;
|
||||
localparam GET_VALID = 2'd0;
|
||||
localparam GET_DATA = 2'd1;
|
||||
localparam GET_WIDTH = 2'd2;
|
||||
localparam GET_COUNT = 2'd3;
|
||||
|
||||
reg [DATAW-1:0] data_store [SIZE-1:0];
|
||||
reg [DELTAW-1:0] delta_store [SIZE-1:0];
|
||||
|
@ -84,10 +80,10 @@ module VX_scope #(
|
|||
CMD_GET_VALID,
|
||||
CMD_GET_DATA,
|
||||
CMD_GET_WIDTH,
|
||||
CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type);
|
||||
CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data);
|
||||
CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data);
|
||||
default:;
|
||||
CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type);
|
||||
CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data);
|
||||
CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
@ -183,7 +179,7 @@ module VX_scope #(
|
|||
end
|
||||
|
||||
`ifdef DBG_PRINT_SCOPE
|
||||
always_ff @(posedge clk) begin
|
||||
always @(posedge clk) begin
|
||||
if (bus_read) begin
|
||||
$display("%t: scope-read: cmd=%0d, out=0x%0h, addr=%0d", $time, out_cmd, bus_out, raddr);
|
||||
end
|
||||
|
|
|
@ -28,6 +28,7 @@ VF += --x-initial unique
|
|||
VF += -exe $(SRCS) $(INCLUDE)
|
||||
|
||||
DBG += -DVCD_OUTPUT $(DBG_PRINT)
|
||||
DBG += -DDBG_CORE_REQ_INFO
|
||||
|
||||
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||
|
||||
|
|
|
@ -11,11 +11,12 @@ double sc_time_stamp() {
|
|||
|
||||
Simulator::Simulator() {
|
||||
// force random values for uninitialized signals
|
||||
Verilated::randReset(1);
|
||||
Verilated::randReset(2);
|
||||
|
||||
ram_ = nullptr;
|
||||
vortex_ = new VVortex_Socket();
|
||||
|
||||
dram_rsp_active_ = false;
|
||||
snp_req_active_ = false;
|
||||
|
||||
#ifdef VCD_OUTPUT
|
||||
|
@ -76,7 +77,7 @@ void Simulator::eval_dram_bus() {
|
|||
return;
|
||||
}
|
||||
|
||||
// handle DRAM response cycle
|
||||
// schedule DRAM responses
|
||||
int dequeue_index = -1;
|
||||
for (int i = 0; i < dram_rsp_vec_.size(); i++) {
|
||||
if (dram_rsp_vec_[i].cycles_left > 0) {
|
||||
|
@ -88,16 +89,23 @@ void Simulator::eval_dram_bus() {
|
|||
}
|
||||
}
|
||||
|
||||
// handle DRAM response message
|
||||
if ((dequeue_index != -1)
|
||||
// send DRAM response
|
||||
if (dram_rsp_active_
|
||||
&& vortex_->dram_rsp_valid
|
||||
&& vortex_->dram_rsp_ready) {
|
||||
vortex_->dram_rsp_valid = 1;
|
||||
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE);
|
||||
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
|
||||
free(dram_rsp_vec_[dequeue_index].data);
|
||||
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
|
||||
} else {
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
dram_rsp_active_ = false;
|
||||
}
|
||||
if (!dram_rsp_active_) {
|
||||
if (dequeue_index != -1) {
|
||||
vortex_->dram_rsp_valid = 1;
|
||||
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE);
|
||||
vortex_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
|
||||
free(dram_rsp_vec_[dequeue_index].data);
|
||||
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
|
||||
dram_rsp_active_ = true;
|
||||
} else {
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// handle DRAM stalls
|
||||
|
@ -111,7 +119,7 @@ void Simulator::eval_dram_bus() {
|
|||
}
|
||||
#endif
|
||||
|
||||
// handle DRAM requests
|
||||
// process DRAM requests
|
||||
if (!dram_stalled) {
|
||||
if (vortex_->dram_req_valid) {
|
||||
if (vortex_->dram_req_rw) {
|
||||
|
|
|
@ -55,8 +55,9 @@ private:
|
|||
void eval_snp_bus();
|
||||
|
||||
std::vector<dram_req_t> dram_rsp_vec_;
|
||||
int dram_rsp_active_;
|
||||
|
||||
uint32_t snp_req_active_;
|
||||
bool snp_req_active_;
|
||||
uint32_t snp_req_size_;
|
||||
uint32_t pending_snp_reqs_;
|
||||
|
||||
|
|
2
hw/syn/quartus/cache/Makefile
vendored
2
hw/syn/quartus/cache/Makefile
vendored
|
@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES)
|
|||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache"
|
||||
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/cache"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
|
1
hw/syn/quartus/cache/project.sdc
vendored
1
hw/syn/quartus/cache/project.sdc
vendored
|
@ -1 +0,0 @@
|
|||
create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
67
hw/syn/quartus/cache/project.tcl
vendored
67
hw/syn/quartus/cache/project.tcl
vendored
|
@ -1,67 +0,0 @@
|
|||
load_package flow
|
||||
package require cmdline
|
||||
|
||||
set options {
|
||||
{ "project.arg" "" "Project name" }
|
||||
{ "family.arg" "" "Device family name" }
|
||||
{ "device.arg" "" "Device name" }
|
||||
{ "top.arg" "" "Top level module" }
|
||||
{ "src.arg" "" "Verilog source file" }
|
||||
{ "inc.arg" "" "Include path (optional)" }
|
||||
{ "sdc.arg" "" "Timing Design Constraints file (optional)" }
|
||||
{ "set.arg" "" "Macro value (optional)" }
|
||||
}
|
||||
|
||||
set q_args_orig $quartus(args)
|
||||
|
||||
array set opts [::cmdline::getoptions quartus(args) $options]
|
||||
|
||||
# Verify required parameters
|
||||
set requiredParameters {project family device top src}
|
||||
foreach p $requiredParameters {
|
||||
if {$opts($p) == ""} {
|
||||
puts stderr "Missing required parameter: -$p"
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
|
||||
project_new $opts(project) -overwrite
|
||||
|
||||
set_global_assignment -name FAMILY $opts(family)
|
||||
set_global_assignment -name DEVICE $opts(device)
|
||||
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
|
||||
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
|
||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
||||
|
||||
set idx 0
|
||||
foreach arg $q_args_orig {
|
||||
incr idx
|
||||
if [string match "-src" $arg] {
|
||||
set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx]
|
||||
}
|
||||
if [string match "-inc" $arg] {
|
||||
set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx]
|
||||
}
|
||||
if [string match "-sdc" $arg] {
|
||||
set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx]
|
||||
}
|
||||
if [string match "-set" $arg] {
|
||||
set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx]
|
||||
}
|
||||
}
|
||||
|
||||
proc make_all_pins_virtual {} {
|
||||
execute_module -tool map
|
||||
set name_ids [get_names -filter * -node_type pin]
|
||||
foreach_in_collection name_id $name_ids {
|
||||
set pin_name [get_name_info -info full_path $name_id]
|
||||
post_message "Making VIRTUAL_PIN assignment to $pin_name"
|
||||
set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
|
||||
}
|
||||
export_assignments
|
||||
}
|
||||
|
||||
make_all_pins_virtual
|
||||
|
||||
project_close
|
|
@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES)
|
|||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache"
|
||||
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
|
|
@ -1,67 +0,0 @@
|
|||
load_package flow
|
||||
package require cmdline
|
||||
|
||||
set options {
|
||||
{ "project.arg" "" "Project name" }
|
||||
{ "family.arg" "" "Device family name" }
|
||||
{ "device.arg" "" "Device name" }
|
||||
{ "top.arg" "" "Top level module" }
|
||||
{ "src.arg" "" "Verilog source file" }
|
||||
{ "inc.arg" "" "Include path (optional)" }
|
||||
{ "sdc.arg" "" "Timing Design Constraints file (optional)" }
|
||||
{ "set.arg" "" "Macro value (optional)" }
|
||||
}
|
||||
|
||||
set q_args_orig $quartus(args)
|
||||
|
||||
array set opts [::cmdline::getoptions quartus(args) $options]
|
||||
|
||||
# Verify required parameters
|
||||
set requiredParameters {project family device top src}
|
||||
foreach p $requiredParameters {
|
||||
if {$opts($p) == ""} {
|
||||
puts stderr "Missing required parameter: -$p"
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
|
||||
project_new $opts(project) -overwrite
|
||||
|
||||
set_global_assignment -name FAMILY $opts(family)
|
||||
set_global_assignment -name DEVICE $opts(device)
|
||||
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
|
||||
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
|
||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
||||
|
||||
set idx 0
|
||||
foreach arg $q_args_orig {
|
||||
incr idx
|
||||
if [string match "-src" $arg] {
|
||||
set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx]
|
||||
}
|
||||
if [string match "-inc" $arg] {
|
||||
set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx]
|
||||
}
|
||||
if [string match "-sdc" $arg] {
|
||||
set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx]
|
||||
}
|
||||
if [string match "-set" $arg] {
|
||||
set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx]
|
||||
}
|
||||
}
|
||||
|
||||
proc make_all_pins_virtual {} {
|
||||
execute_module -tool map
|
||||
set name_ids [get_names -filter * -node_type pin]
|
||||
foreach_in_collection name_id $name_ids {
|
||||
set pin_name [get_name_info -info full_path $name_id]
|
||||
post_message "Making VIRTUAL_PIN assignment to $pin_name"
|
||||
set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
|
||||
}
|
||||
export_assignments
|
||||
}
|
||||
|
||||
make_all_pins_virtual
|
||||
|
||||
project_close
|
|
@ -33,6 +33,9 @@ set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
|
|||
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
|
||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
||||
set_global_assignment -name VERILOG_MACRO QUARTUS
|
||||
set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
||||
set_global_assignment -name VERILOG_MACRO NDEBUG
|
||||
|
||||
set idx 0
|
||||
foreach arg $q_args_orig {
|
|
@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES)
|
|||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae;../../../opae/ccip" -macro "NOPAE"
|
||||
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -set "NOPAE" -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae;../../../opae/ccip"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
set_time_format -unit ns -decimal_places 3
|
||||
|
||||
create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
||||
|
||||
derive_pll_clocks -create_base_clocks
|
||||
derive_clock_uncertainty
|
||||
|
||||
|
||||
|
|
@ -1,67 +1,70 @@
|
|||
load_package flow
|
||||
package require cmdline
|
||||
PROJECT = VX_vortex
|
||||
TOP_LEVEL_ENTITY = VX_vortex
|
||||
SRC_FILE = VX_vortex.v
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
set options {
|
||||
{ "project.arg" "" "Project name" }
|
||||
{ "family.arg" "" "Device family name" }
|
||||
{ "device.arg" "" "Device name" }
|
||||
{ "top.arg" "" "Top level module" }
|
||||
{ "src.arg" "" "Verilog source file" }
|
||||
{ "inc.arg" "" "Include path (optional)" }
|
||||
{ "sdc.arg" "" "Timing Design Constraints file (optional)" }
|
||||
{ "set.arg" "" "Macro value (optional)" }
|
||||
}
|
||||
# Part, Family
|
||||
FAMILY = "Arria 10"
|
||||
DEVICE = 10AX115N3F40E2SG
|
||||
|
||||
set q_args_orig $quartus(args)
|
||||
# Executable Configuration
|
||||
SYN_ARGS = --parallel --read_settings_files=on
|
||||
FIT_ARGS = --part=$(DEVICE) --read_settings_files=on
|
||||
ASM_ARGS =
|
||||
STA_ARGS = --do_report_timing
|
||||
|
||||
array set opts [::cmdline::getoptions quartus(args) $options]
|
||||
# Build targets
|
||||
all: $(PROJECT).sta.rpt
|
||||
|
||||
# Verify required parameters
|
||||
set requiredParameters {project family device top src}
|
||||
foreach p $requiredParameters {
|
||||
if {$opts($p) == ""} {
|
||||
puts stderr "Missing required parameter: -$p"
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
syn: $(PROJECT).syn.rpt
|
||||
|
||||
project_new $opts(project) -overwrite
|
||||
fit: $(PROJECT).fit.rpt
|
||||
|
||||
set_global_assignment -name FAMILY $opts(family)
|
||||
set_global_assignment -name DEVICE $opts(device)
|
||||
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
|
||||
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
|
||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
||||
asm: $(PROJECT).asm.rpt
|
||||
|
||||
set idx 0
|
||||
foreach arg $q_args_orig {
|
||||
incr idx
|
||||
if [string match "-src" $arg] {
|
||||
set_global_assignment -name VERILOG_FILE [lindex $q_args_orig $idx]
|
||||
}
|
||||
if [string match "-inc" $arg] {
|
||||
set_global_assignment -name SEARCH_PATH [lindex $q_args_orig $idx]
|
||||
}
|
||||
if [string match "-sdc" $arg] {
|
||||
set_global_assignment -name SDC_FILE [lindex $q_args_orig $idx]
|
||||
}
|
||||
if [string match "-set" $arg] {
|
||||
set_global_assignment -name VERILOG_MACRO [lindex $q_args_orig $idx]
|
||||
}
|
||||
}
|
||||
sta: $(PROJECT).sta.rpt
|
||||
|
||||
proc make_all_pins_virtual {} {
|
||||
execute_module -tool map
|
||||
set name_ids [get_names -filter * -node_type pin]
|
||||
foreach_in_collection name_id $name_ids {
|
||||
set pin_name [get_name_info -info full_path $name_id]
|
||||
post_message "Making VIRTUAL_PIN assignment to $pin_name"
|
||||
set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
|
||||
}
|
||||
export_assignments
|
||||
}
|
||||
smart: smart.log
|
||||
|
||||
make_all_pins_virtual
|
||||
# Target implementations
|
||||
STAMP = echo done >
|
||||
|
||||
project_close
|
||||
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
|
||||
quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
|
||||
quartus_fit $(PROJECT) $(FIT_ARGS)
|
||||
$(STAMP) asm.chg
|
||||
$(STAMP) sta.chg
|
||||
|
||||
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
|
||||
quartus_asm $(PROJECT) $(ASM_ARGS)
|
||||
|
||||
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
|
||||
quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
|
||||
# Project initialization
|
||||
$(PROJECT_FILES):
|
||||
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache"
|
||||
|
||||
syn.chg:
|
||||
$(STAMP) syn.chg
|
||||
|
||||
fit.chg:
|
||||
$(STAMP) fit.chg
|
||||
|
||||
sta.chg:
|
||||
$(STAMP) sta.chg
|
||||
|
||||
asm.chg:
|
||||
$(STAMP) asm.chg
|
||||
|
||||
program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws smart.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
set_time_format -unit ns -decimal_places 3
|
||||
|
||||
create_clock -name {clk} -period "250 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
||||
|
||||
derive_pll_clocks -create_base_clocks
|
||||
derive_clock_uncertainty
|
||||
|
||||
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
load_package flow
|
||||
package require cmdline
|
||||
|
||||
set options { \
|
||||
{ "project.arg" "" "Project name" } \
|
||||
{ "family.arg" "" "Device family name" } \
|
||||
{ "device.arg" "" "Device name" } \
|
||||
{ "top.arg" "" "Top level module" } \
|
||||
{ "sdc.arg" "" "Timing Design Constraints file" } \
|
||||
{ "src.arg" "" "Verilog source file" } \
|
||||
{ "inc.arg" "." "Include path" } \
|
||||
}
|
||||
|
||||
array set opts [::cmdline::getoptions quartus(args) $options]
|
||||
|
||||
project_new $opts(project) -overwrite
|
||||
|
||||
set_global_assignment -name FAMILY $opts(family)
|
||||
set_global_assignment -name DEVICE $opts(device)
|
||||
set_global_assignment -name TOP_LEVEL_ENTITY $opts(top)
|
||||
set_global_assignment -name VERILOG_FILE $opts(src)
|
||||
set_global_assignment -name SEARCH_PATH $opts(inc)
|
||||
set_global_assignment -name SDC_FILE $opts(sdc)
|
||||
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
|
||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
||||
|
||||
proc make_all_pins_virtual {} {
|
||||
execute_module -tool map
|
||||
set name_ids [get_names -filter * -node_type pin]
|
||||
foreach_in_collection name_id $name_ids {
|
||||
set pin_name [get_name_info -info full_path $name_id]
|
||||
post_message "Making VIRTUAL_PIN assignment to $pin_name"
|
||||
set_instance_assignment -to $pin_name -name VIRTUAL_PIN ON
|
||||
}
|
||||
export_assignments
|
||||
}
|
||||
|
||||
make_all_pins_virtual
|
||||
|
||||
project_close
|
Loading…
Add table
Add a link
Reference in a new issue