OPAE rtl fixes

This commit is contained in:
Blaise Tine 2020-05-08 08:28:28 -07:00
parent 13dfd5c8c7
commit c2e9240b7d
12 changed files with 208 additions and 243 deletions

View file

@ -143,6 +143,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr)
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t asize = align_size(size, line_size);
if (device->mem_allocation + asize > dev_mem_size)
return -1;
@ -189,11 +190,11 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb
return -1;
}
buffer->wsid = wsid;
buffer->wsid = wsid;
buffer->host_ptr = host_ptr;
buffer->io_addr = io_addr;
buffer->hdevice = hdevice;
buffer->size = size;
buffer->io_addr = io_addr;
buffer->hdevice = hdevice;
buffer->size = asize;
*hbuffer = buffer;
@ -265,18 +266,18 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t asize = align_size(size, line_size);
// check alignment
if (!is_aligned(dev_maddr, line_size))
return -1;
if (!is_aligned(size, line_size))
return -1;
if (!is_aligned(buffer->io_addr + src_offset, line_size))
return -1;
// bound checking
if (size + src_offset > buffer->size)
if (src_offset + asize > buffer->size)
return -1;
if (dev_maddr + size > dev_mem_size)
if (dev_maddr + asize > dev_mem_size)
return -1;
// Ensure ready for new command
@ -287,7 +288,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, si
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + src_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr >> ls_shift) ));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE));
// Wait for the write operation to finish
@ -308,18 +309,18 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE);
size_t asize = align_size(size, line_size);
// check alignment
if (!is_aligned(dev_maddr, line_size))
return -1;
if (!is_aligned(size, line_size))
return -1;
if (!is_aligned(buffer->io_addr + dest_offset, line_size))
return -1;
// bound checking
if (size + dest_offset > buffer->size)
if (dest_offset + asize > buffer->size)
return -1;
if (dev_maddr + size > dev_mem_size)
if (dev_maddr + asize > dev_mem_size)
return -1;
// Ensure ready for new command
@ -330,7 +331,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size,
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, (buffer->io_addr + dest_offset) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, (dev_maddr) >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ));
// Wait for the read operation to finish
@ -347,13 +348,13 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
vx_device_t* device = ((vx_device_t*)hdevice);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
int line_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
size_t asize = align_size(size, line_size);
// check alignment
if (!is_aligned(dev_maddr, line_size))
return -1;
if (!is_aligned(size, line_size))
return -1;
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)
@ -362,7 +363,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
auto ls_shift = (int)std::log2(line_size);
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, asize >> ls_shift));
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
// Wait for the cache flush operation to finish

View file

@ -4,6 +4,16 @@
int test = -1;
// Runtime-call guard: evaluates _expr exactly once and treats a zero result
// as success. On any other result it prints the failing expression together
// with its return code, calls cleanup(), and ends the process via exit(-1).
#define RT_CHECK(_expr)                                     \
  do {                                                      \
    const int _ret = (_expr);                               \
    if (_ret != 0) {                                        \
      printf("Error: '%s' returned %d!\n", #_expr, _ret);   \
      cleanup();                                            \
      exit(-1);                                             \
    }                                                       \
  } while (false)
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "t:h?")) != -1) {
@ -27,12 +37,27 @@ uint64_t shuffle(int i, uint64_t value) {
return (value << i) | (value & ((1 << i)-1));;
}
// Handles used by main() and released by cleanup(); cleanup() skips any
// handle that is still nullptr.
vx_device_h device = nullptr;
vx_buffer_h sbuf = nullptr;
vx_buffer_h dbuf = nullptr;

// Releases the source and destination buffers, then closes the device.
// Each step is performed only when the corresponding handle is non-null.
void cleanup() {
  if (sbuf != nullptr)
    vx_buf_release(sbuf);
  if (dbuf != nullptr)
    vx_buf_release(dbuf);
  if (device != nullptr)
    vx_dev_close(device);
}
int run_memcopy_test(vx_buffer_h sbuf,
vx_buffer_h dbuf,
uint32_t address,
uint64_t value,
int num_blocks) {
int ret;
int errors = 0;
// write sbuf data
@ -42,15 +67,11 @@ int run_memcopy_test(vx_buffer_h sbuf,
// write buffer to local memory
std::cout << "write buffer to local memory" << std::endl;
ret = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0);
if (ret != 0)
return ret;
RT_CHECK(vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0));
// read buffer from local memory
std::cout << "read buffer from local memory" << std::endl;
ret = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0);
if (ret != 0)
return ret;
RT_CHECK(vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0));
// verify result
std::cout << "verify result" << std::endl;
@ -77,7 +98,6 @@ int run_kernel_test(vx_device_h device,
vx_buffer_h sbuf,
vx_buffer_h dbuf,
const char* program) {
int ret;
int errors = 0;
uint64_t seed = 0x0badf00d40ff40ff;
@ -93,43 +113,27 @@ int run_kernel_test(vx_device_h device,
// write buffer to local memory
std::cout << "write buffer to local memory" << std::endl;
ret = vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0);
if (ret != 0)
return ret;
RT_CHECK(vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0));
// upload program
std::cout << "upload program" << std::endl;
ret = vx_upload_kernel_file(device, program);
if (ret != 0) {
return ret;
}
RT_CHECK(vx_upload_kernel_file(device, program));
// start device
std::cout << "start device" << std::endl;
ret = vx_start(device);
if (ret != 0) {
return ret;
}
RT_CHECK(vx_start(device));
// wait for completion
std::cout << "wait for completion" << std::endl;
ret = vx_ready_wait(device, -1);
if (ret != 0) {
return ret;
}
RT_CHECK(vx_ready_wait(device, -1));
// flush the caches
std::cout << "flush the caches" << std::endl;
ret = vx_flush_caches(device, dest_dev_addr, 64 * num_blocks);
if (ret != 0) {
return ret;
}
RT_CHECK(vx_flush_caches(device, dest_dev_addr, 64 * num_blocks));
// read buffer from local memory
std::cout << "read buffer from local memory" << std::endl;
ret = vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0);
if (ret != 0)
return ret;
RT_CHECK(vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0));
// verify result
std::cout << "verify result" << std::endl;
@ -152,75 +156,33 @@ int run_kernel_test(vx_device_h device,
return 0;
}
// Handles used by main() and released by cleanup(); cleanup() skips any
// handle that is still nullptr.
vx_device_h device = nullptr;
vx_buffer_h sbuf = nullptr;
vx_buffer_h dbuf = nullptr;

// Releases the source and destination buffers, then closes the device.
// Each step is performed only when the corresponding handle is non-null.
void cleanup() {
  if (sbuf != nullptr)
    vx_buf_release(sbuf);
  if (dbuf != nullptr)
    vx_buf_release(dbuf);
  if (device != nullptr)
    vx_dev_close(device);
}
int main(int argc, char *argv[]) {
int ret;
// parse command arguments
parse_args(argc, argv);
// open device connection
std::cout << "open device connection" << std::endl;
vx_device_h device;
ret = vx_dev_open(&device);
if (ret != 0)
return ret;
RT_CHECK(vx_dev_open(&device));
// create source buffer
std::cout << "create source buffer" << std::endl;
ret = vx_alloc_shared_mem(device, 4096, &sbuf);
if (ret != 0) {
cleanup();
return ret;
}
RT_CHECK(vx_alloc_shared_mem(device, 4096, &sbuf));
// create destination buffer
std::cout << "create destination buffer" << std::endl;
ret = vx_alloc_shared_mem(device, 4096, &dbuf);
if (ret != 0) {
cleanup();
return ret;
}
RT_CHECK(vx_alloc_shared_mem(device, 4096, &dbuf));
// run tests
if (0 == test || -1 == test) {
/*9if (0 == test || -1 == test) {
std::cout << "run memcopy test" << std::endl;
ret = run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
if (ret != 0) {
cleanup();
return ret;
}
ret = run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
if (ret != 0) {
cleanup();
return ret;
}
}
RT_CHECK(run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1));
RT_CHECK(run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8));
}*/
if (1 == test || -1 == test) {
std::cout << "run kernel test" << std::endl;
ret = run_kernel_test(device, sbuf, dbuf, "kernel.bin");
if (ret != 0) {
cleanup();
return ret;
}
RT_CHECK(run_kernel_test(device, sbuf, dbuf, "kernel.bin"));
}
// cleanup

View file

@ -4,6 +4,16 @@
#include <vortex.h>
#include "common.h"
// Runtime-call guard: evaluates _expr exactly once and treats a zero result
// as success. On any other result it prints the failing expression together
// with its return code, calls cleanup(), and ends the process via exit(-1).
#define RT_CHECK(_expr)                                     \
  do {                                                      \
    const int _ret = (_expr);                               \
    if (_ret != 0) {                                        \
      printf("Error: '%s' returned %d!\n", #_expr, _ret);   \
      cleanup();                                            \
      exit(-1);                                             \
    }                                                       \
  } while (false)
const char* program_file = "kernel.bin";
uint32_t data_stride = 0xffffffff;
@ -39,40 +49,38 @@ static void parse_args(int argc, char **argv) {
}
}
// Handles used by main() and released by cleanup(); cleanup() skips any
// handle that is still nullptr.
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;

// Releases the shared buffer, then closes the device. Each step is
// performed only when the corresponding handle is non-null.
void cleanup() {
  if (buffer != nullptr)
    vx_buf_release(buffer);
  if (device != nullptr)
    vx_dev_close(device);
}
int run_test(vx_device_h device,
vx_buffer_h buffer,
const kernel_arg_t& kernel_arg,
uint32_t buf_size,
uint32_t num_points) {
int ret;
// start device
std::cout << "start device" << std::endl;
ret = vx_start(device);
if (ret != 0) {
return ret;
}
RT_CHECK(vx_start(device));
// wait for completion
std::cout << "wait for completion" << std::endl;
ret = vx_ready_wait(device, -1);
if (ret != 0) {
return ret;
}
RT_CHECK(vx_ready_wait(device, -1));
// flush the destination buffer caches
std::cout << "flush the destination buffer caches" << std::endl;
ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size);
if (ret != 0) {
return ret;
}
RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size));
// download destination buffer
std::cout << "download destination buffer" << std::endl;
ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0);
if (ret != 0) {
return ret;
}
RT_CHECK(vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0));
// verify result
std::cout << "verify result" << std::endl;
@ -96,20 +104,7 @@ int run_test(vx_device_h device,
return 0;
}
// Handles used by main() and released by cleanup(); cleanup() skips any
// handle that is still nullptr.
vx_device_h device = nullptr;
vx_buffer_h buffer = nullptr;

// Releases the shared buffer, then closes the device. Each step is
// performed only when the corresponding handle is non-null.
void cleanup() {
  if (buffer != nullptr)
    vx_buf_release(buffer);
  if (device != nullptr)
    vx_dev_close(device);
}
int main(int argc, char *argv[]) {
int ret;
size_t value;
kernel_arg_t kernel_arg;
@ -132,50 +127,28 @@ int main(int argc, char *argv[]) {
// open device connection
std::cout << "open device connection" << std::endl;
ret = vx_dev_open(&device);
if (ret != 0)
return ret;
RT_CHECK(vx_dev_open(&device));
// upload program
std::cout << "upload program" << std::endl;
ret = vx_upload_kernel_file(device, program_file);
if (ret != 0) {
cleanup();
return ret;
}
RT_CHECK(vx_upload_kernel_file(device, program_file));
// allocate device memory
std::cout << "allocate device memory" << std::endl;
ret = vx_alloc_dev_mem(device, buf_size, &value);
if (ret != 0) {
cleanup();
return ret;
}
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src0_ptr = value;
ret = vx_alloc_dev_mem(device, buf_size, &value);
if (ret != 0) {
cleanup();
return ret;
}
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.src1_ptr = value;
ret = vx_alloc_dev_mem(device, buf_size, &value);
if (ret != 0) {
cleanup();
return ret;
}
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.dst_ptr = value;
// allocate shared memory
std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
ret = vx_alloc_shared_mem(device, alloc_size, &buffer);
if (ret != 0) {
cleanup();
return ret;
}
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
// populate source buffer values
std::cout << "populate source buffer values" << std::endl;
@ -187,19 +160,9 @@ int main(int argc, char *argv[]) {
}
// upload source buffers
std::cout << "upload source buffers" << std::endl;
ret = vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0);
if (ret != 0) {
cleanup();
return ret;
}
ret = vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0);
if (ret != 0) {
cleanup();
return ret;
}
std::cout << "upload source buffers" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
// upload kernel argument
std::cout << "upload kernel argument" << std::endl;
@ -210,20 +173,12 @@ int main(int argc, char *argv[]) {
auto buf_ptr = (int*)vx_host_ptr(buffer);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
ret = vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0);
if (ret != 0) {
cleanup();
return ret;
}
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
}
// run tests
std::cout << "run tests" << std::endl;
ret = run_test(device, buffer, kernel_arg, buf_size, num_points);
if (ret != 0) {
cleanup();
return ret;
}
RT_CHECK(run_test(device, buffer, kernel_arg, buf_size, num_points));
// cleanup
std::cout << "cleanup" << std::endl;

View file

@ -42,6 +42,8 @@ make
# ASE build instructions
#
source /export/fpga/bin/setup-fpga-env fpga-pac-a10
# Acquire a server node for running ASE simulations
qsub-sim
@ -51,7 +53,17 @@ vcd add -r /*/Vortex/hw/rtl/*
run -all
# compress VCD trace
tar -zcvf vortex.vcd.tar.gz work/vortex.vcd
tar -zcvf vortex.vcd.tar.gz ./build_ase/work/vortex.vcd
# decompress VCD trace
tar -zxvf vortex.vcd.tar.gz vortex.vcd
tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz
# launch Gtkwave
gtkwave ./build_ase/work/vortex.vcd &
# test
./run_ase.sh ../../driver/tests/basic/basic
# list a user's processes, then kill one by PID
ps -u tinebp
kill -9 <pid>

View file

@ -14,17 +14,19 @@ rm -rf $ASE_WORKDIR/.app_lock.pid $ASE_WORKDIR/.ase_ready.pid
# Start Simulator in background
pushd $SCRIPT_DIR/build_ase
make sim &
echo " [DBG] starting ASE simulator"
nohup make sim &
popd
# Wait for simulator readiness
# When .ase_ready.pid is created in $ASE_WORKDIR, ASE is ready for simulation
while [! -f $ASE_WORKDIR/.ase_ready.pid]
while [ ! -f $ASE_WORKDIR/.ase_ready.pid ]
do
sleep 1
done
# run application
pushd $PROGRAM_DIR
echo " [DBG] running ./$PROGRAM $*"
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$LD_LIBRARY_PATH ./$PROGRAM $*
popd

View file

@ -28,7 +28,12 @@ module vortex_afu #(
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
);
localparam DRAM_ADDR_WIDTH = (32 - `CLOG2(`GLOBAL_BLOCK_SIZE));
localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data);
localparam DRAM_TAG_WIDTH = `L3DRAM_TAG_WIDTH;
`STATIC_ASSERT(DRAM_ADDR_WIDTH == `L3DRAM_ADDR_WIDTH, "invalid vortex dram bus!")
`STATIC_ASSERT(DRAM_LINE_WIDTH == `L3DRAM_LINE_WIDTH, "invalid vortex dram bus!")
localparam AVS_RD_QUEUE_SIZE = 16;
@ -58,6 +63,7 @@ typedef enum logic[3:0] {
STATE_IDLE,
STATE_READ,
STATE_WRITE,
STATE_START,
STATE_RUN,
STATE_CLFLUSH
} state_t;
@ -72,13 +78,13 @@ state_t state;
logic vx_dram_req_read;
logic vx_dram_req_write;
logic [DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_req_data;
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
logic [DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
logic [DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
logic vx_dram_req_ready;
logic vx_dram_rsp_valid;
logic [`GLOBAL_BLOCK_SIZE-1:0] vx_dram_rsp_data;
logic [`L3DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
logic [DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
logic [DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
logic vx_dram_rsp_ready;
logic vx_snp_req_valid;
@ -90,9 +96,9 @@ logic vx_busy;
// AVS Queues /////////////////////////////////////////////////////////////////
logic avs_rtq_push;
t_local_mem_addr avs_rtq_din;
logic [DRAM_TAG_WIDTH-1:0] avs_rtq_din;
logic avs_rtq_pop;
t_local_mem_addr avs_rtq_dout;
logic [DRAM_TAG_WIDTH-1:0] avs_rtq_dout;
logic avs_rtq_empty;
logic avs_rtq_full;
@ -229,7 +235,7 @@ begin
CMD_TYPE_RUN: begin
$display("%t: STATE START", $time);
vx_reset <= 1;
state <= STATE_RUN;
state <= STATE_START;
end
CMD_TYPE_CLFLUSH: begin
$display("%t: STATE CFLUSH: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size);
@ -250,6 +256,10 @@ begin
end
end
STATE_START: begin // vortex reset cycle
state <= STATE_RUN;
end
STATE_RUN: begin
if (!vx_busy) begin
state <= STATE_IDLE;
@ -271,7 +281,7 @@ end
logic cci_rdq_empty;
t_cci_rdq_data cci_rdq_dout;
logic cci_rdq_pop;
logic [`L3DRAM_TAG_WIDTH-1:0] dram_req_tag;
logic [DRAM_TAG_WIDTH-1:0] dram_req_tag;
t_ccip_clAddr next_avs_address;
always_comb
@ -372,7 +382,7 @@ end
always_comb
begin
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty && vx_dram_rsp_ready;
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty;
vx_dram_rsp_tag = avs_rtq_dout;
vx_dram_rsp_data = avs_rdq_dout;
end
@ -389,7 +399,7 @@ begin
end
VX_generic_queue #(
.DATAW($bits(t_local_mem_addr)),
.DATAW(DRAM_TAG_WIDTH),
.SIZE(AVS_RD_QUEUE_SIZE)
) avs_rd_req_queue (
.clk (clk),
@ -412,7 +422,7 @@ begin
end
VX_generic_queue #(
.DATAW($bits(t_local_mem_data)),
.DATAW(DRAM_LINE_WIDTH),
.SIZE(AVS_RD_QUEUE_SIZE)
) avs_rd_rsp_queue (
.clk (clk),
@ -595,30 +605,46 @@ end
// Vortex binding /////////////////////////////////////////////////////////////
Vortex_Socket #() vx_socket (
.clk (clk),
.reset (SoftReset || vx_reset),
.clk (clk),
.reset (vx_reset),
// DRAM Req
.dram_req_write (vx_dram_req_write),
.dram_req_read (vx_dram_req_read),
.dram_req_addr (vx_dram_req_addr),
.dram_req_data (vx_dram_req_data),
.dram_req_tag (vx_dram_req_tag),
.dram_req_ready (vx_dram_req_ready),
// DRAM request
.dram_req_write (vx_dram_req_write),
.dram_req_read (vx_dram_req_read),
.dram_req_addr (vx_dram_req_addr),
.dram_req_data (vx_dram_req_data),
.dram_req_tag (vx_dram_req_tag),
.dram_req_ready (vx_dram_req_ready),
// DRAM Rsp
.dram_rsp_valid (vx_dram_rsp_valid),
.dram_rsp_data (vx_dram_rsp_data),
.dram_rsp_tag (vx_dram_rsp_tag),
.dram_rsp_ready (vx_dram_rsp_ready),
// DRAM response
.dram_rsp_valid (vx_dram_rsp_valid),
.dram_rsp_data (vx_dram_rsp_data),
.dram_rsp_tag (vx_dram_rsp_tag),
.dram_rsp_ready (vx_dram_rsp_ready),
// Cache Snooping Req
.snp_req_valid (vx_snp_req_valid),
.snp_req_addr (vx_snp_req_addr),
.snp_req_ready (vx_snp_req_ready),
// Cache snooping
.snp_req_valid (vx_snp_req_valid),
.snp_req_addr (vx_snp_req_addr),
.snp_req_ready (vx_snp_req_ready),
// I/O request
.io_req_read (),
.io_req_write (),
.io_req_addr (),
.io_req_data (),
.io_req_byteen (),
.io_req_tag (),
.io_req_ready (0),
// I/O response
.io_rsp_valid (0),
.io_rsp_data (0),
.io_rsp_tag (0),
.io_rsp_ready (),
// status
.busy (vx_busy)
.busy (vx_busy),
.ebreak ()
);
endmodule

View file

@ -8,12 +8,8 @@ module VX_back_end #(
input wire schedule_delay,
VX_cache_core_rsp_if dcache_rsp_if,
VX_cache_core_req_if dcache_req_if,
output wire mem_delay,
output wire exec_delay,
output wire gpr_stage_delay,
VX_cache_core_rsp_if dcache_rsp_if,
VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if,
@ -22,6 +18,10 @@ module VX_back_end #(
VX_warp_ctl_if warp_ctl_if,
output wire mem_delay,
output wire exec_delay,
output wire gpr_stage_delay,
output wire ebreak
);
@ -78,8 +78,8 @@ module VX_back_end #(
.reset (reset),
.lsu_req_if (lsu_req_if),
.mem_wb_if (mem_wb_if),
.dcache_rsp_if (dcache_rsp_if),
.dcache_req_if (dcache_req_if),
.dcache_rsp_if (dcache_rsp_if),
.delay (mem_delay),
.no_slot_mem (no_slot_mem)
);

View file

@ -35,8 +35,11 @@ module VX_dmem_ctrl (
.CORE_TAG_ID_BITS(`CORE_TAG_ID_BITS)
) dcache_core_rsp_qual_if(), smem_core_rsp_if();
// use "case equality" to handle uninitialized address value
wire smem_select = ((dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR) === 1'b1);
VX_dcache_io_arb dcache_io_arb (
.io_select (dcache_core_req_if.core_req_addr[0][31:24] == `SHARED_MEM_TOP_ADDR),
.io_select (smem_select),
.core_req_if (dcache_core_req_if),
.dcache_core_req_if (dcache_core_req_qual_if),
.io_core_req_if (smem_core_req_if),

View file

@ -9,19 +9,24 @@ module VX_icache_stage (
output wire[`NUM_THREADS-1:0] icache_stage_valids,
VX_inst_meta_if fe_inst_meta_fi,
VX_inst_meta_if fe_inst_meta_id,
VX_cache_core_rsp_if icache_rsp_if,
VX_cache_core_req_if icache_req_if
VX_cache_core_req_if icache_req_if,
VX_cache_core_rsp_if icache_rsp_if
);
reg[`NUM_THREADS-1:0] pending_threads[`NUM_WARPS-1:0];
reg [`NUM_THREADS-1:0] valid_threads [`NUM_WARPS-1:0];
wire valid_inst = (| fe_inst_meta_fi.valid);
`DEBUG_BEGIN
wire [`CORE_REQ_TAG_WIDTH-1:0] core_req_tag = icache_req_if.core_req_tag;
wire [`CORE_REQ_TAG_WIDTH-1:0] core_rsp_tag = icache_rsp_if.core_rsp_tag;
`DEBUG_END
// Icache Request
assign icache_req_if.core_req_valid = valid_inst && !total_freeze;
assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc;
assign icache_req_if.core_req_data = 32'b0;
assign icache_req_if.core_req_data = 'z;
assign icache_req_if.core_req_read = `BYTE_EN_LW;
assign icache_req_if.core_req_write = `BYTE_EN_NO;
assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num};
@ -33,8 +38,8 @@ module VX_icache_stage (
assign {fe_inst_meta_id.inst_pc, rsp_wb, rsp_rd, fe_inst_meta_id.warp_num} = icache_rsp_if.core_rsp_tag;
assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0][31:0];
assign fe_inst_meta_id.valid = icache_rsp_if.core_rsp_valid ? pending_threads[fe_inst_meta_id.warp_num] : 0;
assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0];
assign fe_inst_meta_id.valid = icache_rsp_if.core_rsp_valid ? valid_threads[fe_inst_meta_id.warp_num] : 0;
assign icache_stage_wid = fe_inst_meta_id.warp_num;
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
@ -49,12 +54,10 @@ module VX_icache_stage (
always @(posedge clk) begin
if (reset) begin
for (i = 0; i < `NUM_WARPS; i = i + 1) begin
pending_threads[i] <= 0;
end
//--
end else begin
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin
pending_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
valid_threads[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
end
end
end

View file

@ -11,8 +11,8 @@ module VX_lsu_unit (
VX_wb_if mem_wb_if,
// Dcache interface
VX_cache_core_rsp_if dcache_rsp_if,
VX_cache_core_req_if dcache_req_if,
VX_cache_core_rsp_if dcache_rsp_if,
output wire delay
);

View file

@ -244,8 +244,11 @@ module Vortex #(
.icache_dram_rsp_if (icache_dram_rsp_if)
);
// use "case equality" to handle uninitialized address value
wire io_select = ((dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR) === 1'b1);
VX_dcache_io_arb dcache_io_arb (
.io_select (dcache_io_core_req_if.core_req_addr[0] >= `IO_BUS_BASE_ADDR),
.io_select (io_select),
.core_req_if (dcache_io_core_req_if),
.dcache_core_req_if (dcache_core_req_if),
.io_core_req_if (io_core_req_if),

View file

@ -90,13 +90,11 @@ module VX_tag_data_access #(
wire fill_sent;
wire invalidate_line;
wire tags_match;
wire real_writefill = writefill_st1e
&& ((valid_req_st1e
&& !use_read_valid_st1e)
|| (valid_req_st1e
&& use_read_valid_st1e
&& (writeaddr_st1e[`TAG_LINE_ADDR_RNG] != use_read_tag_st1e)));
&& ((valid_req_st1e && !use_read_valid_st1e)
|| (valid_req_st1e && use_read_valid_st1e && !tags_match));
VX_tag_data_structure #(
.CACHE_SIZE (CACHE_SIZE),
@ -256,14 +254,14 @@ module VX_tag_data_access #(
assign data_write[i * `WORD_WIDTH +: `WORD_WIDTH] = force_write ? writedata_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] : use_write_dat;
end
end
assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we;
assign use_write_data = data_write;
wire[`TAG_SELECT_BITS-1:0] writeaddr_tag = writeaddr_st1e[`TAG_LINE_ADDR_RNG];
wire tags_match = writeaddr_tag == use_read_tag_st1e;
// use "case equality" to handle uninitialized tag when block entry is not valid
assign tags_match = ((writeaddr_st1e[`TAG_LINE_ADDR_RNG] == use_read_tag_st1e) === 1'b1);
wire snoop_hit = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && use_read_dirty_st1e;
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;