opae rtl fixes

This commit is contained in:
Blaise Tine 2020-06-01 23:06:13 -07:00
parent 16d5a8a09c
commit e01c411b20
16 changed files with 192 additions and 121 deletions

View file

@ -34,6 +34,7 @@ RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(MULTICORE)
VL_FLAGS += -Wno-DECLFILENAME
VL_FLAGS += --x-initial unique
VL_FLAGS += --x-assign unique
# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')

View file

@ -1,8 +1,8 @@
#ifndef _COMMON_H_
#define _COMMON_H_
#define DEV_MEM_SRC_ADDR 0x10000000
#define DEV_MEM_DST_ADDR 0x20000000
#define DEV_MEM_SRC_ADDR 0x10000040
#define DEV_MEM_DST_ADDR 0x20000080
#define NUM_BLOCKS 64
#endif

BIN
driver/tests/basic/kernel.bin Executable file → Normal file

Binary file not shown.

View file

@ -91,7 +91,7 @@ int run_test(vx_device_h device,
int ref = i + i;
int cur = buf_ptr[i];
if (cur != ref) {
std::cout << "error at 0x" << std::hex << (buf_ptr + i)
std::cout << "error at value " << i
<< ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
++errors;
}
@ -150,23 +150,39 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value));
kernel_arg.dst_ptr = value;
std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl;
std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl;
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
// allocate shared memory
std::cout << "allocate shared memory" << std::endl;
uint32_t alloc_size = std::max<uint32_t>(buf_size, sizeof(kernel_arg_t));
RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &buffer));
// populate source buffer values
std::cout << "populate source buffer values" << std::endl;
// populate source buffer0 values
std::cout << "populate source buffer0 values" << std::endl;
{
auto buf_ptr = (int*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i;
buf_ptr[i] = i-1;
}
}
// upload source buffers
std::cout << "upload source buffers" << std::endl;
// upload source buffer0
std::cout << "upload source buffer0" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0));
// populate source buffer1 values
std::cout << "populate source buffer1 values" << std::endl;
{
auto buf_ptr = (int*)vx_host_ptr(buffer);
for (uint32_t i = 0; i < num_points; ++i) {
buf_ptr[i] = i+1;
}
}
// upload source buffer1
std::cout << "upload source buffer1" << std::endl;
RT_CHECK(vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0));
// upload kernel argument

View file

@ -4,8 +4,8 @@ CF += -std=c++11 -fms-extensions
VF += --language 1800-2009 --assert -Wall -Wpedantic
VF += -Wno-DECLFILENAME
VF += --x-initial unique
VF += --x-initial unique
VF += --x-assign unique
VF += -exe $(SRCS) $(INCLUDE)
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=4

View file

@ -1,6 +1,16 @@
`ifndef NOPAE
`include "platform_if.vh"
import local_mem_cfg_pkg::*;
`include "afu_json_info.vh"
`include "VX_define.vh"
`else
`include "vortex_afu.vh"
/* verilator lint_off IMPORTSTAR */
import ccip_if_pkg::*;
import local_mem_cfg_pkg::*;
/* verilator lint_on IMPORTSTAR */
`endif
`include "VX_define.vh"
`define VX_TO_DRAM_ADDR(x) x[`VX_DRAM_ADDR_WIDTH-1:(`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH)]
@ -93,55 +103,68 @@ logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag;
logic vx_snp_req_ready;
logic vx_snp_rsp_valid;
`DEBUG_BEGIN
logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag;
`DEBUG_END
logic vx_snp_rsp_ready;
logic vx_reset;
logic vx_busy;
// AVS Queues /////////////////////////////////////////////////////////////////
logic avs_rtq_push;
logic avs_rtq_pop;
`DEBUG_BEGIN
logic avs_rtq_empty;
logic avs_rtq_full;
`DEBUG_BEGIN
logic avs_rdq_push;
logic avs_rdq_pop;
t_local_mem_data avs_rdq_dout;
logic avs_rdq_empty;
`DEBUG_BEGIN
logic avs_rdq_full;
`DEBUG_END
// CSR variables //////////////////////////////////////////////////////////////
logic [2:0] csr_cmd;
t_ccip_clAddr csr_io_addr;
t_local_mem_addr csr_mem_addr;
t_ccip_clAddr csr_data_size;
logic [2:0] csr_cmd;
t_ccip_clAddr csr_io_addr;
logic[DRAM_ADDR_WIDTH-1:0] csr_mem_addr;
logic[DRAM_ADDR_WIDTH-1:0] csr_data_size;
// MMIO controller ////////////////////////////////////////////////////////////
t_ccip_c0_ReqMmioHdr mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
`IGNORE_WARNINGS_BEGIN
t_ccip_c0_ReqMmioHdr mmio_hdr;
`IGNORE_WARNINGS_END
assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
t_if_ccip_c2_Tx mmio_tx;
assign af2cp_sTxPort.c2 = mmio_tx;
always_ff @(posedge clk)
begin
if (SoftReset) begin
af2cp_sTxPort.c2.hdr <= 0;
af2cp_sTxPort.c2.data <= 0;
af2cp_sTxPort.c2.mmioRdValid <= 0;
csr_cmd <= 0;
csr_io_addr <= 0;
csr_mem_addr <= 0;
csr_data_size <= 0;
mmio_tx.hdr <= 0;
mmio_tx.data <= 0;
mmio_tx.mmioRdValid <= 0;
csr_cmd <= 0;
csr_io_addr <= 0;
csr_mem_addr <= 0;
csr_data_size <= 0;
end
else begin
csr_cmd <= 0;
af2cp_sTxPort.c2.mmioRdValid <= 0;
mmio_tx.mmioRdValid <= 0;
// serve MMIO write request
if (cp2af_sRxPort.c0.mmioWrValid)
begin
case (mmioHdr.address)
case (mmio_hdr.address)
MMIO_CSR_IO_ADDR: begin
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
@ -168,7 +191,7 @@ begin
end
default: begin
// user-defined CSRs
//if (mmioHdr.addres >= MMIO_CSR_USER) begin
//if (mmio_hdr.addres >= MMIO_CSR_USER) begin
// write Vortex CRS
//end
end
@ -177,10 +200,10 @@ begin
// serve MMIO read requests
if (cp2af_sRxPort.c0.mmioRdValid) begin
af2cp_sTxPort.c2.hdr.tid <= mmioHdr.tid; // copy TID
case (mmioHdr.address)
mmio_tx.hdr.tid <= mmio_hdr.tid; // copy TID
case (mmio_hdr.address)
// AFU header
16'h0000: af2cp_sTxPort.c2.data <= {
16'h0000: mmio_tx.data <= {
4'b0001, // Feature type = AFU
8'b0, // reserved
4'b0, // afu minor revision = 0
@ -190,37 +213,31 @@ begin
4'b0, // afu major revision = 0
12'b0 // feature ID = 0
};
AFU_ID_L: af2cp_sTxPort.c2.data <= afu_id[63:0]; // afu id low
AFU_ID_H: af2cp_sTxPort.c2.data <= afu_id[127:64]; // afu id hi
16'h0006: af2cp_sTxPort.c2.data <= 64'h0; // next AFU
16'h0008: af2cp_sTxPort.c2.data <= 64'h0; // reserved
AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low
AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi
16'h0006: mmio_tx.data <= 64'h0; // next AFU
16'h0008: mmio_tx.data <= 64'h0; // reserved
MMIO_CSR_STATUS: begin
`ifdef DBG_PRINT_OPAE
if (state != af2cp_sTxPort.c2.data) begin
if (state != mmio_tx.data) begin
$display("%t: STATUS: state=%0d", $time, state);
end
`endif
af2cp_sTxPort.c2.data <= state;
mmio_tx.data <= {60'b0, state};
end
default: af2cp_sTxPort.c2.data <= 64'h0;
default: mmio_tx.data <= 64'h0;
endcase
af2cp_sTxPort.c2.mmioRdValid <= 1; // post response
mmio_tx.mmioRdValid <= 1; // post response
end
end
end
// COMMAND FSM ////////////////////////////////////////////////////////////////
t_ccip_clAddr cci_wr_req_ctr;
logic [DRAM_ADDR_WIDTH-1:0] avs_rd_req_ctr;
logic [DRAM_ADDR_WIDTH-1:0] avs_wr_req_ctr;
logic vx_reset;
logic cmd_read_done;
logic cmd_write_done;
logic cmd_clflush_done;
logic cmd_run_done = !vx_busy;
logic cmd_run_done;
always_ff @(posedge clk)
begin
@ -260,6 +277,9 @@ begin
`endif
state <= STATE_CLFLUSH;
end
default: begin
state <= state;
end
endcase
end
@ -291,6 +311,10 @@ begin
end
end
default: begin
state <= state;
end
endcase
end
end
@ -304,7 +328,9 @@ t_cci_rdq_data cci_rdq_dout;
logic cci_dram_rd_req_fire;
logic cci_dram_wr_req_fire;
logic vx_dram_rd_req_fire;
`DEBUG_BEGIN
logic vx_dram_wr_req_fire;
`DEBUG_END
logic vx_dram_rd_rsp_fire;
t_local_mem_byte_mask vx_dram_req_byteen_;
@ -315,15 +341,17 @@ logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
logic cci_dram_rd_req_enable, cci_dram_wr_req_enable;
logic vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable;
logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr;
assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
assign cci_dram_rd_req_enable = (state == STATE_READ)
&& (avs_pending_reads < AVS_RD_QUEUE_SIZE)
&& (avs_rd_req_ctr != 0);
&& (cci_dram_rd_req_ctr != 0);
assign cci_dram_wr_req_enable = (state == STATE_WRITE)
&& !cci_rdq_empty
&& (avs_wr_req_ctr != 0);
&& (cci_dram_wr_req_ctr < csr_data_size);
assign vx_dram_req_enable = vortex_enabled && (avs_pending_reads < AVS_RD_QUEUE_SIZE);
assign vx_dram_rd_req_enable = vx_dram_req_enable && vx_dram_req_valid && ~vx_dram_req_rw;
@ -338,24 +366,22 @@ assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && ~avs_waitrequest;
assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready;
assign avs_pending_reads_next = avs_pending_reads
+ ((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && ~avs_rdq_pop) ? 1 :
(~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0;
assign cmd_write_done = (0 == avs_wr_req_ctr);
+ (((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && ~avs_rdq_pop) ? 1 :
(~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0);
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign vx_dram_req_offset = {{VX_DRAM_LINE_LW{1'b0}}, vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]} << VX_DRAM_LINE_LW;
assign vx_dram_req_byteen_ = vx_dram_req_byteen << ({(VX_DRAM_LINE_LW - 3)'(0), vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]} << (VX_DRAM_LINE_LW - 3));
assign vx_dram_req_offset = ((DRAM_LINE_LW)'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0])) << VX_DRAM_LINE_LW;
assign vx_dram_req_byteen_ = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]) << (VX_DRAM_LINE_LW - 3));
end else begin
assign vx_dram_req_offset = 0;
assign vx_dram_req_byteen_ = 64'hffffffffffffffff;
assign vx_dram_req_byteen_ = vx_dram_req_byteen;
end
always_comb
begin
case (state)
CMD_TYPE_READ: avs_address = cci_dram_rd_req_addr;
CMD_TYPE_WRITE: avs_address = cci_dram_wr_req_addr;
CMD_TYPE_WRITE: avs_address = cci_dram_wr_req_addr + ((DRAM_ADDR_WIDTH)'(t_cci_rdq_tag'(cci_rdq_dout)));
default: avs_address = `VX_TO_DRAM_ADDR(vx_dram_req_addr);
endcase
@ -367,51 +393,53 @@ begin
case (state)
CMD_TYPE_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)];
default: avs_writedata = vx_dram_req_data << vx_dram_req_offset;
default: avs_writedata = (DRAM_LINE_WIDTH)'(vx_dram_req_data) << vx_dram_req_offset;
endcase
end
assign avs_read = cci_dram_rd_req_enable || vx_dram_rd_req_enable;
assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable;
assign cmd_write_done = (cci_dram_wr_req_ctr >= csr_data_size);
always_ff @(posedge clk)
begin
if (SoftReset)
begin
mem_bank_select <= 0;
avs_burstcount <= 1;
avs_rd_req_ctr <= 0;
avs_wr_req_ctr <= 0;
avs_pending_reads <= 0;
cci_dram_rd_req_addr <= 0;
cci_dram_wr_req_addr <= 0;
cci_dram_rd_req_ctr <= 0;
cci_dram_wr_req_ctr <= 0;
avs_pending_reads <= 0;
end
else begin
if (state == STATE_IDLE) begin
if (CMD_TYPE_READ == csr_cmd) begin
cci_dram_rd_req_addr <= csr_mem_addr;
avs_rd_req_ctr <= csr_data_size;
cci_dram_rd_req_ctr <= csr_data_size;
end
else if (CMD_TYPE_WRITE == csr_cmd) begin
cci_dram_wr_req_addr <= csr_mem_addr;
avs_wr_req_ctr <= csr_data_size;
cci_dram_wr_req_ctr <= 0;
end
end
if (cci_dram_rd_req_fire) begin
cci_dram_rd_req_addr <= cci_dram_rd_req_addr + 1;
avs_rd_req_ctr <= avs_rd_req_ctr - 1;
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - 1;
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (avs_rd_req_ctr - 1), avs_pending_reads_next);
$display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (cci_dram_rd_req_ctr - 1), avs_pending_reads_next);
`endif
end
if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr <= ((cci_dram_wr_req_addr + 1) & ~(CCI_RD_WINDOW_SIZE-1)) | t_cci_rdq_tag'(cci_rdq_dout);
avs_wr_req_ctr <= avs_wr_req_ctr - 1;
cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((t_cci_rdq_tag'(cci_dram_wr_req_ctr) == (DRAM_ADDR_WIDTH)'(CCI_RD_WINDOW_SIZE-1)) ? (DRAM_ADDR_WIDTH)'(CCI_RD_WINDOW_SIZE) : 0);
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + 1;
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (avs_wr_req_ctr - 1));
$display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1));
`endif
end
@ -441,7 +469,7 @@ assign vx_dram_req_ready = vx_dram_req_enable && !avs_waitrequest;
assign vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty;
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign vx_dram_rsp_data = (avs_rdq_dout >> vx_dram_rsp_offset);
assign vx_dram_rsp_data = (`VX_DRAM_LINE_WIDTH)'(avs_rdq_dout >> vx_dram_rsp_offset);
end else begin
assign vx_dram_rsp_data = avs_rdq_dout;
end
@ -462,7 +490,8 @@ VX_generic_queue #(
.pop (avs_rtq_pop),
.data_out ({vx_dram_rsp_tag, vx_dram_rsp_offset}),
.empty (avs_rtq_empty),
.full (avs_rtq_full)
.full (avs_rtq_full),
`UNUSED_PIN (size)
);
// AVS data read response queue ///////////////////////////////////////////////
@ -483,25 +512,27 @@ VX_generic_queue #(
.pop (avs_rdq_pop),
.data_out (avs_rdq_dout),
.empty (avs_rdq_empty),
.full (avs_rdq_full)
.full (avs_rdq_full),
`UNUSED_PIN (size)
);
// CCI-P Read Request ///////////////////////////////////////////////////////////
logic [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads, cci_pending_reads_next;
t_ccip_clAddr cci_rd_req_addr, cci_rd_req_ctr, cci_rd_req_ctr_next;
logic [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr, cci_rd_req_ctr_next;
t_ccip_clAddr cci_rd_req_addr;
t_cci_rdq_tag cci_rd_rsp_ctr;
logic cci_rd_req_fire, cci_rd_rsp_fire;
logic cci_rd_req_enable, cci_rd_req_wait;
logic cci_rdq_full, cci_rdq_push, cci_rdq_pop;
logic cci_rdq_push, cci_rdq_pop;
t_cci_rdq_data cci_rdq_din;
always_comb begin
af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0);
af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr;
af2cp_sTxPort.c0.hdr.mdata = t_cci_rdq_tag'(cci_rd_req_ctr);
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(t_cci_rdq_tag'(cci_rd_req_ctr));
end
assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull;
@ -514,8 +545,8 @@ assign cci_rdq_push = cci_rd_rsp_fire;
assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)};
assign cci_pending_reads_next = cci_pending_reads
+ (cci_rd_req_fire && ~cci_rdq_pop) ? 1 :
(~cci_rd_req_fire && cci_rdq_pop) ? -1 : 0;
+ ((cci_rd_req_fire && ~cci_rdq_pop) ? 1 :
(~cci_rd_req_fire && cci_rdq_pop) ? -1 : 0);
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && ~cci_rd_req_wait;
@ -549,7 +580,7 @@ begin
if (cci_rd_req_fire) begin
cci_rd_req_addr <= cci_rd_req_addr + 1;
cci_rd_req_ctr <= cci_rd_req_ctr_next;
if (t_cci_rdq_tag'(cci_rd_req_ctr) == (CCI_RD_WINDOW_SIZE-1)) begin
if (t_cci_rdq_tag'(cci_rd_req_ctr) == t_cci_rdq_tag'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 1; // end current request batch
end
`ifdef DBG_PRINT_OPAE
@ -559,7 +590,7 @@ begin
if (cci_rd_rsp_fire) begin
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + 1;
if (cci_rd_rsp_ctr == (CCI_RD_WINDOW_SIZE-1)) begin
if (cci_rd_rsp_ctr == t_cci_rdq_tag'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 0; // restart new request batch
end
`ifdef DBG_PRINT_OPAE
@ -589,12 +620,14 @@ VX_generic_queue #(
.pop (cci_rdq_pop),
.data_out (cci_rdq_dout),
.empty (cci_rdq_empty),
.full (cci_rdq_full)
`UNUSED_PIN (full),
`UNUSED_PIN (size)
);
// CCI-P Write Request //////////////////////////////////////////////////////////
logic [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes, cci_pending_writes_next;
logic [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
t_ccip_clAddr cci_wr_req_addr;
logic cci_wr_req_enable, cci_wr_rsp_fire;
@ -609,8 +642,8 @@ assign cci_wr_req_fire = af2cp_sTxPort.c1.valid && !cp2af_sRxPort.c1TxAlmFull;
assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid;
assign cci_pending_writes_next = cci_pending_writes
+ (cci_wr_req_fire && ~cci_wr_rsp_fire) ? 1 :
(~cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0;
+ ((cci_wr_req_fire && ~cci_wr_rsp_fire) ? 1 :
(~cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0);
assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes);
@ -660,7 +693,8 @@ end
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_req_ctr_next;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr, snp_rsp_ctr_next;
logic vx_snp_req_fire, vx_snp_rsp_fire;
@ -674,6 +708,10 @@ end
assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready;
assign vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready;
assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + 1) : snp_req_ctr;
assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - 1) : snp_rsp_ctr;
assign cmd_clflush_done = (0 == snp_rsp_ctr);
always_ff @(posedge clk)
@ -691,38 +729,40 @@ begin
if ((STATE_IDLE == state)
&& (CMD_TYPE_CLFLUSH == csr_cmd)) begin
vx_snp_req_addr <= snp_req_baseaddr;
snp_req_ctr <= snp_req_size;
vx_snp_req_tag <= 0;
snp_req_ctr <= 0;
snp_rsp_ctr <= snp_req_size;
vx_snp_req_valid <= (snp_req_size != 0);
vx_snp_rsp_ready <= (snp_req_size != 0);
end
if ((STATE_CLFLUSH == state)
&& (0 == snp_rsp_ctr)) begin
vx_snp_rsp_ready <= 0;
&& (snp_req_ctr_next >= snp_req_size)) begin
vx_snp_req_valid <= 0;
end
if ((STATE_CLFLUSH == state)
&& (0 == snp_req_ctr)) begin
vx_snp_req_valid <= 0;
&& (0 == snp_rsp_ctr_next)) begin
vx_snp_rsp_ready <= 0;
end
if (vx_snp_req_fire)
begin
assert(snp_req_ctr < snp_req_size);
vx_snp_req_addr <= vx_snp_req_addr + 1;
vx_snp_req_tag <= snp_req_ctr[`VX_SNP_TAG_WIDTH-1:0];
snp_req_ctr <= snp_req_ctr - 1;
vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next);
snp_req_ctr <= snp_req_ctr_next;
`ifdef DBG_PRINT_OPAE
$display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), vx_snp_req_tag, (snp_req_ctr - 1));
$display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next), (snp_req_size - snp_req_ctr_next));
`endif
end
if ((STATE_CLFLUSH == state)
&& vx_snp_rsp_fire) begin
assert(snp_rsp_ctr != 0);
snp_rsp_ctr <= snp_rsp_ctr - 1;
snp_rsp_ctr <= snp_rsp_ctr_next;
`ifdef DBG_PRINT_OPAE
$display("%t: AFU Snp Rsp: tag=%0d, rem=%0d", $time, vx_snp_rsp_tag, (snp_rsp_ctr - 1));
$display("%t: AFU Snp Rsp: tag=%0d, rem=%0d", $time, vx_snp_rsp_tag, snp_rsp_ctr_next);
`endif
end
end
@ -730,6 +770,8 @@ end
// Vortex binding /////////////////////////////////////////////////////////////
assign cmd_run_done = !vx_busy;
Vortex_Socket #() vx_socket (
.clk (clk),
.reset (vx_reset),
@ -761,23 +803,23 @@ Vortex_Socket #() vx_socket (
.snp_rsp_ready (vx_snp_rsp_ready),
// I/O request
.io_req_valid (),
.io_req_rw (),
.io_req_byteen (),
.io_req_addr (),
.io_req_data (),
.io_req_tag (),
`UNUSED_PIN (io_req_valid),
`UNUSED_PIN (io_req_rw),
`UNUSED_PIN (io_req_byteen),
`UNUSED_PIN (io_req_addr),
`UNUSED_PIN (io_req_data),
`UNUSED_PIN (io_req_tag),
.io_req_ready (1),
// I/O response
.io_rsp_valid (0),
.io_rsp_data (0),
.io_rsp_tag (0),
.io_rsp_ready (),
`UNUSED_PIN (io_rsp_ready),
// status
.busy (vx_busy),
.ebreak ()
`UNUSED_PIN (ebreak)
);
endmodule
endmodule

View file

@ -48,6 +48,7 @@
`define CLOG2(x) $clog2(x)
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
`define MIN(x, y) ((x < y) ? (x) : (y))
`define MAX(x, y) ((x > y) ? (x) : (y))

View file

@ -70,7 +70,7 @@ module VX_lsu_unit #(
for (i = 0; i < `NUM_THREADS; ++i) begin
assign mem_req_addr[i] = use_address[i][31:2];
assign mem_req_offset[i] = {3'b0, use_address[i][1:0]} << 3;
assign mem_req_offset[i] = (5'(use_address[i][1:0])) << 3;
assign mem_req_byteen[i] = (wmask << use_address[i][1:0]);
assign mem_req_data[i] = (use_store_data[i] << mem_req_offset[i]);
end

View file

@ -123,7 +123,7 @@ module Vortex #(
assign io_req_tag = io_core_req_if.core_req_tag[0];
assign io_core_req_if.core_req_ready = io_req_ready;
assign io_core_rsp_if.core_rsp_valid = {{`NUM_THREADS-1{1'b0}}, io_rsp_valid};
assign io_core_rsp_if.core_rsp_valid = {{(`NUM_THREADS-1){1'b0}}, io_rsp_valid};
assign io_core_rsp_if.core_rsp_data[0] = io_rsp_data;
assign io_core_rsp_if.core_rsp_tag = io_rsp_tag;
assign io_rsp_ready = io_core_rsp_if.core_rsp_ready;

View file

@ -718,12 +718,12 @@ module VX_bank #(
`ifdef DBG_PRINT_CACHE_BANK
if (NUM_BANKS == 1) begin
always_ff @(posedge clk) begin
/*if (core_req_valid && core_req_ready) begin
if (core_req_valid && core_req_ready) begin
$display("%t: bank%01d%01d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(core_req_addr), core_req_tag);
end
if (core_rsp_valid && core_rsp_ready) begin
$display("%t: bank%01d%01d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
end*/
end
if (dram_fill_req_valid && dram_fill_req_ready) begin
$display("%t: bank%01d%01d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_req_addr));
end
@ -733,21 +733,21 @@ module VX_bank #(
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
$display("%t: bank%01d%01d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(dram_fill_rsp_addr), dram_fill_rsp_data);
end
/*if (snp_req_valid && snp_req_ready) begin
if (snp_req_valid && snp_req_ready) begin
$display("%t: bank%01d%01d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR0(snp_req_addr), snp_req_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: bank%01d%01d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
end*/
end
end
end else begin
always_ff @(posedge clk) begin
/*if ((|core_req_valid) && core_req_ready) begin
if ((|core_req_valid) && core_req_ready) begin
$display("%t: bank%01d%01d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag);
end
if (core_rsp_valid && core_rsp_ready) begin
$display("%t: bank%01d%01d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
end*/
end
if (dram_fill_req_valid && dram_fill_req_ready) begin
$display("%t: bank%01d%01d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
end
@ -757,12 +757,12 @@ module VX_bank #(
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
$display("%t: bank%01d%01d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
end
/*if (snp_req_valid && snp_req_ready) begin
if (snp_req_valid && snp_req_ready) begin
$display("%t: bank%01d%01d snp req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: bank%01d%01d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
end*/
end
end
end
`endif

View file

@ -46,6 +46,7 @@ module VX_cache_core_rsp_merge #(
assign core_rsp_tag = per_bank_core_rsp_tag[main_bank_index];
always @(*) begin
core_rsp_valid = 0;
core_rsp_data = 0;
for (i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
@ -60,6 +61,8 @@ module VX_cache_core_rsp_merge #(
end else begin
always @(*) begin
core_rsp_valid = 0;
core_rsp_data = 0;
core_rsp_tag = 0;
for (i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]

View file

@ -97,7 +97,7 @@ module VX_snp_forwarder #(
always @(posedge clk) begin
if (reset) begin
fwdin_sel <= 0;
end else begin
end else if (NUM_REQUESTS > 1) begin
fwdin_sel <= fwdin_sel + 1;
end
end

View file

@ -136,7 +136,7 @@ module VX_tag_data_access #(
end
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || ~DRAM_ENABLE; // If shared memory, always valid
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE; // Dirty only applies in Dcache
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE && WRITE_ENABLE; // Dirty only applies in Dcache
assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writetag_st1e; // Tag is always the same in SM
assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1];
assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1];

View file

@ -33,7 +33,7 @@ module VX_tag_data_structure #(
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0][7:0] data [`BANK_LINE_COUNT-1:0];
reg [`TAG_SELECT_BITS-1:0] tag [`BANK_LINE_COUNT-1:0];
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0];
reg dirty[`BANK_LINE_COUNT-1:0];
reg [`BANK_LINE_COUNT-1:0] dirty;
reg [`BANK_LINE_COUNT-1:0] valid;
assign read_valid = valid [read_addr];
@ -49,6 +49,7 @@ module VX_tag_data_structure #(
if (reset) begin
for (i = 0; i < `BANK_LINE_COUNT; i++) begin
valid[i] <= 0;
dirty[i] <= 0;
end
end else if (!stall_bank_pipe) begin
if (do_write) begin

View file

@ -15,6 +15,8 @@ module VX_generic_queue #(
output wire full,
output wire [`LOG2UP(SIZE+1)-1:0] size
);
`STATIC_ASSERT(0 == SIZE || `ISPOW2(SIZE), "must be 0 or power of 2!");
if (SIZE == 0) begin
assign empty = 1;
@ -88,6 +90,7 @@ module VX_generic_queue #(
if (writing) begin
data[wr_ptr_a] <= data_in;
wr_ptr_r <= wr_ptr_r + 1;
if (!reading) begin
size_r <= size_r + 1;
end
@ -120,16 +123,17 @@ module VX_generic_queue #(
always @(posedge clk) begin
if (reset) begin
size_r <= 0;
empty_r <= 1;
full_r <= 0;
wr_ptr_r <= 0;
rd_ptr_r <= 0;
rd_ptr_next_r <= 1;
empty_r <= 1;
full_r <= 0;
size_r <= 0;
end else begin
if (writing) begin
data[wr_ptr_r] <= data_in;
wr_ptr_r <= wr_ptr_r + 1;
if (!reading) begin
empty_r <= 0;
if (size_r == SIZE-1) begin
@ -140,15 +144,17 @@ module VX_generic_queue #(
end
if (reading) begin
rd_ptr_r <= rd_ptr_next_r;
if (SIZE == 2) begin
rd_ptr_next_r <= ~rd_ptr_next_r;
end else if (SIZE > 2) begin
rd_ptr_r <= rd_ptr_next_r;
if (SIZE > 2) begin
rd_ptr_next_r <= rd_ptr_r + 2;
end else begin // (SIZE == 2);
rd_ptr_next_r <= ~rd_ptr_next_r;
end
if (!writing) begin
if (size_r == 1) begin
assert(rd_ptr_next_r == wr_ptr_r);
empty_r <= 1;
end;
full_r <= 0;
@ -156,7 +162,9 @@ module VX_generic_queue #(
end
end
bypass_r <= writing && (empty_r || (1 == size_r) && reading);
bypass_r <= writing
&& (empty_r || ((1 == size_r) && reading)); // empty or about to go empty
curr_r <= data_in;
head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r];
end

View file

@ -10,9 +10,8 @@ double sc_time_stamp() {
}
Simulator::Simulator() {
// force random values for unitialized signals
const char* args[] = {"", "+verilator+rand+reset+2", "+verilator+seed+50"};
Verilated::commandArgs(3, args);
// force random values for unitialized signals
Verilated::randReset(2);
ram_ = nullptr;
vortex_ = new VVortex_Socket();