mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
fixed l3cache hang using memory arbiter in afu
This commit is contained in:
parent
2e0f51af80
commit
5d58bf3d11
20 changed files with 514 additions and 388 deletions
|
@ -4,20 +4,21 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
|||
CFLAGS += -I../../../../hw
|
||||
|
||||
# control RTL debug print states
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
|
@ -59,7 +60,7 @@ VL_FLAGS += verilator.vlt
|
|||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs --trace-threads 1 $(DBG_FLAGS)
|
||||
VL_FLAGS += -DVCD_OUTPUT --assert --trace --trace-structs $(DBG_FLAGS)
|
||||
CFLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
|
@ -78,7 +79,7 @@ VL_FLAGS += -DNOPAE
|
|||
CFLAGS += -DNOPAE
|
||||
|
||||
# use DPI FPU
|
||||
#VL_FLAGS += -DFPU_FAST
|
||||
VL_FLAGS += -DFPU_FAST
|
||||
|
||||
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
||||
|
||||
|
@ -94,7 +95,7 @@ $(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json
|
|||
|
||||
$(PROJECT): $(SRCS) $(SCOPE_VH)
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
|
||||
OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh
|
||||
|
|
|
@ -9,15 +9,16 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
|
@ -70,7 +71,7 @@ all: $(PROJECT)
|
|||
|
||||
$(PROJECT): $(SRCS)
|
||||
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
|
||||
OPT_FAST="-O0 -g" make -j -C obj_dir -f V$(TOP).mk
|
||||
make -j -C obj_dir -f V$(TOP).mk
|
||||
|
||||
clean:
|
||||
rm -rf $(PROJECT) obj_dir
|
||||
|
|
|
@ -14,17 +14,29 @@ union Float_t {
|
|||
} parts;
|
||||
};
|
||||
|
||||
// Rounds `x` to `precision` decimal digits (4 by default).
//
// The scale factor is std::pow(10, precision), which yields a double, so the
// multiply, round and divide are all carried out in double precision; only
// the final result is narrowed back to float (made explicit with a cast).
inline float fround(float x, int32_t precision = 4) {
  const auto scale = std::pow(10, precision);
  return static_cast<float>(std::round(x * scale) / scale);
}
|
||||
|
||||
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
|
||||
auto tolerance = std::max(fabs(a), fabs(b)) * eps;
|
||||
auto tolerance = std::min(fabs(a), fabs(b)) * eps;
|
||||
return fabs(a - b) <= tolerance;
|
||||
}
|
||||
|
||||
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 5) {
|
||||
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
|
||||
Float_t fa{a}, fb{b};
|
||||
return std::abs(fa.i - fb.i) <= ulp;
|
||||
auto d = std::abs(fa.i - fb.i);
|
||||
if (d > ulp) {
|
||||
std::cout << "*** float compare: a=" << a << ", b=" << b << ", ulp=" << d << ", ia=" << std::hex << fa.i << ", ib=" << fb.i << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool almost_equal(float a, float b) {
|
||||
if (almost_equal_eps(a, b))
|
||||
return true;
|
||||
return almost_equal_ulp(a, b);
|
||||
}
|
||||
|
||||
|
@ -158,8 +170,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -186,8 +198,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -214,8 +226,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -242,8 +254,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -270,8 +282,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -298,8 +310,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -326,8 +338,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -354,8 +366,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -384,8 +396,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -412,8 +424,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n - i) * (1.0f/n);
|
||||
b[i] = (n + i) * (1.0f/n);
|
||||
a[i] = fround((n - i) * (1.0f/n));
|
||||
b[i] = fround((n + i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -442,7 +454,7 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
int q = 1.0f + (i % 64);
|
||||
float q = 1.0f + (i % 64);
|
||||
a[i] = q;
|
||||
b[i] = q;
|
||||
}
|
||||
|
@ -471,8 +483,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = (n/2 - i) * (1.0f/n);
|
||||
b[i] = (n/2 - i) * (1.0f/n);
|
||||
a[i] = fround((n/2 - i) * (1.0f/n));
|
||||
b[i] = fround((n/2 - i) * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -500,8 +512,8 @@ public:
|
|||
auto a = (float*)src1;
|
||||
auto b = (float*)src2;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
a[i] = i * (1.0f/n);
|
||||
b[i] = i * (1.0f/n);
|
||||
a[i] = fround(i * (1.0f/n));
|
||||
b[i] = fround(i * (1.0f/n));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
128
hw/opae/VX_avs_wrapper.v
Normal file
128
hw/opae/VX_avs_wrapper.v
Normal file
|
@ -0,0 +1,128 @@
|
|||
`include "VX_platform.vh"

// VX_avs_wrapper
//
// Bridges a valid/ready style DRAM request/response interface onto the AVS
// memory bus signals (presumably Avalon-MM -- confirm against the platform).
//
//  * Requests are forwarded combinationally to the AVS bus.
//  * The tag of every accepted read is pushed into `rd_req_queue`; returned
//    read data is pushed into `rd_rsp_queue`. Both queues are popped together
//    when a response is handed back, so tag and data are paired by position.
//    NOTE(review): this pairing assumes read data returns in request order.
//  * `avs_pending_reads` counts reads accepted but not yet returned to the
//    requester; new requests are refused once it reaches RD_QUEUE_SIZE so the
//    two queues cannot receive more entries than they were sized for.
//
// Parameters:
//   AVS_DATAW      - data width of the AVS bus and of the DRAM interface
//   AVS_ADDRW      - address width
//   AVS_BURSTW     - width of avs_burstcount
//   AVS_BANKS      - number of banks (sizes avs_bankselect)
//   REQ_TAGW       - width of the request/response tag
//   RD_QUEUE_SIZE  - maximum number of outstanding reads
//   AVS_BYTEENW, RD_QUEUE_ADDRW, AVS_BANKS_BITS - derived; not meant to be
//                    overridden.
module VX_avs_wrapper #(
    parameter AVS_DATAW      = 1,
    parameter AVS_ADDRW      = 1,
    parameter AVS_BURSTW     = 1,
    parameter AVS_BANKS      = 1,
    parameter REQ_TAGW       = 1,
    parameter RD_QUEUE_SIZE  = 1,

    parameter AVS_BYTEENW    = (AVS_DATAW / 8),
    parameter RD_QUEUE_ADDRW = $clog2(RD_QUEUE_SIZE+1),
    parameter AVS_BANKS_BITS = $clog2(AVS_BANKS)
) (
    input wire                          clk,
    input wire                          reset,

    // AVS bus
    output wire [AVS_DATAW-1:0]         avs_writedata,
    input wire [AVS_DATAW-1:0]          avs_readdata,
    output wire [AVS_ADDRW-1:0]         avs_address,
    input wire                          avs_waitrequest,
    output wire                         avs_write,
    output wire                         avs_read,
    output wire [AVS_BYTEENW-1:0]       avs_byteenable,
    output wire [AVS_BURSTW-1:0]        avs_burstcount,
    input wire                          avs_readdatavalid,
    output wire [AVS_BANKS_BITS-1:0]    avs_bankselect,

    // DRAM request
    input wire                          dram_req_valid,
    input wire                          dram_req_rw,
    input wire [AVS_BYTEENW-1:0]        dram_req_byteen,
    input wire [AVS_ADDRW-1:0]          dram_req_addr,
    input wire [AVS_DATAW-1:0]          dram_req_data,
    input wire [REQ_TAGW-1:0]           dram_req_tag,
    output wire                         dram_req_ready,

    // DRAM response
    output wire                         dram_rsp_valid,
    output wire [AVS_DATAW-1:0]         dram_rsp_data,
    output wire [REQ_TAGW-1:0]          dram_rsp_tag,
    input wire                          dram_rsp_ready
);
    // Only ever written in reset: bank 0, burst length 1.
    reg [AVS_BANKS_BITS-1:0] avs_bankselect_r;
    reg [AVS_BURSTW-1:0]     avs_burstcount_r;

    // A read tag is recorded when a read request is accepted and released
    // when the matching response is consumed by the requester.
    wire avs_rtq_push = !dram_req_rw && dram_req_valid && dram_req_ready;
    wire avs_rtq_pop  = dram_rsp_valid && dram_rsp_ready;

    // Read data is captured as it arrives and released together with its tag.
    wire avs_rdq_push = avs_readdatavalid;
    wire avs_rdq_pop  = avs_rtq_pop;
    wire avs_rdq_empty;

    // Number of reads accepted but not yet returned to the requester.
    reg  [RD_QUEUE_ADDRW-1:0] avs_pending_reads;
    wire [RD_QUEUE_ADDRW-1:0] avs_pending_reads_n;

    // Room left to track one more outstanding read.
    wire avs_rdq_has_space = (avs_pending_reads < RD_QUEUE_SIZE);

    // +1 on push only, -1 on pop only, unchanged when both or neither occur.
    assign avs_pending_reads_n = avs_pending_reads
        + RD_QUEUE_ADDRW'((avs_rtq_push && !avs_rdq_pop) ? 1 :
                          (avs_rdq_pop && !avs_rtq_push) ? -1 : 0);

    always @(posedge clk) begin
        if (reset) begin
            avs_burstcount_r  <= 1;
            avs_bankselect_r  <= 0;
            avs_pending_reads <= 0;
        end else begin
            avs_pending_reads <= avs_pending_reads_n;
        end
    end

    // Tags of outstanding reads.
    VX_generic_queue #(
        .DATAW (REQ_TAGW),
        .SIZE  (RD_QUEUE_SIZE)
    ) rd_req_queue (
        .clk      (clk),
        .reset    (reset),
        .push     (avs_rtq_push),
        .data_in  (dram_req_tag),
        .pop      (avs_rtq_pop),
        .data_out (dram_rsp_tag),
        `UNUSED_PIN (empty),
        `UNUSED_PIN (full),
        `UNUSED_PIN (size)
    );

    // Returned read data awaiting consumption by the requester.
    VX_generic_queue #(
        .DATAW (AVS_DATAW),
        .SIZE  (RD_QUEUE_SIZE)
    ) rd_rsp_queue (
        .clk      (clk),
        .reset    (reset),
        .push     (avs_rdq_push),
        .data_in  (avs_readdata),
        .pop      (avs_rdq_pop),
        .data_out (dram_rsp_data),
        .empty    (avs_rdq_empty),
        `UNUSED_PIN (full),
        `UNUSED_PIN (size)
    );

    // The bus strobes are qualified with the same queue-space condition that
    // gates dram_req_ready. Otherwise, with the read queue full and
    // avs_waitrequest low, the strobe would remain asserted while the
    // requester is being told "not ready": the memory side could accept a
    // transfer that is never acknowledged upstream and never tracked here.
    // (Assumes the AVS slave accepts a transfer whenever read/write is
    // asserted and waitrequest is low -- confirm against the bus spec.)
    assign avs_read       = dram_req_valid && !dram_req_rw && avs_rdq_has_space;
    assign avs_write      = dram_req_valid &&  dram_req_rw && avs_rdq_has_space;
    assign avs_address    = dram_req_addr;
    assign avs_byteenable = dram_req_byteen;
    assign avs_writedata  = dram_req_data;
    assign dram_req_ready = !avs_waitrequest && avs_rdq_has_space;
    assign avs_burstcount = avs_burstcount_r;
    assign avs_bankselect = avs_bankselect_r;

    assign dram_rsp_valid = !avs_rdq_empty;

`ifdef DBG_PRINT_AVS
    always @(posedge clk) begin
        if (dram_req_valid && dram_req_ready) begin
            if (dram_req_rw)
                $display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_writedata);
            else
                $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, dram_req_tag, avs_pending_reads_n);
        end
        if (dram_rsp_valid && dram_rsp_ready) begin
            // Report the data actually handed to the requester (queue head),
            // not whatever happens to be on the bus this cycle.
            $display("%t: AVS Rd Rsp: data=%0h, pending=%0d", $time, dram_rsp_data, avs_pending_reads_n);
        end
    end
`endif

endmodule
|
|
@ -10,12 +10,14 @@
|
|||
#+define+DBG_PRINT_CACHE_BANK
|
||||
#+define+DBG_PRINT_CACHE_SNP
|
||||
#+define+DBG_PRINT_CACHE_MSRQ
|
||||
#+define+DBG_PRINT_CACHE_TAG
|
||||
#+define+DBG_PRINT_CACHE_DATA
|
||||
#+define+DBG_PRINT_DRAM
|
||||
#+define+DBG_PRINT_PIPELINE
|
||||
#+define+DBG_PRINT_OPAE
|
||||
#+define+DBG_CORE_REQ_INFO
|
||||
#+define+DBG_PRINT_AVS
|
||||
#+define+DBG_PRINT_SCOPE
|
||||
#+define+DBG_CACHE_REQ_INFO
|
||||
|
||||
vortex_afu.json
|
||||
QI:vortex_afu.qsf
|
||||
|
|
|
@ -39,11 +39,13 @@ module vortex_afu #(
|
|||
|
||||
localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
|
||||
localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data);
|
||||
|
||||
localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH);
|
||||
|
||||
localparam VX_DRAM_LINE_LW = $clog2(`VX_DRAM_LINE_WIDTH);
|
||||
localparam VX_DRAM_LINE_IDX = (DRAM_LINE_LW - VX_DRAM_LINE_LW);
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 16;
|
||||
localparam AVS_REQ_TAGW = `VX_DRAM_TAG_WIDTH + VX_DRAM_LINE_IDX;
|
||||
|
||||
localparam CCI_RD_WINDOW_SIZE = 8;
|
||||
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
|
||||
|
@ -134,28 +136,12 @@ wire [31:0] vx_csr_io_rsp_data;
|
|||
wire vx_csr_io_rsp_ready;
|
||||
|
||||
reg vx_reset;
|
||||
reg vx_enabled;
|
||||
wire vx_busy;
|
||||
|
||||
// AVS Queues /////////////////////////////////////////////////////////////////
|
||||
|
||||
wire avs_rtq_push;
|
||||
wire avs_rtq_pop;
|
||||
`DEBUG_BEGIN
|
||||
wire avs_rtq_empty;
|
||||
wire avs_rtq_full;
|
||||
`DEBUG_BEGIN
|
||||
|
||||
wire avs_rdq_push;
|
||||
wire avs_rdq_pop;
|
||||
t_local_mem_data avs_rdq_dout;
|
||||
wire avs_rdq_empty;
|
||||
`DEBUG_BEGIN
|
||||
wire avs_rdq_full;
|
||||
`DEBUG_END
|
||||
|
||||
// CMD variables //////////////////////////////////////////////////////////////
|
||||
|
||||
t_ccip_clAddr cmd_io_addr;
|
||||
t_ccip_clAddr cmd_io_addr;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size;
|
||||
|
||||
|
@ -167,9 +153,9 @@ wire cmd_scope_write;
|
|||
`endif
|
||||
|
||||
reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core;
|
||||
reg [11:0] cmd_csr_addr;
|
||||
reg [31:0] cmd_csr_rdata;
|
||||
reg [31:0] cmd_csr_wdata;
|
||||
reg [11:0] cmd_csr_addr;
|
||||
reg [31:0] cmd_csr_rdata;
|
||||
reg [31:0] cmd_csr_wdata;
|
||||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -189,6 +175,7 @@ assign cmd_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_SCOPE_READ == mmi
|
|||
assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_hdr.address);
|
||||
`endif
|
||||
|
||||
/*
|
||||
`DEBUG_BEGIN
|
||||
wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid;
|
||||
wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid;
|
||||
|
@ -201,6 +188,7 @@ wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length;
|
|||
wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid;
|
||||
wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_sRxPort.c0.hdr.mdata;
|
||||
`DEBUG_END
|
||||
*/
|
||||
|
||||
wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address)) ? 3'(cp2af_sRxPort.c0.data) : 3'h0;
|
||||
|
||||
|
@ -220,13 +208,8 @@ always @(posedge clk) begin
|
|||
`ifndef VERILATOR
|
||||
$asserton; // enable assertions
|
||||
`endif
|
||||
|
||||
mmio_tx.hdr <= 0;
|
||||
mmio_tx.data <= 0;
|
||||
mmio_tx.mmioRdValid <= 0;
|
||||
cmd_io_addr <= 0;
|
||||
cmd_mem_addr <= 0;
|
||||
cmd_data_size <= 0;
|
||||
`ifdef SCOPE
|
||||
scope_start <= 0;
|
||||
`endif
|
||||
|
@ -359,10 +342,10 @@ wire cmd_run_done;
|
|||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
vx_reset <= 0;
|
||||
end
|
||||
else begin
|
||||
state <= STATE_IDLE;
|
||||
vx_reset <= 0;
|
||||
vx_enabled <= 0;
|
||||
end else begin
|
||||
|
||||
vx_reset <= 0;
|
||||
|
||||
|
@ -385,7 +368,8 @@ always @(posedge clk) begin
|
|||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE START", $time);
|
||||
`endif
|
||||
vx_reset <= 1;
|
||||
vx_reset <= 1;
|
||||
vx_enabled <= 1;
|
||||
state <= STATE_START;
|
||||
end
|
||||
CMD_CLFLUSH: begin
|
||||
|
@ -480,215 +464,186 @@ end
|
|||
|
||||
// AVS Controller /////////////////////////////////////////////////////////////
|
||||
|
||||
wire vortex_enabled;
|
||||
wire cci_rdq_empty;
|
||||
wire dram_req_valid;
|
||||
wire dram_req_rw;
|
||||
t_local_mem_byte_mask dram_req_byteen;
|
||||
t_local_mem_addr dram_req_addr;
|
||||
t_local_mem_data dram_req_data;
|
||||
wire [AVS_REQ_TAGW:0] dram_req_tag;
|
||||
wire dram_req_ready;
|
||||
|
||||
wire dram_rsp_valid;
|
||||
t_local_mem_data dram_rsp_data;
|
||||
wire [AVS_REQ_TAGW:0] dram_rsp_tag;
|
||||
wire dram_rsp_ready;
|
||||
|
||||
wire cci_dram_req_valid;
|
||||
wire cci_dram_req_rw;
|
||||
t_local_mem_byte_mask cci_dram_req_byteen;
|
||||
t_local_mem_addr cci_dram_req_addr;
|
||||
t_local_mem_data cci_dram_req_data;
|
||||
wire [AVS_REQ_TAGW-1:0] cci_dram_req_tag;
|
||||
wire cci_dram_req_ready;
|
||||
|
||||
wire cci_dram_rsp_valid;
|
||||
t_local_mem_data cci_dram_rsp_data;
|
||||
wire [AVS_REQ_TAGW-1:0] cci_dram_rsp_tag;
|
||||
wire cci_dram_rsp_ready;
|
||||
|
||||
wire vx_dram_req_valid_qual;
|
||||
t_local_mem_addr vx_dram_req_addr_qual;
|
||||
t_local_mem_byte_mask vx_dram_req_byteen_qual;
|
||||
t_local_mem_data vx_dram_req_data_qual;
|
||||
wire [AVS_REQ_TAGW-1:0] vx_dram_req_tag_qual;
|
||||
|
||||
wire [(1 << VX_DRAM_LINE_IDX)-1:0][`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data_unqual;
|
||||
wire [AVS_REQ_TAGW-1:0] vx_dram_rsp_tag_unqual;
|
||||
|
||||
wire cci_dram_rd_req_valid, cci_dram_wr_req_valid;
|
||||
wire [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
|
||||
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
|
||||
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx, vx_dram_rsp_idx;
|
||||
|
||||
wire cci_dram_rd_req_fire;
|
||||
wire cci_dram_wr_req_fire;
|
||||
wire vx_dram_rd_req_fire;
|
||||
`DEBUG_BEGIN
|
||||
wire vx_dram_wr_req_fire;
|
||||
`DEBUG_END
|
||||
wire vx_dram_rd_rsp_fire;
|
||||
//--
|
||||
|
||||
t_local_mem_byte_mask vx_dram_req_byteen_;
|
||||
reg [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads;
|
||||
wire [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads_next;
|
||||
wire [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
|
||||
assign cci_dram_req_valid = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_valid : cci_dram_rd_req_valid;
|
||||
|
||||
wire cci_dram_rd_req_enable, cci_dram_wr_req_enable;
|
||||
wire vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable;
|
||||
assign cci_dram_req_addr = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_addr : cci_dram_rd_req_addr;
|
||||
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr, cci_dram_wr_req_ctr;
|
||||
assign cci_dram_req_rw = (CMD_MEM_WRITE == state);
|
||||
|
||||
assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
||||
assign cci_dram_req_byteen = {64{1'b1}};
|
||||
|
||||
assign cci_dram_rd_req_enable = (state == STATE_READ)
|
||||
&& (avs_pending_reads < AVS_RD_QUEUE_SIZE)
|
||||
&& (cci_dram_rd_req_ctr != 0);
|
||||
assign cci_dram_req_data = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW];
|
||||
|
||||
assign cci_dram_wr_req_enable = (state == STATE_WRITE)
|
||||
&& !cci_rdq_empty
|
||||
&& (cci_dram_wr_req_ctr < cmd_data_size);
|
||||
assign cci_dram_req_tag = AVS_REQ_TAGW'(0);
|
||||
|
||||
assign vx_dram_req_enable = vortex_enabled && (avs_pending_reads < AVS_RD_QUEUE_SIZE);
|
||||
assign vx_dram_rd_req_enable = vx_dram_req_enable && vx_dram_req_valid && !vx_dram_req_rw;
|
||||
assign vx_dram_wr_req_enable = vx_dram_req_enable && vx_dram_req_valid && vx_dram_req_rw;
|
||||
`UNUSED_VAR (cci_dram_rsp_tag)
|
||||
|
||||
assign cci_dram_rd_req_fire = cci_dram_rd_req_enable && !avs_waitrequest;
|
||||
assign cci_dram_wr_req_fire = cci_dram_wr_req_enable && !avs_waitrequest;
|
||||
//--
|
||||
|
||||
assign vx_dram_rd_req_fire = vx_dram_rd_req_enable && !avs_waitrequest;
|
||||
assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && !avs_waitrequest;
|
||||
assign vx_dram_req_valid_qual = vx_dram_req_valid && vx_enabled;
|
||||
|
||||
assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready;
|
||||
|
||||
assign avs_pending_reads_next = avs_pending_reads
|
||||
+ $bits(avs_pending_reads)'(((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && !avs_rdq_pop) ? 1 :
|
||||
(~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop) ? -1 : 0);
|
||||
assign vx_dram_req_addr_qual = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
|
||||
|
||||
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
|
||||
assign vx_dram_req_offset = ((DRAM_LINE_LW)'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0])) << VX_DRAM_LINE_LW;
|
||||
assign vx_dram_req_byteen_ = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]) << (VX_DRAM_LINE_LW - 3));
|
||||
assign vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0];
|
||||
assign vx_dram_req_byteen_qual = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]) << (VX_DRAM_LINE_LW-3));
|
||||
assign vx_dram_req_data_qual = DRAM_LINE_WIDTH'(vx_dram_req_data) << ((DRAM_LINE_LW'(vx_dram_req_idx)) << VX_DRAM_LINE_LW);
|
||||
assign vx_dram_req_tag_qual = {vx_dram_req_tag, vx_dram_req_idx};
|
||||
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx];
|
||||
end else begin
|
||||
assign vx_dram_req_offset = 0;
|
||||
assign vx_dram_req_byteen_ = vx_dram_req_byteen;
|
||||
assign vx_dram_req_idx = VX_DRAM_LINE_IDX'(0);
|
||||
assign vx_dram_req_byteen_qual = vx_dram_req_byteen;
|
||||
assign vx_dram_req_tag_qual = vx_dram_req_tag;
|
||||
assign vx_dram_req_data_qual = vx_dram_req_data;
|
||||
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual;
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
case (state)
|
||||
CMD_MEM_READ: avs_address = cci_dram_rd_req_addr;
|
||||
CMD_MEM_WRITE: avs_address = cci_dram_wr_req_addr + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
|
||||
default: avs_address = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
|
||||
endcase
|
||||
assign vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0];
|
||||
assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_IDX-1:VX_DRAM_LINE_IDX];
|
||||
|
||||
case (state)
|
||||
CMD_MEM_READ: avs_byteenable = 64'hffffffffffffffff;
|
||||
CMD_MEM_WRITE: avs_byteenable = 64'hffffffffffffffff;
|
||||
default: avs_byteenable = vx_dram_req_byteen_;
|
||||
endcase
|
||||
//--
|
||||
|
||||
case (state)
|
||||
CMD_MEM_WRITE: avs_writedata = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW];
|
||||
default: avs_writedata = DRAM_LINE_WIDTH'(vx_dram_req_data) << vx_dram_req_offset;
|
||||
endcase
|
||||
end
|
||||
VX_mem_arb #(
|
||||
.NUM_REQUESTS (2),
|
||||
.DATA_WIDTH ($bits(t_local_mem_data)),
|
||||
.ADDR_WIDTH ($bits(t_local_mem_addr)),
|
||||
.TAG_IN_WIDTH (AVS_REQ_TAGW),
|
||||
.TAG_OUT_WIDTH (AVS_REQ_TAGW+1)
|
||||
) vx_cci_avs_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
assign avs_read = cci_dram_rd_req_enable || vx_dram_rd_req_enable;
|
||||
assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable;
|
||||
// Source request
|
||||
.req_valid_in ({cci_dram_req_valid, vx_dram_req_valid_qual}),
|
||||
.req_rw_in ({cci_dram_req_rw, vx_dram_req_rw}),
|
||||
.req_byteen_in ({cci_dram_req_byteen, vx_dram_req_byteen_qual}),
|
||||
.req_addr_in ({cci_dram_req_addr, vx_dram_req_addr_qual}),
|
||||
.req_data_in ({cci_dram_req_data, vx_dram_req_data_qual}),
|
||||
.req_tag_in ({cci_dram_req_tag, vx_dram_req_tag_qual}),
|
||||
.req_ready_in ({cci_dram_req_ready, vx_dram_req_ready}),
|
||||
|
||||
assign cmd_write_done = (cci_dram_wr_req_ctr >= cmd_data_size);
|
||||
// Source response
|
||||
.rsp_valid_out ({cci_dram_rsp_valid, vx_dram_rsp_valid}),
|
||||
.rsp_data_out ({cci_dram_rsp_data, vx_dram_rsp_data_unqual}),
|
||||
.rsp_tag_out ({cci_dram_rsp_tag, vx_dram_rsp_tag_unqual}),
|
||||
.rsp_ready_out ({cci_dram_rsp_ready, vx_dram_rsp_ready}),
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset)
|
||||
begin
|
||||
mem_bank_select <= 0;
|
||||
avs_burstcount <= 1;
|
||||
cci_dram_rd_req_addr <= 0;
|
||||
cci_dram_wr_req_addr <= 0;
|
||||
cci_dram_rd_req_ctr <= 0;
|
||||
cci_dram_wr_req_ctr <= 0;
|
||||
avs_pending_reads <= 0;
|
||||
end
|
||||
else begin
|
||||
|
||||
if (state == STATE_IDLE) begin
|
||||
if (CMD_MEM_READ == cmd_type) begin
|
||||
cci_dram_rd_req_addr <= cmd_mem_addr;
|
||||
cci_dram_rd_req_ctr <= cmd_data_size;
|
||||
end
|
||||
else if (CMD_MEM_WRITE == cmd_type) begin
|
||||
cci_dram_wr_req_addr <= cmd_mem_addr;
|
||||
cci_dram_wr_req_ctr <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (cci_dram_rd_req_fire) begin
|
||||
cci_dram_rd_req_addr <= cci_dram_rd_req_addr + DRAM_ADDR_WIDTH'(1);
|
||||
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (cci_dram_rd_req_ctr - 1), avs_pending_reads_next);
|
||||
`endif
|
||||
end
|
||||
|
||||
if (cci_dram_wr_req_fire) begin
|
||||
cci_dram_wr_req_addr <= cci_dram_wr_req_addr + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
|
||||
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (cci_dram_wr_req_ctr + 1));
|
||||
`endif
|
||||
end
|
||||
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
if (vx_dram_rd_req_fire) begin
|
||||
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, vx_dram_req_tag, avs_pending_reads_next);
|
||||
end
|
||||
|
||||
if (vx_dram_wr_req_fire) begin
|
||||
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, vx_dram_req_tag, avs_writedata);
|
||||
end
|
||||
|
||||
if (avs_readdatavalid) begin
|
||||
$display("%t: AVS Rd Rsp: data=%0h, pending=%0d", $time, avs_readdata, avs_pending_reads_next);
|
||||
end
|
||||
`endif
|
||||
|
||||
avs_pending_reads <= avs_pending_reads_next;
|
||||
end
|
||||
end
|
||||
|
||||
// Vortex DRAM requests
|
||||
|
||||
assign vx_dram_req_ready = vx_dram_req_enable && !avs_waitrequest;
|
||||
|
||||
// Vortex DRAM fill response
|
||||
|
||||
assign vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty;
|
||||
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
|
||||
assign vx_dram_rsp_data = (`VX_DRAM_LINE_WIDTH)'(avs_rdq_dout >> vx_dram_rsp_offset);
|
||||
end else begin
|
||||
assign vx_dram_rsp_data = avs_rdq_dout;
|
||||
end
|
||||
|
||||
// AVS address read request queue /////////////////////////////////////////////
|
||||
|
||||
assign avs_rtq_push = vx_dram_rd_req_fire;
|
||||
assign avs_rtq_pop = vx_dram_rd_rsp_fire;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW (`VX_DRAM_TAG_WIDTH + DRAM_LINE_LW),
|
||||
.SIZE (AVS_RD_QUEUE_SIZE)
|
||||
) avs_rd_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (avs_rtq_push),
|
||||
.data_in ({vx_dram_req_tag, vx_dram_req_offset}),
|
||||
.pop (avs_rtq_pop),
|
||||
.data_out ({vx_dram_rsp_tag, vx_dram_rsp_offset}),
|
||||
.empty (avs_rtq_empty),
|
||||
.full (avs_rtq_full),
|
||||
`UNUSED_PIN (size)
|
||||
// DRAM request
|
||||
.req_valid_out (dram_req_valid),
|
||||
.req_rw_out (dram_req_rw),
|
||||
.req_byteen_out (dram_req_byteen),
|
||||
.req_addr_out (dram_req_addr),
|
||||
.req_data_out (dram_req_data),
|
||||
.req_tag_out (dram_req_tag),
|
||||
.req_ready_out (dram_req_ready),
|
||||
|
||||
// DRAM response
|
||||
.rsp_valid_in (dram_rsp_valid),
|
||||
.rsp_tag_in (dram_rsp_tag),
|
||||
.rsp_data_in (dram_rsp_data),
|
||||
.rsp_ready_in (dram_rsp_ready)
|
||||
);
|
||||
|
||||
// AVS data read response queue ///////////////////////////////////////////////
|
||||
//--
|
||||
|
||||
wire cci_wr_req_fire;
|
||||
VX_avs_wrapper #(
|
||||
.AVS_DATAW ($bits(t_local_mem_data)),
|
||||
.AVS_ADDRW ($bits(t_local_mem_addr)),
|
||||
.AVS_BURSTW ($bits(t_local_mem_burst_cnt)),
|
||||
.AVS_BANKS (NUM_LOCAL_MEM_BANKS),
|
||||
.REQ_TAGW (AVS_REQ_TAGW+1),
|
||||
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE)
|
||||
) avs_wrapper (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
assign avs_rdq_push = avs_readdatavalid;
|
||||
assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire;
|
||||
// AVS bus
|
||||
.avs_writedata (avs_writedata),
|
||||
.avs_readdata (avs_readdata),
|
||||
.avs_address (avs_address),
|
||||
.avs_waitrequest (avs_waitrequest),
|
||||
.avs_write (avs_write),
|
||||
.avs_read (avs_read),
|
||||
.avs_byteenable (avs_byteenable),
|
||||
.avs_burstcount (avs_burstcount),
|
||||
.avs_readdatavalid (avs_readdatavalid),
|
||||
.avs_bankselect (mem_bank_select),
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW (DRAM_LINE_WIDTH),
|
||||
.SIZE (AVS_RD_QUEUE_SIZE)
|
||||
) avs_rd_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (avs_rdq_push),
|
||||
.data_in (avs_readdata),
|
||||
.pop (avs_rdq_pop),
|
||||
.data_out (avs_rdq_dout),
|
||||
.empty (avs_rdq_empty),
|
||||
.full (avs_rdq_full),
|
||||
`UNUSED_PIN (size)
|
||||
// DRAM request
|
||||
.dram_req_valid (dram_req_valid),
|
||||
.dram_req_rw (dram_req_rw),
|
||||
.dram_req_byteen (dram_req_byteen),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_tag (dram_req_tag),
|
||||
.dram_req_ready (dram_req_ready),
|
||||
|
||||
// DRAM response
|
||||
.dram_rsp_valid (dram_rsp_valid),
|
||||
.dram_rsp_data (dram_rsp_data),
|
||||
.dram_rsp_tag (dram_rsp_tag),
|
||||
.dram_rsp_ready (dram_rsp_ready)
|
||||
);
|
||||
|
||||
// CCI-P Read Request ///////////////////////////////////////////////////////////
|
||||
|
||||
reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads;
|
||||
wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_ctr;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr;
|
||||
wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_addr_unqual;
|
||||
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag;
|
||||
reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr;
|
||||
t_ccip_clAddr cci_rd_req_addr;
|
||||
|
||||
wire cci_rd_req_fire, cci_rd_rsp_fire;
|
||||
reg cci_rd_req_enable, cci_rd_req_wait;
|
||||
|
||||
wire cci_rdq_push, cci_rdq_pop;
|
||||
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din;
|
||||
wire cci_rdq_empty;
|
||||
|
||||
always @(*) begin
|
||||
af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0);
|
||||
|
@ -696,8 +651,10 @@ always @(*) begin
|
|||
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag);
|
||||
end
|
||||
|
||||
assign cci_rd_req_fire = af2cp_sTxPort.c0.valid;
|
||||
assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
|
||||
wire cci_dram_wr_req_fire = cci_dram_wr_req_valid && cci_dram_req_ready;
|
||||
|
||||
wire cci_rd_req_fire = af2cp_sTxPort.c0.valid;
|
||||
wire cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
|
||||
|
||||
assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr);
|
||||
assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
|
||||
|
@ -712,28 +669,36 @@ assign cci_pending_reads_next = cci_pending_reads
|
|||
+ $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 :
|
||||
(!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0);
|
||||
|
||||
assign cci_dram_wr_req_valid = !cci_rdq_empty;
|
||||
|
||||
assign cci_dram_wr_req_addr = cci_dram_wr_req_addr_unqual + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
|
||||
|
||||
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait;
|
||||
|
||||
assign cmd_write_done = (cci_dram_wr_req_ctr == cmd_data_size);
|
||||
|
||||
// Send read requests to CCI
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
cci_rd_req_addr <= 0;
|
||||
cci_rd_req_ctr <= 0;
|
||||
cci_rd_rsp_ctr <= 0;
|
||||
cci_pending_reads <= 0;
|
||||
cci_rd_req_enable <= 0;
|
||||
cci_rd_req_wait <= 0;
|
||||
cci_rd_req_addr <= 0;
|
||||
cci_rd_req_ctr <= 0;
|
||||
cci_rd_rsp_ctr <= 0;
|
||||
cci_pending_reads <= 0;
|
||||
cci_rd_req_enable <= 0;
|
||||
cci_rd_req_wait <= 0;
|
||||
cci_dram_wr_req_ctr <= 0;
|
||||
end
|
||||
else begin
|
||||
|
||||
else begin
|
||||
if ((STATE_IDLE == state)
|
||||
&& (CMD_MEM_WRITE == cmd_type)) begin
|
||||
cci_rd_req_addr <= cmd_io_addr;
|
||||
cci_rd_req_ctr <= 0;
|
||||
cci_rd_rsp_ctr <= 0;
|
||||
cci_pending_reads <= 0;
|
||||
cci_rd_req_enable <= (cmd_data_size != 0);
|
||||
cci_rd_req_wait <= 0;
|
||||
cci_rd_req_addr <= cmd_io_addr;
|
||||
cci_rd_req_ctr <= 0;
|
||||
cci_rd_rsp_ctr <= 0;
|
||||
cci_pending_reads <= 0;
|
||||
cci_rd_req_enable <= (cmd_data_size != 0);
|
||||
cci_rd_req_wait <= 0;
|
||||
cci_dram_wr_req_ctr <= 0;
|
||||
cci_dram_wr_req_addr_unqual <= cmd_mem_addr;
|
||||
end
|
||||
|
||||
cci_rd_req_enable <= (STATE_WRITE == state)
|
||||
|
@ -768,6 +733,11 @@ always @(posedge clk) begin
|
|||
`endif
|
||||
end
|
||||
|
||||
if (cci_dram_wr_req_fire) begin
|
||||
cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
|
||||
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
|
||||
end
|
||||
|
||||
cci_pending_reads <= cci_pending_reads_next;
|
||||
end
|
||||
end
|
||||
|
@ -811,57 +781,61 @@ VX_generic_queue #(
|
|||
|
||||
reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes;
|
||||
wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
|
||||
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr_unqual;
|
||||
t_ccip_clAddr cci_wr_req_addr;
|
||||
reg cci_wr_req_enable;
|
||||
wire cci_wr_rsp_fire;
|
||||
|
||||
always @(*) begin
|
||||
af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0);
|
||||
af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr;
|
||||
af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode
|
||||
af2cp_sTxPort.c1.data = t_ccip_clData'(avs_rdq_dout);
|
||||
af2cp_sTxPort.c1.data = t_ccip_clData'(cci_dram_rsp_data);
|
||||
end
|
||||
|
||||
assign cci_wr_req_fire = af2cp_sTxPort.c1.valid;
|
||||
assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid;
|
||||
wire cci_wr_req_fire = af2cp_sTxPort.c1.valid;
|
||||
wire cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid;
|
||||
|
||||
wire cci_dram_rd_req_fire = cci_dram_rd_req_valid && cci_dram_req_ready;
|
||||
|
||||
assign cci_pending_writes_next = cci_pending_writes
|
||||
+ $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 :
|
||||
(!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0);
|
||||
|
||||
assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes);
|
||||
assign cci_dram_rd_req_valid = (cci_dram_rd_req_ctr != 0);
|
||||
|
||||
assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && !avs_rdq_empty;
|
||||
assign cci_dram_rd_req_addr = cci_dram_rd_req_addr_unqual;
|
||||
|
||||
assign af2cp_sTxPort.c1.valid = cci_dram_rsp_valid;
|
||||
assign cci_dram_rsp_ready = !cp2af_sRxPort.c1TxAlmFull;
|
||||
|
||||
assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes);
|
||||
|
||||
// Send write requests to CCI
|
||||
always @(posedge clk)
|
||||
begin
|
||||
if (reset) begin
|
||||
cci_wr_req_addr <= 0;
|
||||
cci_wr_req_ctr <= 0;
|
||||
cci_wr_req_enable <= 0;
|
||||
cci_pending_writes <= 0;
|
||||
cci_wr_req_addr <= 0;
|
||||
cci_wr_req_ctr <= 0;
|
||||
cci_pending_writes <= 0;
|
||||
cci_dram_rd_req_ctr <= 0;
|
||||
end
|
||||
else begin
|
||||
|
||||
else begin
|
||||
if ((STATE_IDLE == state)
|
||||
&& (CMD_MEM_READ == cmd_type)) begin
|
||||
cci_wr_req_addr <= cmd_io_addr;
|
||||
cci_wr_req_ctr <= cmd_data_size;
|
||||
cci_pending_writes <= 0;
|
||||
end
|
||||
|
||||
cci_wr_req_enable <= (STATE_READ == state)
|
||||
&& (cci_pending_writes_next < CCI_RW_QUEUE_SIZE)
|
||||
&& !cp2af_sRxPort.c1TxAlmFull;
|
||||
cci_wr_req_addr <= cmd_io_addr;
|
||||
cci_wr_req_ctr <= cmd_data_size;
|
||||
cci_pending_writes <= 0;
|
||||
cci_dram_rd_req_ctr <= cmd_data_size;
|
||||
cci_dram_rd_req_addr_unqual <= cmd_mem_addr;
|
||||
end
|
||||
|
||||
if (cci_wr_req_fire) begin
|
||||
assert(cci_wr_req_ctr != 0);
|
||||
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
|
||||
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, avs_rdq_dout);
|
||||
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next);
|
||||
`endif
|
||||
end
|
||||
|
||||
|
@ -871,6 +845,11 @@ begin
|
|||
end
|
||||
`endif
|
||||
|
||||
if (cci_dram_rd_req_fire) begin
|
||||
cci_dram_rd_req_addr_unqual <= cci_dram_rd_req_addr_unqual + DRAM_ADDR_WIDTH'(1);
|
||||
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1);
|
||||
end
|
||||
|
||||
cci_pending_writes <= cci_pending_writes_next;
|
||||
end
|
||||
end
|
||||
|
|
|
@ -553,42 +553,42 @@ module VX_cluster #(
|
|||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQUESTS (`L2NUM_REQUESTS),
|
||||
.WORD_SIZE (`L2BANK_LINE_SIZE),
|
||||
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
|
||||
.TAG_IN_WIDTH (`DDRAM_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH)
|
||||
) dram_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core request
|
||||
.mem_req_valid_in (core_dram_req_valid),
|
||||
.mem_req_rw_in (core_dram_req_rw),
|
||||
.mem_req_byteen_in (core_dram_req_byteen),
|
||||
.mem_req_addr_in (core_dram_req_addr),
|
||||
.mem_req_data_in (core_dram_req_data),
|
||||
.mem_req_tag_in (core_dram_req_tag),
|
||||
.mem_req_ready_in (core_dram_req_ready),
|
||||
.req_valid_in (core_dram_req_valid),
|
||||
.req_rw_in (core_dram_req_rw),
|
||||
.req_byteen_in (core_dram_req_byteen),
|
||||
.req_addr_in (core_dram_req_addr),
|
||||
.req_data_in (core_dram_req_data),
|
||||
.req_tag_in (core_dram_req_tag),
|
||||
.req_ready_in (core_dram_req_ready),
|
||||
|
||||
// Core response
|
||||
.mem_rsp_valid_in (core_dram_rsp_valid),
|
||||
.mem_rsp_data_in (core_dram_rsp_data),
|
||||
.mem_rsp_tag_in (core_dram_rsp_tag),
|
||||
.mem_rsp_ready_in (core_dram_rsp_ready),
|
||||
.rsp_valid_out (core_dram_rsp_valid),
|
||||
.rsp_data_out (core_dram_rsp_data),
|
||||
.rsp_tag_out (core_dram_rsp_tag),
|
||||
.rsp_ready_out (core_dram_rsp_ready),
|
||||
|
||||
// DRAM request
|
||||
.mem_req_valid_out (dram_req_valid),
|
||||
.mem_req_rw_out (dram_req_rw),
|
||||
.mem_req_byteen_out (dram_req_byteen),
|
||||
.mem_req_addr_out (dram_req_addr),
|
||||
.mem_req_data_out (dram_req_data),
|
||||
.mem_req_tag_out (dram_req_tag),
|
||||
.mem_req_ready_out (dram_req_ready),
|
||||
.req_valid_out (dram_req_valid),
|
||||
.req_rw_out (dram_req_rw),
|
||||
.req_byteen_out (dram_req_byteen),
|
||||
.req_addr_out (dram_req_addr),
|
||||
.req_data_out (dram_req_data),
|
||||
.req_tag_out (dram_req_tag),
|
||||
.req_ready_out (dram_req_ready),
|
||||
|
||||
// DRAM response
|
||||
.mem_rsp_valid_out (dram_rsp_valid),
|
||||
.mem_rsp_tag_out (dram_rsp_tag),
|
||||
.mem_rsp_data_out (dram_rsp_data),
|
||||
.mem_rsp_ready_out (dram_rsp_ready)
|
||||
.rsp_valid_in (dram_rsp_valid),
|
||||
.rsp_tag_in (dram_rsp_tag),
|
||||
.rsp_data_in (dram_rsp_data),
|
||||
.rsp_ready_in (dram_rsp_ready)
|
||||
);
|
||||
|
||||
end
|
||||
|
|
|
@ -234,10 +234,10 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, rd, wid
|
||||
`define DBG_CORE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
|
||||
`ifdef DBG_CACHE_REQ_INFO // pc, rd, wid
|
||||
`define DBG_CACHE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
|
||||
`else
|
||||
`define DBG_CORE_REQ_MDATAW 0
|
||||
`define DBG_CACHE_REQ_MDATAW 0
|
||||
`endif
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////
|
||||
|
@ -249,7 +249,7 @@
|
|||
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
|
||||
// Core request tag bits
|
||||
`define DCORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
|
||||
// DRAM request data bits
|
||||
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
|
||||
|
@ -287,7 +287,7 @@
|
|||
`define ICORE_TAG_ID_BITS `NW_BITS
|
||||
|
||||
// Core request tag bits
|
||||
`define ICORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
|
||||
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
|
||||
|
||||
// DRAM request data bits
|
||||
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
|
||||
|
|
|
@ -39,10 +39,6 @@ module VX_gpr_stage #(
|
|||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rsp_valid <= 0;
|
||||
rsp_wid <= 0;
|
||||
rsp_pc <= 0;
|
||||
rs1_is_zero <= 0;
|
||||
rs2_is_zero <= 0;
|
||||
end else begin
|
||||
rsp_valid <= gpr_req_if.valid;
|
||||
rsp_wid <= gpr_req_if.wid;
|
||||
|
|
|
@ -45,7 +45,7 @@ module VX_icache_stage #(
|
|||
// Can accept new request?
|
||||
assign ifetch_req_if.ready = icache_req_if.ready;
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign icache_req_if.tag = {ifetch_req_if.PC, `NR_BITS'(0), ifetch_req_if.wid, req_tag};
|
||||
`else
|
||||
assign icache_req_if.tag = req_tag;
|
||||
|
|
|
@ -144,7 +144,7 @@ module VX_lsu_unit #(
|
|||
assign dcache_req_if.addr = req_addr;
|
||||
assign dcache_req_if.data = req_data;
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign dcache_req_if.tag = {req_pc, req_rd, req_wid, req_tag};
|
||||
`else
|
||||
assign dcache_req_if.tag = req_tag;
|
||||
|
|
|
@ -2,46 +2,46 @@
|
|||
|
||||
module VX_mem_arb #(
|
||||
parameter NUM_REQUESTS = 1,
|
||||
parameter WORD_SIZE = 1,
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter TAG_IN_WIDTH = 1,
|
||||
parameter TAG_OUT_WIDTH = 1,
|
||||
|
||||
parameter WORD_WIDTH = WORD_SIZE * 8,
|
||||
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
|
||||
|
||||
parameter DATA_SIZE = (DATA_WIDTH / 8),
|
||||
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
|
||||
parameter REQS_BITS = `CLOG2(NUM_REQUESTS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// input requests
|
||||
input wire [NUM_REQUESTS-1:0] mem_req_valid_in,
|
||||
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_req_tag_in,
|
||||
input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] mem_req_addr_in,
|
||||
input wire [NUM_REQUESTS-1:0] mem_req_rw_in,
|
||||
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] mem_req_byteen_in,
|
||||
input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_req_data_in,
|
||||
output wire [NUM_REQUESTS-1:0] mem_req_ready_in,
|
||||
input wire [NUM_REQUESTS-1:0] req_valid_in,
|
||||
input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] req_tag_in,
|
||||
input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] req_addr_in,
|
||||
input wire [NUM_REQUESTS-1:0] req_rw_in,
|
||||
input wire [NUM_REQUESTS-1:0][DATA_SIZE-1:0] req_byteen_in,
|
||||
input wire [NUM_REQUESTS-1:0][DATA_WIDTH-1:0] req_data_in,
|
||||
output wire [NUM_REQUESTS-1:0] req_ready_in,
|
||||
|
||||
// input response
|
||||
output wire [NUM_REQUESTS-1:0] mem_rsp_valid_in,
|
||||
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] mem_rsp_tag_in,
|
||||
output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] mem_rsp_data_in,
|
||||
input wire [NUM_REQUESTS-1:0] mem_rsp_ready_in,
|
||||
output wire [NUM_REQUESTS-1:0] rsp_valid_out,
|
||||
output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
|
||||
output wire [NUM_REQUESTS-1:0][DATA_WIDTH-1:0] rsp_data_out,
|
||||
input wire [NUM_REQUESTS-1:0] rsp_ready_out,
|
||||
|
||||
// output request
|
||||
output wire mem_req_valid_out,
|
||||
output wire [TAG_OUT_WIDTH-1:0] mem_req_tag_out,
|
||||
output wire [ADDR_WIDTH-1:0] mem_req_addr_out,
|
||||
output wire mem_req_rw_out,
|
||||
output wire [WORD_SIZE-1:0] mem_req_byteen_out,
|
||||
output wire [WORD_WIDTH-1:0] mem_req_data_out,
|
||||
input wire mem_req_ready_out,
|
||||
output wire req_valid_out,
|
||||
output wire [TAG_OUT_WIDTH-1:0] req_tag_out,
|
||||
output wire [ADDR_WIDTH-1:0] req_addr_out,
|
||||
output wire req_rw_out,
|
||||
output wire [DATA_SIZE-1:0] req_byteen_out,
|
||||
output wire [DATA_WIDTH-1:0] req_data_out,
|
||||
input wire req_ready_out,
|
||||
|
||||
// output response
|
||||
input wire mem_rsp_valid_out,
|
||||
input wire [TAG_OUT_WIDTH-1:0] mem_rsp_tag_out,
|
||||
input wire [WORD_WIDTH-1:0] mem_rsp_data_out,
|
||||
output wire mem_rsp_ready_out
|
||||
input wire rsp_valid_in,
|
||||
input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in,
|
||||
input wire [DATA_WIDTH-1:0] rsp_data_in,
|
||||
output wire rsp_ready_in
|
||||
);
|
||||
if (NUM_REQUESTS > 1) begin
|
||||
|
||||
|
@ -53,59 +53,59 @@ module VX_mem_arb #(
|
|||
) req_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (mem_req_valid_in),
|
||||
.requests (req_valid_in),
|
||||
`UNUSED_PIN (grant_valid),
|
||||
.grant_index (req_idx),
|
||||
.grant_onehot (req_1hot)
|
||||
);
|
||||
|
||||
wire stall = ~mem_req_ready_out && mem_req_valid_out;
|
||||
wire stall = ~req_ready_out && req_valid_out;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + WORD_SIZE + WORD_WIDTH),
|
||||
.N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH),
|
||||
.PASSTHRU(NUM_REQUESTS <= 2)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({mem_req_valid_in[req_idx], {mem_req_tag_in[req_idx], REQS_BITS'(req_idx)}, mem_req_addr_in[req_idx], mem_req_rw_in[req_idx], mem_req_byteen_in[req_idx], mem_req_data_in[req_idx]}),
|
||||
.out ({mem_req_valid_out, mem_req_tag_out, mem_req_addr_out, mem_req_rw_out, mem_req_byteen_out, mem_req_data_out})
|
||||
.in ({req_valid_in[req_idx], {req_tag_in[req_idx], REQS_BITS'(req_idx)}, req_addr_in[req_idx], req_rw_in[req_idx], req_byteen_in[req_idx], req_data_in[req_idx]}),
|
||||
.out ({req_valid_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign mem_req_ready_in[i] = req_1hot[i] && ~stall;
|
||||
assign req_ready_in[i] = req_1hot[i] && ~stall;
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [REQS_BITS-1:0] rsp_sel = mem_rsp_tag_out[REQS_BITS-1:0];
|
||||
wire [REQS_BITS-1:0] rsp_sel = rsp_tag_in[REQS_BITS-1:0];
|
||||
|
||||
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||
assign mem_rsp_valid_in[i] = mem_rsp_valid_out && (rsp_sel == REQS_BITS'(i));
|
||||
assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
|
||||
assign mem_rsp_data_in[i] = mem_rsp_data_out;
|
||||
assign rsp_valid_out[i] = rsp_valid_in && (rsp_sel == REQS_BITS'(i));
|
||||
assign rsp_tag_out[i] = rsp_tag_in[REQS_BITS +: TAG_IN_WIDTH];
|
||||
assign rsp_data_out[i] = rsp_data_in;
|
||||
end
|
||||
|
||||
assign mem_rsp_ready_out = mem_rsp_ready_in[rsp_sel];
|
||||
assign rsp_ready_in = rsp_ready_out[rsp_sel];
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
assign mem_req_valid_out = mem_req_valid_in;
|
||||
assign mem_req_tag_out = mem_req_tag_in;
|
||||
assign mem_req_addr_out = mem_req_addr_in;
|
||||
assign mem_req_rw_out = mem_req_rw_in;
|
||||
assign mem_req_byteen_out = mem_req_byteen_in;
|
||||
assign mem_req_data_out = mem_req_data_in;
|
||||
assign mem_req_ready_in = mem_req_ready_out;
|
||||
assign req_valid_out = req_valid_in;
|
||||
assign req_tag_out = req_tag_in;
|
||||
assign req_addr_out = req_addr_in;
|
||||
assign req_rw_out = req_rw_in;
|
||||
assign req_byteen_out = req_byteen_in;
|
||||
assign req_data_out = req_data_in;
|
||||
assign req_ready_in = req_ready_out;
|
||||
|
||||
assign mem_rsp_valid_in = mem_rsp_valid_out;
|
||||
assign mem_rsp_tag_in = mem_rsp_tag_out;
|
||||
assign mem_rsp_data_in = mem_rsp_data_out;
|
||||
assign mem_rsp_ready_out = mem_rsp_ready_in;
|
||||
assign rsp_valid_out = rsp_valid_in;
|
||||
assign rsp_tag_out = rsp_tag_in;
|
||||
assign rsp_data_out = rsp_data_in;
|
||||
assign rsp_ready_in = rsp_ready_out;
|
||||
|
||||
end
|
||||
|
||||
|
|
|
@ -64,23 +64,27 @@ module VX_scoreboard #(
|
|||
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
|
||||
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
reg [31:0] stall_ctr;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
stall_ctr <= 0;
|
||||
end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
|
||||
stall_ctr <= stall_ctr + 1;
|
||||
assert(stall_ctr < 100000) else $error("%t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
|
||||
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
|
||||
stall_ctr <= stall_ctr + 1;
|
||||
if (stall_ctr >= 2000) begin
|
||||
$fflush();
|
||||
assert(0);
|
||||
end
|
||||
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
|
||||
stall_ctr <= 0;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
||||
endmodule
|
16
hw/rtl/cache/VX_bank.v
vendored
16
hw/rtl/cache/VX_bank.v
vendored
|
@ -100,7 +100,7 @@ module VX_bank #(
|
|||
output wire misses
|
||||
);
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] debug_pc_st0;
|
||||
wire[`NR_BITS-1:0] debug_rd_st0;
|
||||
|
@ -352,7 +352,7 @@ module VX_bank #(
|
|||
wire msrq_pending_hazard_st0 = msrq_pending_hazard_unqual_st0
|
||||
|| ((miss_st3 || force_miss_st3) && (addr_st3 == addr_st0));
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = inst_meta_st0;
|
||||
end else begin
|
||||
|
@ -371,7 +371,7 @@ module VX_bank #(
|
|||
.out ({is_msrq_st1, is_snp_st1, snp_invalidate_st1, msrq_pending_hazard_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
|
||||
);
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
|
||||
end else begin
|
||||
|
@ -420,7 +420,7 @@ module VX_bank #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.debug_pc (debug_pc_st1),
|
||||
.debug_rd (debug_rd_st1),
|
||||
.debug_wid (debug_wid_st1),
|
||||
|
@ -474,7 +474,7 @@ module VX_bank #(
|
|||
.out ({is_msrq_st2, writeen_st2, force_miss_st2, is_snp_st2, snp_invalidate_st2, is_fill_st2, valid_st2, addr_st2, wsel_st2, writeword_st2, readtag_st2, miss_st2, dirty_st2, writedata_st2, mem_byteen_st2, inst_meta_st2})
|
||||
);
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||
end else begin
|
||||
|
@ -498,7 +498,7 @@ module VX_bank #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.debug_pc (debug_pc_st2),
|
||||
.debug_rd (debug_rd_st2),
|
||||
.debug_wid (debug_wid_st2),
|
||||
|
@ -562,7 +562,7 @@ module VX_bank #(
|
|||
.out ({is_msrq_st3, send_core_rsp_st3, send_fill_req_st3, do_writeback_st3, send_snp_rsp_st3, force_miss_st3, is_snp_st3, snp_invalidate_st3, valid_st3, addr_st3, wsel_st3, writeword_st3, readword_st3, readdata_st3, readtag_st3, miss_st3, dirtyb_st3, inst_meta_st3})
|
||||
);
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st3, debug_rd_st3, debug_wid_st3, debug_tagid_st3, debug_rw_st3, debug_byteen_st3, debug_tid_st3} = inst_meta_st3;
|
||||
end else begin
|
||||
|
@ -623,7 +623,7 @@ module VX_bank #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
.debug_pc_st0 (debug_pc_st0),
|
||||
.debug_rd_st0 (debug_rd_st0),
|
||||
.debug_wid_st0 (debug_wid_st0),
|
||||
|
|
2
hw/rtl/cache/VX_cache_config.vh
vendored
2
hw/rtl/cache/VX_cache_config.vh
vendored
|
@ -3,7 +3,7 @@
|
|||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`include "VX_define.vh"
|
||||
`endif
|
||||
|
||||
|
|
2
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
2
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
|
@ -24,7 +24,7 @@ module VX_cache_miss_resrv #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[31:0] debug_pc_st0,
|
||||
input wire[`NR_BITS-1:0] debug_rd_st0,
|
||||
|
|
2
hw/rtl/cache/VX_data_access.v
vendored
2
hw/rtl/cache/VX_data_access.v
vendored
|
@ -25,7 +25,7 @@ module VX_data_access #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[31:0] debug_pc,
|
||||
input wire[`NR_BITS-1:0] debug_rd,
|
||||
|
|
4
hw/rtl/cache/VX_tag_access.v
vendored
4
hw/rtl/cache/VX_tag_access.v
vendored
|
@ -25,7 +25,7 @@ module VX_tag_access #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[31:0] debug_pc,
|
||||
input wire[`NR_BITS-1:0] debug_rd,
|
||||
|
@ -122,7 +122,7 @@ module VX_tag_access #(
|
|||
assign readtag_out = use_read_tag;
|
||||
assign writeen_out = (use_do_write || use_do_fill);
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_DATA
|
||||
`ifdef DBG_PRINT_CACHE_TAG
|
||||
always @(posedge clk) begin
|
||||
if (valid_in && !stall) begin
|
||||
if (use_do_fill && tags_match) begin
|
||||
|
|
|
@ -10,15 +10,16 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
|||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
|
||||
|
||||
FPU_INCLUDE = -I../rtl/fp_cores -I../rtl/fp_cores/svdpi -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
|
||||
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/simulate $(FPU_INCLUDE)
|
||||
|
@ -45,7 +46,7 @@ gen-s:
|
|||
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
|
||||
|
||||
gen-sd:
|
||||
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace --trace-structs --trace-threads 1 $(DBG)
|
||||
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace --trace-structs $(DBG)
|
||||
|
||||
gen-st:
|
||||
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS)
|
||||
|
@ -54,7 +55,7 @@ gen-m:
|
|||
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
|
||||
|
||||
gen-md:
|
||||
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace --trace-structs --trace-threads 1 $(DBG)
|
||||
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace --trace-structs $(DBG)
|
||||
|
||||
gen-mt:
|
||||
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS)
|
||||
|
@ -63,7 +64,7 @@ build-s: gen-s
|
|||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
||||
build-sd: gen-sd
|
||||
(cd obj_dir && OPT_FAST="-O0 -g" make -j -f VVortex.mk)
|
||||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
||||
build-st: gen-st
|
||||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
@ -72,7 +73,7 @@ build-m: gen-m
|
|||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
||||
build-md: gen-md
|
||||
(cd obj_dir && OPT_FAST="-O0 -g" make -j -f VVortex.mk)
|
||||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
||||
build-mt: gen-mt
|
||||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
|
4
hw/unit_tests/cache/Makefile
vendored
4
hw/unit_tests/cache/Makefile
vendored
|
@ -8,9 +8,11 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
|
|||
-DDBG_PRINT_CACHE_BANK \
|
||||
-DDBG_PRINT_CACHE_SNP \
|
||||
-DDBG_PRINT_CACHE_MSRQ \
|
||||
-DDBG_PRINT_CACHE_TAG \
|
||||
-DDBG_PRINT_CACHE_DATA \
|
||||
-DDBG_PRINT_DRAM \
|
||||
-DDBG_PRINT_OPAE
|
||||
-DDBG_PRINT_OPAE \
|
||||
-DDBG_PRINT_AVS
|
||||
|
||||
#DBG_PRINT=$(DBG_PRINT_FLAGS)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue