fixed XRT AFU deadlock on exit

This commit is contained in:
Blaise Tine 2024-09-28 05:20:37 -07:00
parent eee037ffcd
commit 87e613d29d
6 changed files with 143 additions and 112 deletions

View file

@ -61,6 +61,9 @@ module VX_afu_wrap #(
localparam STATE_IDLE = 0;
localparam STATE_RUN = 1;
localparam PENDING_SIZEW = 12; // max outstanding requests size
localparam C_M_AXI_MEM_NUM_BANKS_SW = `CLOG2(C_M_AXI_MEM_NUM_BANKS+1);
wire m_axi_mem_awvalid_a [C_M_AXI_MEM_NUM_BANKS];
wire m_axi_mem_awready_a [C_M_AXI_MEM_NUM_BANKS];
wire [C_M_AXI_MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_a [C_M_AXI_MEM_NUM_BANKS];
@ -95,7 +98,7 @@ module VX_afu_wrap #(
`endif
reg [`CLOG2(`RESET_DELAY+1)-1:0] vx_reset_ctr;
reg [15:0] vx_pending_writes;
reg [PENDING_SIZEW-1:0] vx_pending_writes;
reg vx_busy_wait;
reg vx_reset = 1; // asserted at initialization
wire vx_busy;
@ -118,23 +121,10 @@ module VX_afu_wrap #(
wire scope_reset = reset;
`endif
reg m_axi_mem_wfire;
reg m_axi_mem_bfire;
always @(*) begin
m_axi_mem_wfire = 0;
m_axi_mem_bfire = 0;
for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
m_axi_mem_wfire |= m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i];
m_axi_mem_bfire |= m_axi_mem_bvalid_a[i] && m_axi_mem_bready_a[i];
end
end
always @(posedge clk) begin
if (reset || ap_reset) begin
state <= STATE_IDLE;
vx_pending_writes <= '0;
vx_reset <= 1;
state <= STATE_IDLE;
vx_reset <= 1;
end else begin
case (state)
STATE_IDLE: begin
@ -181,12 +171,39 @@ module VX_afu_wrap #(
if (vx_reset_ctr != '0) begin
vx_reset_ctr <= vx_reset_ctr - 1;
end
end
end
// track pending writes
if (m_axi_mem_wfire && ~m_axi_mem_bfire)
vx_pending_writes <= vx_pending_writes + 1;
if (~m_axi_mem_wfire && m_axi_mem_bfire)
vx_pending_writes <= vx_pending_writes - 1;
wire [C_M_AXI_MEM_NUM_BANKS-1:0] m_axi_wr_req_fire, m_axi_wr_rsp_fire;
wire [C_M_AXI_MEM_NUM_BANKS_SW-1:0] cur_wr_reqs, cur_wr_rsps;
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin : g_awfire
VX_axi_write_ack axi_write_ack (
.clk (clk),
.reset (reset),
.awvalid(m_axi_mem_awvalid_a[i]),
.awready(m_axi_mem_awready_a[i]),
.wvalid (m_axi_mem_wvalid_a[i]),
.wready (m_axi_mem_wready_a[i]),
.tx_ack (m_axi_wr_req_fire[i]),
`UNUSED_PIN (aw_ack),
`UNUSED_PIN (w_ack),
`UNUSED_PIN (tx_rdy)
);
assign m_axi_wr_rsp_fire[i] = m_axi_mem_bvalid_a[i] & m_axi_mem_bready_a[i];
end
`POP_COUNT(cur_wr_reqs, m_axi_wr_req_fire);
`POP_COUNT(cur_wr_rsps, m_axi_wr_rsp_fire);
wire signed [C_M_AXI_MEM_NUM_BANKS_SW:0] reqs_sub = (C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_reqs) -
(C_M_AXI_MEM_NUM_BANKS_SW+1)'(cur_wr_rsps);
always @(posedge clk) begin
if (reset) begin
vx_pending_writes <= '0;
end else begin
vx_pending_writes <= vx_pending_writes + PENDING_SIZEW'(reqs_sub);
end
end
@ -408,16 +425,16 @@ module VX_afu_wrap #(
always @(posedge clk) begin
for (integer i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
if (m_axi_mem_awvalid_a[i] && m_axi_mem_awready_a[i]) begin
`TRACE(2, ("%t: AFU Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]))
`TRACE(2, ("%t: AXI Wr Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_awaddr_a[i], m_axi_mem_awid_a[i]))
end
if (m_axi_mem_wvalid_a[i] && m_axi_mem_wready_a[i]) begin
`TRACE(2, ("%t: AFU Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i]))
`TRACE(2, ("%t: AXI Wr Req [%0d]: data=0x%h\n", $time, i, m_axi_mem_wdata_a[i]))
end
if (m_axi_mem_arvalid_a[i] && m_axi_mem_arready_a[i]) begin
`TRACE(2, ("%t: AFU Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]))
`TRACE(2, ("%t: AXI Rd Req [%0d]: addr=0x%0h, tag=0x%0h\n", $time, i, m_axi_mem_araddr_a[i], m_axi_mem_arid_a[i]))
end
if (m_axi_mem_rvalid_a[i] && m_axi_mem_rready_a[i]) begin
`TRACE(2, ("%t: AVS Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]))
`TRACE(2, ("%t: AXI Rd Rsp [%0d]: data=0x%h, tag=0x%0h\n", $time, i, m_axi_mem_rdata_a[i], m_axi_mem_rid_a[i]))
end
end
end

View file

@ -116,32 +116,21 @@ module VX_axi_adapter #(
assign req_bank_off = mem_req_addr_out;
end
wire mem_req_fire = mem_req_valid && mem_req_ready;
// AXI write request synchronization
reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack;
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_m_axi_w
wire m_axi_aw_fire = m_axi_awvalid[i] && m_axi_awready[i];
wire m_axi_w_fire = m_axi_wvalid[i] && m_axi_wready[i];
always @(posedge clk) begin
if (reset || (mem_req_fire && (req_bank_sel == i))) begin
m_axi_aw_ack[i] <= 0;
m_axi_w_ack[i] <= 0;
end else begin
if (m_axi_aw_fire) begin
m_axi_aw_ack[i] <= 1;
end
if (m_axi_w_fire) begin
m_axi_w_ack[i] <= 1;
end
end
end
end
wire [NUM_BANKS-1:0] axi_write_ready;
reg [NUM_BANKS-1:0] m_axi_aw_ack, m_axi_w_ack, axi_write_ready;
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_axi_write_ready
assign axi_write_ready[i] = (m_axi_awready[i] || m_axi_aw_ack[i])
&& (m_axi_wready[i] || m_axi_w_ack[i]);
VX_axi_write_ack axi_write_ack (
.clk (clk),
.reset (reset),
.awvalid(m_axi_awvalid[i]),
.awready(m_axi_awready[i]),
.wvalid (m_axi_wvalid[i]),
.wready (m_axi_wready[i]),
.aw_ack (m_axi_aw_ack[i]),
.w_ack (m_axi_w_ack[i]),
.tx_rdy (axi_write_ready[i]),
`UNUSED_PIN (tx_ack)
);
end
// request ack

View file

@ -0,0 +1,60 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
`TRACING_OFF
// Tracks the address (AW) and data (W) handshakes of one AXI write
// transaction, which may complete in different clock cycles.
//
// Outputs:
//   aw_ack : the AW handshake was recorded in an earlier cycle and the
//            transaction has not been acknowledged yet.
//   w_ack  : same as aw_ack, for the W handshake.
//   tx_ack : both handshakes have occurred, each either in the current
//            cycle or recorded earlier; the recorded flags are cleared on
//            the next clock edge.
//   tx_rdy : for each channel, either its ready input is high now or its
//            handshake has already been recorded.
module VX_axi_write_ack (
    input  wire clk,
    input  wire reset,
    input  wire awvalid,
    input  wire awready,
    input  wire wvalid,
    input  wire wready,
    output wire aw_ack,
    output wire w_ack,
    output wire tx_ack,
    output wire tx_rdy
);
    // sticky flags: handshake seen for the transaction in flight
    reg aw_done_r;
    reg w_done_r;

    // handshakes taking place in the current cycle
    wire aw_hs = awvalid && awready;
    wire w_hs  = wvalid && wready;

    assign tx_ack = (aw_hs || aw_done_r) && (w_hs || w_done_r);
    assign tx_rdy = (awready || aw_done_r) && (wready || w_done_r);
    assign aw_ack = aw_done_r;
    assign w_ack  = w_done_r;

    always @(posedge clk) begin
        if (reset || tx_ack) begin
            // clearing takes priority over recording a new handshake
            aw_done_r <= 0;
            w_done_r  <= 0;
        end else begin
            if (aw_hs) begin
                aw_done_r <= 1;
            end
            if (w_hs) begin
                w_done_r <= 1;
            end
        end
    end
endmodule
`TRACING_ON

View file

@ -263,13 +263,6 @@ private:
}
device_->reset = 0;
for (int i = 0; i < RESET_DELAY; ++i) {
device_->clk = 0;
this->eval();
device_->clk = 1;
this->eval();
}
}
void tick() {

View file

@ -151,9 +151,6 @@ public:
// reset device
this->reset();
// start execution
running_ = true;
// wait on device to go busy
while (!device_->busy) {
this->tick();
@ -181,8 +178,6 @@ private:
this->mem_bus_reset();
this->dcr_bus_reset();
running_ = false;
print_bufs_.clear();
pending_mem_reqs_.clear();
@ -192,8 +187,6 @@ private:
std::swap(dram_queue_, empty);
}
mem_rd_rsp_active_ = false;
device_->reset = 1;
for (int i = 0; i < RESET_DELAY; ++i) {
@ -204,13 +197,7 @@ private:
}
device_->reset = 0;
for (int i = 0; i < RESET_DELAY; ++i) {
device_->clk = 0;
this->eval();
device_->clk = 1;
this->eval();
}
device_->mem_req_ready = 1;
}
void tick() {
@ -261,11 +248,10 @@ private:
void mem_bus_eval() {
// process memory read responses
if (mem_rd_rsp_active_ && device_->mem_rsp_ready) {
if (device_->mem_rsp_valid && device_->mem_rsp_ready) {
device_->mem_rsp_valid = 0;
mem_rd_rsp_active_ = false;
}
if (!mem_rd_rsp_active_) {
if (!device_->mem_rsp_valid) {
if (!pending_mem_reqs_.empty()
&& (*pending_mem_reqs_.begin())->ready) {
auto mem_rsp_it = pending_mem_reqs_.begin();
@ -280,7 +266,6 @@ private:
memcpy(VDataCast<void*, MEM_BLOCK_SIZE>::get(device_->mem_rsp_data), mem_rsp->data.data(), MEM_BLOCK_SIZE);
device_->mem_rsp_tag = mem_rsp->tag;
pending_mem_reqs_.erase(mem_rsp_it);
mem_rd_rsp_active_ = true;
delete mem_rsp;
}
}
@ -291,7 +276,6 @@ private:
if (device_->mem_req_rw) {
auto byteen = device_->mem_req_byteen;
auto data = VDataCast<uint8_t*, MEM_BLOCK_SIZE>::get(device_->mem_req_data);
if (byte_addr >= uint64_t(IO_COUT_ADDR)
&& byte_addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
// process console output
@ -350,8 +334,6 @@ private:
dram_queue_.push(mem_req);
}
}
device_->mem_req_ready = running_;
}
void dcr_bus_reset() {
@ -390,10 +372,6 @@ private:
#endif
RAM* ram_;
bool mem_rd_rsp_active_;
bool running_;
};
///////////////////////////////////////////////////////////////////////////////

View file

@ -333,12 +333,9 @@ private:
}
device_->ap_rst_n = 1;
for (int i = 0; i < RESET_DELAY; ++i) {
device_->ap_clk = 0;
this->eval();
device_->ap_clk = 1;
this->eval();
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
*m_axi_mem_[i].arready = 1;
*m_axi_mem_[i].awready = 1;
}
}
@ -407,10 +404,10 @@ private:
void axi_mem_bus_reset() {
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
// address read request
*m_axi_mem_[i].arready = 1;
*m_axi_mem_[i].arready = 0;
// address write request
*m_axi_mem_[i].awready = 1;
*m_axi_mem_[i].awready = 0;
// data write request
*m_axi_mem_[i].wready = 0;
@ -423,19 +420,16 @@ private:
// states
m_axi_states_[i].write_req_pending = false;
m_axi_states_[i].write_rsp_pending = false;
m_axi_states_[i].read_rsp_pending = false;
}
}
void axi_mem_bus_eval() {
for (int i = 0; i < PLATFORM_MEMORY_BANKS; ++i) {
// handle read responses
if (m_axi_states_[i].read_rsp_pending && (*m_axi_mem_[i].rready)) {
*m_axi_mem_[i].rvalid = 0;
m_axi_states_[i].read_rsp_pending = false;
if (*m_axi_mem_[i].rvalid && *m_axi_mem_[i].rready) {
*m_axi_mem_[i].rvalid = 0;
}
if (!m_axi_states_[i].read_rsp_pending) {
if (!*m_axi_mem_[i].rvalid) {
if (!pending_mem_reqs_[i].empty()
&& (*pending_mem_reqs_[i].begin())->ready
&& !(*pending_mem_reqs_[i].begin())->write) {
@ -447,17 +441,15 @@ private:
*m_axi_mem_[i].rlast = 1;
memcpy(m_axi_mem_[i].rdata->data(), mem_rsp->data.data(), PLATFORM_MEMORY_DATA_SIZE);
pending_mem_reqs_[i].erase(mem_rsp_it);
m_axi_states_[i].read_rsp_pending = true;
delete mem_rsp;
}
}
// handle write responses
if (m_axi_states_[i].write_rsp_pending && *m_axi_mem_[i].bready) {
if (*m_axi_mem_[i].bvalid && *m_axi_mem_[i].bready) {
*m_axi_mem_[i].bvalid = 0;
m_axi_states_[i].write_rsp_pending = false;
}
if (!m_axi_states_[i].write_rsp_pending) {
if (!*m_axi_mem_[i].bvalid) {
if (!pending_mem_reqs_[i].empty()
&& (*pending_mem_reqs_[i].begin())->ready
&& (*pending_mem_reqs_[i].begin())->write) {
@ -467,7 +459,6 @@ private:
*m_axi_mem_[i].bid = mem_rsp->tag;
*m_axi_mem_[i].bresp = 0;
pending_mem_reqs_[i].erase(mem_rsp_it);
m_axi_states_[i].write_rsp_pending = true;
delete mem_rsp;
}
}
@ -492,17 +483,21 @@ private:
dram_queues_[i].push(mem_req);
}
// handle address write requests
if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !m_axi_states_[i].write_req_pending) {
if (*m_axi_mem_[i].wready && !m_axi_states_[i].write_req_pending) {
*m_axi_mem_[i].wready = 0;
}
// handle address write requests
if (*m_axi_mem_[i].awvalid && *m_axi_mem_[i].awready && !*m_axi_mem_[i].wready) {
m_axi_states_[i].write_req_addr = *m_axi_mem_[i].awaddr;
m_axi_states_[i].write_req_tag = *m_axi_mem_[i].awid;
m_axi_states_[i].write_req_pending = true;
// activate data channel
*m_axi_mem_[i].wready = 1;
m_axi_states_[i].write_req_pending = !*m_axi_mem_[i].wvalid;
}
// handle data write requests
*m_axi_mem_[i].wready = false;
if (*m_axi_mem_[i].wvalid && m_axi_states_[i].write_req_pending) {
if (*m_axi_mem_[i].wvalid && *m_axi_mem_[i].wready) {
auto byteen = *m_axi_mem_[i].wstrb;
auto data = (uint8_t*)m_axi_mem_[i].wdata->data();
auto byte_addr = m_axi_states_[i].write_req_addr;
@ -529,10 +524,11 @@ private:
// send dram request
dram_queues_[i].push(mem_req);
m_axi_states_[i].write_req_pending = false;
// acquire write data
*m_axi_mem_[i].wready = true;
// deactivate data channel
if (m_axi_states_[i].write_req_pending) {
*m_axi_mem_[i].wready = 0;
m_axi_states_[i].write_req_pending = false;
}
}
}
}
@ -541,8 +537,6 @@ private:
uint64_t write_req_addr;
uint32_t write_req_tag;
bool write_req_pending;
bool read_rsp_pending;
bool write_rsp_pending;
} m_axi_state_t;
typedef struct {